POSTGRESQL (PG) 性能优化之索引使用技巧（复合多列索引、局部索引、表达式索引、覆盖索引、唯一索引）

一、复合索引（多列索引）

复合索引也称多列索引，是在某个关系表上的多列建立索引，为了提高索引查询效率，将经常使用的列放在复合索引的前面。当查询语句中where子句引用了复合索引中的所有列或者先导列，复合索引会带来查询性能提升。

目前PG只有B树索引、GiST、GIN和BRIN索引类型支持复合索引，最多可以支持32个列。

复合索引使用实例：

等值情况下，where子句中有先导列a，不管后面跟着b或者c或者d，或者三者的组合，都会使用复合索引；但是如果where子句中没有先导列a，只有b或者c或者d或者三者的组合，优化器通常不会选择该复合索引（此时无法利用先导列缩小扫描范围，只能扫描整个索引，代价往往高于顺序扫描）。

--等值情况下索引使用情况
test=# create table t1(a int, b int, c int, d int);
CREATE TABLE
test=# insert into t1 values(generate_series(1,100000), generate_series(1,1000), generate_series(1,10000), generate_series(1,1000000));
INSERT 0 1000000
test=# \d t1
                 Table "public.t1"
 Column |  Type   | Collation | Nullable | Default
--------+---------+-----------+----------+---------
 a      | integer |           |          |
 b      | integer |           |          |
 c      | integer |           |          |
 d      | integer |           |          |

test=# explain select * from t1 where a=10 and b=30 and c=1555 and d=888999;
                               QUERY PLAN
-------------------------------------------------------------------------
 Gather  (cost=1000.00..13768.43 rows=1 width=16)
   Workers Planned: 2
   ->  Parallel Seq Scan on t1  (cost=0.00..12768.33 rows=1 width=16)
         Filter: ((a = 10) AND (b = 30) AND (c = 1555) AND (d = 888999))
(4 rows)

test=# create index on t1(a,b,c,d);
CREATE INDEX
test=# analyze t1;
ANALYZE
test=# \d t1
                 Table "public.t1"
 Column |  Type   | Collation | Nullable | Default
--------+---------+-----------+----------+---------
 a      | integer |           |          |
 b      | integer |           |          |
 c      | integer |           |          |
 d      | integer |           |          |
Indexes:
    "t1_a_b_c_d_idx" btree (a, b, c, d)

test=# explain select * from t1 where a=10 and b=30 and c=1555;
                                  QUERY PLAN
-------------------------------------------------------------------------------
 Index Only Scan using t1_a_b_c_d_idx on t1  (cost=0.42..8.45 rows=1 width=16)
   Index Cond: ((a = 10) AND (b = 30) AND (c = 1555))
(2 rows)

test=# explain select * from t1 where a=10 and b=30;
                                  QUERY PLAN
-------------------------------------------------------------------------------
 Index Only Scan using t1_a_b_c_d_idx on t1  (cost=0.42..8.45 rows=1 width=16)
   Index Cond: ((a = 10) AND (b = 30))
(2 rows)

test=# explain select * from t1 where a=10 and c=1555;
                                  QUERY PLAN
-------------------------------------------------------------------------------
 Index Only Scan using t1_a_b_c_d_idx on t1  (cost=0.42..8.45 rows=1 width=16)
   Index Cond: ((a = 10) AND (c = 1555))
(2 rows)

test=# explain select * from t1 where a=10 and d=888999;
                                  QUERY PLAN
-------------------------------------------------------------------------------
 Index Only Scan using t1_a_b_c_d_idx on t1  (cost=0.42..8.45 rows=1 width=16)
   Index Cond: ((a = 10) AND (d = 888999))
(2 rows)

test=# explain select * from t1 where b=30 and c=1555 and d=888999;
                              QUERY PLAN
----------------------------------------------------------------------
 Gather  (cost=1000.00..12726.77 rows=1 width=16)
   Workers Planned: 2
   ->  Parallel Seq Scan on t1  (cost=0.00..11726.67 rows=1 width=16)
         Filter: ((b = 30) AND (c = 1555) AND (d = 888999))
(4 rows)

test=# explain select * from t1 where b=30;
                             QUERY PLAN
---------------------------------------------------------------------
 Gather  (cost=1000.00..10643.43 rows=1 width=16)
   Workers Planned: 2
   ->  Parallel Seq Scan on t1  (cost=0.00..9643.33 rows=1 width=16)
         Filter: (b = 30)
(4 rows)

test=# explain select * from t1 where b=30 and d=888999;
                              QUERY PLAN
----------------------------------------------------------------------
 Gather  (cost=1000.00..11685.10 rows=1 width=16)
   Workers Planned: 2
   ->  Parallel Seq Scan on t1  (cost=0.00..10685.00 rows=1 width=16)
         Filter: ((b = 30) AND (d = 888999))
(4 rows)

不等值情况下和等值规则一致，即where条件中必须有先导列a，否则不能使用复合索引。

--不等值情况下使用和等值一致
test=# explain select * from t1 where a=10 and b<30 and c>9988 and d=888999;
                                  QUERY PLAN
-------------------------------------------------------------------------------
 Index Only Scan using t1_a_b_c_d_idx on t1  (cost=0.42..8.45 rows=1 width=16)
   Index Cond: ((a = 10) AND (b < 30) AND (c > 9988) AND (d = 888999))
(2 rows)

test=# explain select * from t1 where a=10 and b<30 and c>9988;
                                  QUERY PLAN
-------------------------------------------------------------------------------
 Index Only Scan using t1_a_b_c_d_idx on t1  (cost=0.42..8.45 rows=1 width=16)
   Index Cond: ((a = 10) AND (b < 30) AND (c > 9988))
(2 rows)

test=# explain select * from t1 where a=10 and c>9988;
                                  QUERY PLAN
-------------------------------------------------------------------------------
 Index Only Scan using t1_a_b_c_d_idx on t1  (cost=0.42..8.45 rows=1 width=16)
   Index Cond: ((a = 10) AND (c > 9988))
(2 rows)

test=# explain select * from t1 where a<876 and b<30 and c>9988 and d=888999;
                                   QUERY PLAN
--------------------------------------------------------------------------------
 Index Only Scan using t1_a_b_c_d_idx on t1  (cost=0.42..40.23 rows=1 width=16)
   Index Cond: ((a < 876) AND (b < 30) AND (c > 9988) AND (d = 888999))
(2 rows)

test=# explain select * from t1 where b<30 and c>9988 and d=888999;
                              QUERY PLAN
----------------------------------------------------------------------
 Gather  (cost=1000.00..12726.77 rows=1 width=16)
   Workers Planned: 2
   ->  Parallel Seq Scan on t1  (cost=0.00..11726.67 rows=1 width=16)
         Filter: ((b < 30) AND (c > 9988) AND (d = 888999))
(4 rows)

二、部分索引（局部索引）

部分索引也称局部索引，是建立在关系表的一个子集上的索引，而该子集是由一个条件表达式定义的（叫做部分索引的谓词）。该索引只包含表中那些满足这个谓词的行。

部分索引的优点在于提高数据插入和更新的效率，减少维护成本，因为部分索引不是在所有情况下都需更新索引，只有符合部分索引条件表达式的数据才会更新索引。另外部分索引比普通索引要小，减少索引存储空间。

部分索引主要在排除公共值、排除不感兴趣的值方面比较有用，比如IP网段排除等。

部分索引实例：

test=# create index on t1(b) where b < 500;
CREATE INDEX
test=# analyze t1;
ANALYZE
test=# \d t1
                 Table "public.t1"
 Column |  Type   | Collation | Nullable | Default
--------+---------+-----------+----------+---------
 a      | integer |           |          |
 b      | integer |           |          |
 c      | integer |           |          |
 d      | integer |           |          |
Indexes:
    "t1_a_b_c_d_idx" btree (a, b, c, d)
    "t1_b_idx" btree (b) WHERE b < 500

test=# explain select * from t1 where b <150;
                             QUERY PLAN
---------------------------------------------------------------------
 Index Scan using t1_b_idx on t1  (cost=0.28..9.52 rows=71 width=16)
   Index Cond: (b < 150)
(2 rows)

test=# explain select * from t1 where b <650;
                              QUERY PLAN
-----------------------------------------------------------------------
 Gather  (cost=1000.00..10723.03 rows=797 width=16)
   Workers Planned: 2
   ->  Parallel Seq Scan on t1  (cost=0.00..9643.33 rows=332 width=16)
         Filter: (b < 650)
(4 rows)

test=# explain select * from t1 where b > 300;
                              QUERY PLAN
-----------------------------------------------------------------------
 Gather  (cost=1000.00..10721.03 rows=777 width=16)
   Workers Planned: 2
   ->  Parallel Seq Scan on t1  (cost=0.00..9643.33 rows=324 width=16)
         Filter: (b > 300)
(4 rows)

test=# explain select * from t1 where b > 499;
                              QUERY PLAN
-----------------------------------------------------------------------
 Gather  (cost=1000.00..10689.63 rows=463 width=16)
   Workers Planned: 2
   ->  Parallel Seq Scan on t1  (cost=0.00..9643.33 rows=193 width=16)
         Filter: (b > 499)
(4 rows)

三、表达式索引

表达式索引是基于关系表的一列或者多列计算而来的一个函数或者标量表达式来建立的索引，可以根据计算结果快速获取表中的内容。表达式索引存储的是表达式的值，这些值不是在使用索引查询时计算的，而是在创建索引以及插入、更新数据时计算好的，因此每次插入数据或者数据更新时都需要进行表达式计算，写入会比较慢。表达式索引的维护代价比较昂贵，需要谨慎使用。

最常见的是创建基于函数的索引，比如经常使用lower(name)或者upper(name)函数做大小写无关的比较，但是因为用了lower或者upper函数，无法有效利用name列上的索引，此时就需要表达式索引：

test=# create table t2(id int, name text);
CREATE TABLE
test=# insert into t2 values(generate_series(1,100000), md5(random()::text));
INSERT 0 100000
test=# create index t2_name_idx1 on t2(name);
CREATE INDEX
test=# analyze t2;
ANALYZE
test=# \d t2
                 Table "public.t2"
 Column |  Type   | Collation | Nullable | Default
--------+---------+-----------+----------+---------
 id     | integer |           |          |
 name   | text    |           |          |
Indexes:
    "t2_name_idx1" btree (name)

test=# explain select * from t2 where name='ada';
                               QUERY PLAN
------------------------------------------------------------------------
 Index Scan using t2_name_idx1 on t2  (cost=0.42..8.44 rows=1 width=37)
   Index Cond: (name = 'ada'::text)
(2 rows)

test=# explain select * from t2 where lower(name)='ada';
                       QUERY PLAN
--------------------------------------------------------
 Seq Scan on t2  (cost=0.00..2334.00 rows=500 width=37)
   Filter: (lower(name) = 'ada'::text)
(2 rows)

test=# create index t2_name_idx2 on t2(lower(name));
CREATE INDEX
test=# analyze t2;
ANALYZE
test=# \d t2
                 Table "public.t2"
 Column |  Type   | Collation | Nullable | Default
--------+---------+-----------+----------+---------
 id     | integer |           |          |
 name   | text    |           |          |
Indexes:
    "t2_name_idx1" btree (name)
    "t2_name_idx2" btree (lower(name))

test=# explain select * from t2 where lower(name)='ada';
                               QUERY PLAN
------------------------------------------------------------------------
 Index Scan using t2_name_idx2 on t2  (cost=0.42..8.44 rows=1 width=37)
   Index Cond: (lower(name) = 'ada'::text)
(2 rows)

另外，我们也可能经常进行如下的查询来查询人名相关的数据，这时候需要创建基于复杂表达式的函数索引：

test=# create table t3(id int, first_name text, last_name text);
CREATE TABLE
test=# insert into t3 values(generate_series(1,100000), md5(random()::text), md5(random()::text));
INSERT 0 100000
test=# \d t3
                   Table "public.t3"
   Column   |  Type   | Collation | Nullable | Default
------------+---------+-----------+----------+---------
 id         | integer |           |          |
 first_name | text    |           |          |
 last_name  | text    |           |          |

test=# explain select * from t3 where (first_name || ' ' || last_name) = 'Jim Covert';
                                QUERY PLAN
---------------------------------------------------------------------------
 Seq Scan on t3  (cost=0.00..2985.00 rows=500 width=70)
   Filter: (((first_name || ' '::text) || last_name) = 'Jim Covert'::text)
(2 rows)

test=# create index on t3(first_name, last_name);
CREATE INDEX
test=# analyze t3;
ANALYZE
test=# \d t3
                   Table "public.t3"
   Column   |  Type   | Collation | Nullable | Default
------------+---------+-----------+----------+---------
 id         | integer |           |          |
 first_name | text    |           |          |
 last_name  | text    |           |          |
Indexes:
    "t3_first_name_last_name_idx" btree (first_name, last_name)

test=# explain select * from t3 where (first_name || ' ' || last_name) = 'Jim Covert';
                                QUERY PLAN
---------------------------------------------------------------------------
 Seq Scan on t3  (cost=0.00..2985.00 rows=500 width=70)
   Filter: (((first_name || ' '::text) || last_name) = 'Jim Covert'::text)
(2 rows)

test=# create index on t3((first_name || ' ' || last_name));
CREATE INDEX
test=# analyze t3;
ANALYZE
test=# \d t3
                   Table "public.t3"
   Column   |  Type   | Collation | Nullable | Default
------------+---------+-----------+----------+---------
 id         | integer |           |          |
 first_name | text    |           |          |
 last_name  | text    |           |          |
Indexes:
    "t3_expr_idx" btree (((first_name || ' '::text) || last_name))
    "t3_first_name_last_name_idx" btree (first_name, last_name)

test=# explain select * from t3 where (first_name || ' ' || last_name) = 'Jim Covert';
                                  QUERY PLAN
-------------------------------------------------------------------------------
 Index Scan using t3_expr_idx on t3  (cost=0.42..8.44 rows=1 width=70)
   Index Cond: (((first_name || ' '::text) || last_name) = 'Jim Covert'::text)
(2 rows)

四、覆盖索引

谈覆盖索引之前，首先要了解PG中的Index Only Scan，该扫描方式可以只通过索引就能得到元组，而不需要回表产生额外的堆访问。基本思想是直接从每一个索引项中返回值，而不需要去访问堆数据。B树索引总是支持Index Only Scan，GiST和SP-GiST索引只有部分操作符类支持，其他索引类型不支持。

能够使用Index Only Scan的索引需要包含查询中所有引用关系表的列。因此上面讲到的复合索引可以适用于Index Only Scan（复合索引实例中有Index Only Scan例子），但是复合索引要求的存储空间比较大，为了减少开销，PG引入了覆盖索引。覆盖索引的键列是where条件中用到的查询列，而查询中需要返回、但不出现在where条件中的列则通过INCLUDE子句附加到索引中，从而只需从索引中获取数据而不需要访问堆读取数据。

覆盖索引实例：

test=# create index on t1(b,d);
CREATE INDEX
test=# analyze t1;
ANALYZE
test=# \d t1
                 Table "public.t1"
 Column |  Type   | Collation | Nullable | Default
--------+---------+-----------+----------+---------
 a      | integer |           |          |
 b      | integer |           |          |
 c      | integer |           |          |
 d      | integer |           |          |
Indexes:
    "t1_a_b_c_d_idx" btree (a, b, c, d)
    "t1_b_d_idx" btree (b, d)
    "t1_b_idx" btree (b) WHERE b < 500

test=# explain select c from t1 where b > 800 and d > 9988;
                               QUERY PLAN
------------------------------------------------------------------------
 Index Scan using t1_b_d_idx on t1  (cost=0.42..116.42 rows=62 width=4)
   Index Cond: ((b > 800) AND (d > 9988))
(2 rows)

test=# create index t1_b_d_c_idx on t1(b,d) include (c);
CREATE INDEX
test=# analyze t1;
ANALYZE
test=# \d t1
                 Table "public.t1"
 Column |  Type   | Collation | Nullable | Default
--------+---------+-----------+----------+---------
 a      | integer |           |          |
 b      | integer |           |          |
 c      | integer |           |          |
 d      | integer |           |          |
Indexes:
    "t1_a_b_c_d_idx" btree (a, b, c, d)
    "t1_b_d_c_idx" btree (b, d) INCLUDE (c)
    "t1_b_d_idx" btree (b, d)
    "t1_b_idx" btree (b) WHERE b < 500

test=# explain select c from t1 where b > 800 and d > 9988;
                                   QUERY PLAN
--------------------------------------------------------------------------------
 Index Only Scan using t1_b_d_c_idx on t1  (cost=0.42..360.78 rows=204 width=4)
   Index Cond: ((b > 800) AND (d > 9988))
(2 rows)

五、唯一索引

唯一索引用来强制列值的唯一性，或者是多个列组合值的唯一性。目前只有B树索引能够被声明为唯一。

当一个索引被声明为唯一时，索引中不允许多个表行具有相同的索引值。空值被视为不相同。PG会自动为定义了一个唯一约束或主键的表创建一个唯一索引。该索引包含组成主键或唯一约束的所有列（可能是一个多列索引）。

唯一索引实例：

--为主键自动创建唯一索引：
test=# create table t4(id int primary key, name text);
CREATE TABLE
test=# \d t4
                 Table "public.t4"
 Column |  Type   | Collation | Nullable | Default
--------+---------+-----------+----------+---------
 id     | integer |           | not null |
 name   | text    |           |          |
Indexes:
    "t4_pkey" PRIMARY KEY, btree (id)

test=#
--为UNIQUE约束自动创建唯一索引：
test=# create table t5(id int unique, name text);
CREATE TABLE
test=# \d t5
                 Table "public.t5"
 Column |  Type   | Collation | Nullable | Default
--------+---------+-----------+----------+---------
 id     | integer |           |          |
 name   | text    |           |          |
Indexes:
    "t5_id_key" UNIQUE CONSTRAINT, btree (id)

--手动创建唯一索引：
test=# create table t6(id int, name text);
CREATE TABLE
test=# \d t6
                 Table "public.t6"
 Column |  Type   | Collation | Nullable | Default
--------+---------+-----------+----------+---------
 id     | integer |           |          |
 name   | text    |           |          |

test=# create unique index t6_id_idx_u on t6(id);
CREATE INDEX
test=# \d t6
                 Table "public.t6"
 Column |  Type   | Collation | Nullable | Default
--------+---------+-----------+----------+---------
 id     | integer |           |          |
 name   | text    |           |          |
Indexes:
    "t6_id_idx_u" UNIQUE, btree (id)