gpt4 book ai didi

sql - Postgres - 创建表非常慢 - 带有分区和 BRIN 索引

转载 作者:行者123 更新时间:2023-12-03 09:31:21 28 4
gpt4 key购买 nike

我有一张包含超过 2.6 亿条记录的表。我已经为表创建了分区并对其进行了索引。

-- Parent (partitioned) table holding ~260M rows, range-partitioned on rec_id.
-- NOTE(review): no PRIMARY KEY / NOT NULL / UNIQUE constraints are declared;
-- presumably deliberate for bulk loading — confirm whether dedup/nullability
-- guarantees are enforced elsewhere.
CREATE TABLE qt_al_90k
(
rec_id integer,
user_id integer,
user_text text,
user_number double precision,
user_date date,
user_seq integer,
my_sequence integer
) PARTITION BY RANGE (rec_id);

分区查询:
-- 27 range partitions on rec_id, 100k ids per partition, covering [0, 3000000).
-- Range bounds are half-open: FROM is inclusive, TO is exclusive, so adjacent
-- partitions do not overlap.
CREATE TABLE qt_al_90k_rec_id_1 PARTITION OF qt_al_90k FOR VALUES FROM (0) TO (100000);
CREATE TABLE qt_al_90k_rec_id_2 PARTITION OF qt_al_90k FOR VALUES FROM (100000) TO (200000);
CREATE TABLE qt_al_90k_rec_id_3 PARTITION OF qt_al_90k FOR VALUES FROM (200000) TO (300000);
CREATE TABLE qt_al_90k_rec_id_4 PARTITION OF qt_al_90k FOR VALUES FROM (300000) TO (400000);
CREATE TABLE qt_al_90k_rec_id_5 PARTITION OF qt_al_90k FOR VALUES FROM (400000) TO (500000);
CREATE TABLE qt_al_90k_rec_id_6 PARTITION OF qt_al_90k FOR VALUES FROM (500000) TO (600000);
CREATE TABLE qt_al_90k_rec_id_7 PARTITION OF qt_al_90k FOR VALUES FROM (600000) TO (700000);
CREATE TABLE qt_al_90k_rec_id_8 PARTITION OF qt_al_90k FOR VALUES FROM (700000) TO (800000);
CREATE TABLE qt_al_90k_rec_id_9 PARTITION OF qt_al_90k FOR VALUES FROM (800000) TO (900000);
CREATE TABLE qt_al_90k_rec_id_10 PARTITION OF qt_al_90k FOR VALUES FROM (900000) TO (1000000);
CREATE TABLE qt_al_90k_rec_id_11 PARTITION OF qt_al_90k FOR VALUES FROM (1000000) TO (1100000);
CREATE TABLE qt_al_90k_rec_id_12 PARTITION OF qt_al_90k FOR VALUES FROM (1100000) TO (1200000);
CREATE TABLE qt_al_90k_rec_id_13 PARTITION OF qt_al_90k FOR VALUES FROM (1200000) TO (1300000);
CREATE TABLE qt_al_90k_rec_id_14 PARTITION OF qt_al_90k FOR VALUES FROM (1300000) TO (1400000);
CREATE TABLE qt_al_90k_rec_id_15 PARTITION OF qt_al_90k FOR VALUES FROM (1400000) TO (1500000);
CREATE TABLE qt_al_90k_rec_id_16 PARTITION OF qt_al_90k FOR VALUES FROM (1500000) TO (1600000);
CREATE TABLE qt_al_90k_rec_id_17 PARTITION OF qt_al_90k FOR VALUES FROM (1600000) TO (1700000);
CREATE TABLE qt_al_90k_rec_id_18 PARTITION OF qt_al_90k FOR VALUES FROM (1700000) TO (1800000);
CREATE TABLE qt_al_90k_rec_id_19 PARTITION OF qt_al_90k FOR VALUES FROM (1800000) TO (1900000);
CREATE TABLE qt_al_90k_rec_id_20 PARTITION OF qt_al_90k FOR VALUES FROM (1900000) TO (2000000);
CREATE TABLE qt_al_90k_rec_id_21 PARTITION OF qt_al_90k FOR VALUES FROM (2000000) TO (2100000);
CREATE TABLE qt_al_90k_rec_id_22 PARTITION OF qt_al_90k FOR VALUES FROM (2100000) TO (2200000);
CREATE TABLE qt_al_90k_rec_id_23 PARTITION OF qt_al_90k FOR VALUES FROM (2200000) TO (2300000);
CREATE TABLE qt_al_90k_rec_id_24 PARTITION OF qt_al_90k FOR VALUES FROM (2300000) TO (2400000);
CREATE TABLE qt_al_90k_rec_id_25 PARTITION OF qt_al_90k FOR VALUES FROM (2400000) TO (2500000);
CREATE TABLE qt_al_90k_rec_id_26 PARTITION OF qt_al_90k FOR VALUES FROM (2500000) TO (2600000);
-- NOTE(review): this last partition is 4x wider than the others (2.6M-3.0M vs
-- 100k per partition) — confirm that is intentional. There is also no DEFAULT
-- partition, so an INSERT with rec_id >= 3000000 (or < 0, or NULL) will error.
CREATE TABLE qt_al_90k_rec_id_27 PARTITION OF qt_al_90k FOR VALUES FROM (2600000) TO (3000000);

索引查询:
-- Per-partition BRIN indexes: one on user_id and one on rec_id for each of the
-- 27 partitions.
-- NOTE(review): BRIN stores only min/max summaries per block range, so it is
-- only selective when column values correlate with physical row order. The
-- EXPLAIN below (lossy heap blocks, ~3.9M rows removed by recheck) suggests
-- user_id is NOT correlated here, making these indexes nearly useless; a btree
-- — ideally a multicolumn (user_id, rec_id) btree — would be the better fit,
-- as the accepted answer points out.
-- NOTE(review): since every partition gets the same two indexes, creating the
-- indexes once on the parent table would have Postgres cascade them to all
-- partitions automatically — verify against the server version in use.
CREATE INDEX qt_al_90k_user_id_1 ON qt_al_90k_rec_id_1 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_1 ON qt_al_90k_rec_id_1 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_2 ON qt_al_90k_rec_id_2 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_2 ON qt_al_90k_rec_id_2 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_3 ON qt_al_90k_rec_id_3 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_3 ON qt_al_90k_rec_id_3 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_4 ON qt_al_90k_rec_id_4 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_4 ON qt_al_90k_rec_id_4 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_5 ON qt_al_90k_rec_id_5 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_5 ON qt_al_90k_rec_id_5 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_6 ON qt_al_90k_rec_id_6 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_6 ON qt_al_90k_rec_id_6 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_7 ON qt_al_90k_rec_id_7 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_7 ON qt_al_90k_rec_id_7 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_8 ON qt_al_90k_rec_id_8 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_8 ON qt_al_90k_rec_id_8 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_9 ON qt_al_90k_rec_id_9 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_9 ON qt_al_90k_rec_id_9 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_10 ON qt_al_90k_rec_id_10 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_10 ON qt_al_90k_rec_id_10 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_11 ON qt_al_90k_rec_id_11 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_11 ON qt_al_90k_rec_id_11 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_12 ON qt_al_90k_rec_id_12 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_12 ON qt_al_90k_rec_id_12 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_13 ON qt_al_90k_rec_id_13 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_13 ON qt_al_90k_rec_id_13 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_14 ON qt_al_90k_rec_id_14 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_14 ON qt_al_90k_rec_id_14 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_15 ON qt_al_90k_rec_id_15 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_15 ON qt_al_90k_rec_id_15 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_16 ON qt_al_90k_rec_id_16 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_16 ON qt_al_90k_rec_id_16 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_17 ON qt_al_90k_rec_id_17 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_17 ON qt_al_90k_rec_id_17 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_18 ON qt_al_90k_rec_id_18 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_18 ON qt_al_90k_rec_id_18 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_19 ON qt_al_90k_rec_id_19 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_19 ON qt_al_90k_rec_id_19 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_20 ON qt_al_90k_rec_id_20 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_20 ON qt_al_90k_rec_id_20 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_21 ON qt_al_90k_rec_id_21 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_21 ON qt_al_90k_rec_id_21 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_22 ON qt_al_90k_rec_id_22 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_22 ON qt_al_90k_rec_id_22 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_23 ON qt_al_90k_rec_id_23 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_23 ON qt_al_90k_rec_id_23 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_24 ON qt_al_90k_rec_id_24 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_24 ON qt_al_90k_rec_id_24 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_25 ON qt_al_90k_rec_id_25 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_25 ON qt_al_90k_rec_id_25 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_26 ON qt_al_90k_rec_id_26 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_26 ON qt_al_90k_rec_id_26 USING brin(rec_id);
CREATE INDEX qt_al_90k_user_id_27 ON qt_al_90k_rec_id_27 USING brin(user_id);
CREATE INDEX qt_al_90k_rec_27 ON qt_al_90k_rec_id_27 USING brin(rec_id);

这是我的检索查询:
-- Materialize the matching rows (~500k expected) into a scratch table.
-- Fixes: single-element "user_id IN (37)" rewritten as a plain equality,
-- keywords uppercased, one column per line, predicates aligned.
-- The rec_id IN-list keeps partition pruning effective on the parent table.
CREATE TABLE test123 AS
SELECT
    user_id,
    user_text,
    rec_id,
    user_seq
FROM qt_al_90k
WHERE rec_id IN (1492, 1493, 1494, 1495, 1496, 1497, 1498, 1499, 1500, 1501)
  AND user_id = 37;

此查询应返回大约 50 万条记录,但创建该表耗时超过 11 分钟。当 rec_id 的数量超过 100 个时,查询永远不会返回。

我有 1000 多个 rec_id 需要处理,但是,我做不到。

如何提高 create table 语句的性能?请帮忙。

编辑

这是 EXPLAIN (ANALYZE, BUFFERS)
"Gather  (cost=1607.98..2817973.44 rows=257182 width=29) (actual time=119.210..344297.379 rows=500000 loops=1)"
" Workers Planned: 2"
" Workers Launched: 2"
" Buffers: shared hit=49963 read=43384"
" I/O Timings: read=509470.717"
" -> Parallel Append (cost=607.98..2791255.24 rows=107159 width=29) (actual time=116.662..343151.672 rows=166667 loops=3)"
" Buffers: shared hit=49963 read=43384"
" I/O Timings: read=509470.717"
" -> Parallel Bitmap Heap Scan on qt_al_90k_rec_id_1 (cost=607.98..2790719.45 rows=107159 width=29) (actual time=116.661..343137.730 rows=166667 loops=3)"
" Recheck Cond: ((rec_id = ANY ('{1492,1493,1494,1495,1496,1497,1498,1499,1500,1501}'::integer[])) AND (va_id = 37))"
" Rows Removed by Index Recheck: 3862086"
" Heap Blocks: lossy=30808"
" Buffers: shared hit=49963 read=43384"
" I/O Timings: read=509470.717"
" -> BitmapAnd (cost=607.98..607.98 rows=4480328 width=0) (actual time=92.004..92.004 rows=0 loops=1)"
" Buffers: shared hit=395 read=24"
" I/O Timings: read=0.082"
" -> Bitmap Index Scan on qt_al_90k_rec_1 (cost=0.00..116.02 rows=5232168 width=0) (actual time=47.230..47.230 rows=8016640 loops=1)"
" Index Cond: (rec_id = ANY ('{1492,1493,1494,1495,1496,1497,1498,1499,1500,1501}'::integer[]))"
" Buffers: shared hit=381 read=1"
" I/O Timings: read=0.011"
" -> Bitmap Index Scan on qt_al_90k_user_id_1 (cost=0.00..363.11 rows=134190580 width=0) (actual time=44.130..44.130 rows=11761920 loops=1)"
" Index Cond: (va_id = 37)"
" Buffers: shared hit=14 read=23"
" I/O Timings: read=0.071"
"Planning Time: 1.814 ms"
"Execution Time: 344320.891 ms"

最佳答案

行的物理顺序是否与 user_id(或 va_id,无论该列的真实名称是什么)和 rec_id 列的值相关?如果不相关,那么您现有的 BRIN 索引几乎毫无用处。您可以改用 BTREE 索引重试,或者更好的做法是建立多列索引 (va_id, rec_id)。出于测试目的,您可以只在分区 qt_al_90k_rec_id_1 上构建它。

您的数据缓存不佳。但是,这是因为它不能很好地缓存(您没有足够的内存)还是因为它恰好不是在您运行查询的那一刻?由于大部分时间都在读取表数据,因此更有效地使用索引可能没有多大帮助,因为它仍然需要读取表数据(除非您也可以通过添加其他列来获得仅索引扫描)

您的 IO 系统是什么样的?如果您使用 RAID 或 JBOD,那么提高 effective_io_concurrency 参数可能会有所帮助。

I have more than 1000 rec_id's to process through this



这些分为多少个不同的分区?

如果能看到该查询的 EXPLAIN 输出也会很有帮助。由于它永远不会执行完,因此无法获得 EXPLAIN (ANALYZE) 的结果。

此外,您需要多久执行一次此操作?这看起来不像您经常运行的那种查询。仅仅为它构建索引可能没有必要,但更好的索引也可能对其他查询有用。

关于sql - Postgres - 创建表非常慢 - 带有分区和 BRIN 索引,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/59899889/

28 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com