gpt4 book ai didi

sql - 如何在 PostgreSQL 中移动整个组?

转载 作者:行者123 更新时间:2023-12-04 02:28:42 26 4
gpt4 key购买 nike

我有一个结构如下的表(很抱歉数据示例很长,但无法用较小的行数重现问题):

CREATE TABLE example_tbl (
id CHAR(1),
pid INT,
sid INT,
tid VARCHAR(10),
other_columns INT
)

INSERT INTO example_tbl (id, pid, sid, tid, other_columns)
VALUES
('c', 157, 85, 'A32', 1),
('c', 157, 85, 'A32', 2),
('c', 157, 85, 'A32', 3),
('c', 157, 85, 'A32', 4),
('c', 157, 85, 'A32', 5),
('c', 157, 85, 'A32', 6),
('c', 157, 85, 'A32', 7),
('c', 157, 85, 'A32', 8),
('c', 157, 85, 'A32', 9),
('c', 157, 85, 'A32', 10),
('c', 157, 85, 'A32', 11),
('Q', 157, 81, 'A62', 1),
('Q', 157, 81, 'A62', 2),
('Q', 157, 81, 'A62', 3),
('Q', 157, 81, 'A62', 4),
('Q', 157, 81, 'A62', 5),
('Q', 157, 81, 'A62', 6),
('Q', 157, 81, 'A62', 7),
('Q', 157, 81, 'A62', 8),
('Q', 157, 81, 'A62', 9),
('Q', 157, 81, 'A62', 10),
('Q', 157, 81, 'A62', 11),
('f', 598, 51, 'A62', NULL),
('w', 598, 49, 'A32', 9),
('Z', 598, 44, 'A62', NULL),
('r', 598, 16, 'A32', 10),
('O', 118, 93, 'A32', 1),
('G', 118, 38, 'A32', 4),
('U', 118, 90, 'A32', 1),
('U', 118, 90, 'A32', 2),
('U', 118, 90, 'A32', 3),
('U', 118, 90, 'A32', 4),
('U', 118, 90, 'A32', 5),
('U', 118, 90, 'A32', 6),
('U', 118, 90, 'A32', 7),
('U', 118, 90, 'A32', 8),
('U', 118, 90, 'A32', 9),
('U', 118, 90, 'A32', 10),
('U', 118, 90, 'A32', 11),
('m', 118, 37, 'A62', 1),
('J', 118, 54, 'A32', 20),
('a', 118, 59, 'A32', 11),
('s', 118, 18, 'A62', 8),
('y', 118, 33, 'A62', NULL),
('N', 118, 79, 'A62', NULL),
('l', 118, 35, 'A32', 9),
('n', 118, 63, 'A32', 5),
('R', 118, 86, 'A62', 1),
('R', 118, 86, 'A62', 2),
('R', 118, 86, 'A62', 3),
('R', 118, 86, 'A62', 4),
('R', 118, 86, 'A62', 5),
('R', 118, 86, 'A62', 6),
('R', 118, 86, 'A62', 7),
('R', 118, 86, 'A62', 8),
('R', 118, 86, 'A62', 9),
('R', 118, 86, 'A62', 10),
('R', 118, 86, 'A62', 11),
('H', 118, 23, 'A32', 1),
('H', 118, 23, 'A32', 2),
('H', 118, 23, 'A32', 3),
('H', 118, 23, 'A32', 4),
('H', 118, 23, 'A32', 5),
('H', 118, 23, 'A32', 6),
('H', 118, 23, 'A32', 7),
('H', 118, 23, 'A32', 8),
('H', 118, 23, 'A32', 9),
('H', 118, 23, 'A32', 10),
('H', 118, 23, 'A32', 11),
('B', 118, 43, 'A62', 39),
('h', 118, 60, 'A62', NULL),
('p', 118, 72, 'A32', 1),
('v', 118, 22, 'A32', 5),
('I', 118, 89, 'A62', 9),
('T', 118, 17, 'A62', 1),
('F', 118, 41, 'A32', 10),
('z', 118, 55, 'A32', 6),
('Y', 118, 75, 'A32', NULL),
('u', 118, 48, 'A62', 9),
('x', 783, 27, 'A32', 10),
('V', 783, 11, 'A62', 8),
('i', 783, 61, 'A62', 1),
('i', 783, 61, 'A62', 2),
('i', 783, 61, 'A62', 3),
('i', 783, 61, 'A62', 4),
('i', 783, 61, 'A62', 5),
('i', 783, 61, 'A62', 6),
('i', 783, 61, 'A62', 7),
('i', 783, 61, 'A62', 8),
('i', 783, 61, 'A62', 9),
('i', 783, 61, 'A62', 10),
('i', 783, 61, 'A62', 11);

id 列定义事件。我想在由 tid 定义的组中添加一个包含上一个事件的 id 的列。数据应根据 pid(自定义顺序,其中 157 < 598 < 118 < 783)和 sid 排序。棘手的部分是有些事件有 11 行,有些只有 1 行。other_columns 代表所有其他应该保留的列。

我尝试使用具有如下预先计算的偏移量的 LAG() 窗口函数:

WITH example_tbl_with_offset AS (
SELECT * FROM example_tbl
LEFT JOIN (SELECT id, COUNT(id)::int AS lag_offset
FROM example_tbl
GROUP BY id) AS offset_tbl
USING (id)
)
SELECT
*,
LAG(id, lag_offset) OVER (PARTITION BY tid
ORDER BY (CASE
WHEN pid = 157 THEN 1
WHEN pid = 598 THEN 2
WHEN pid = 118 THEN 3
WHEN pid = 783 THEN 4
END, sid
)) AS prev_id
FROM example_tbl_with_offset;

然而,尽管 prev_id 列似乎对前几个事件有效,但它最终会失去踪迹。

预期输出如下:

CREATE TABLE expected_output (
id CHAR(1),
pid INT,
sid INT,
tid VARCHAR(10),
prev_id CHAR(1)
)

INSERT INTO expected_output (id, pid, sid, tid, prev_id)
VALUES
('Q', 157, 81, 'A62', NULL),
('Q', 157, 81, 'A62', NULL),
('Q', 157, 81, 'A62', NULL),
('Q', 157, 81, 'A62', NULL),
('Q', 157, 81, 'A62', NULL),
('Q', 157, 81, 'A62', NULL),
('Q', 157, 81, 'A62', NULL),
('Q', 157, 81, 'A62', NULL),
('Q', 157, 81, 'A62', NULL),
('Q', 157, 81, 'A62', NULL),
('Q', 157, 81, 'A62', NULL),
('c', 157, 85, 'A32', NULL),
('c', 157, 85, 'A32', NULL),
('c', 157, 85, 'A32', NULL),
('c', 157, 85, 'A32', NULL),
('c', 157, 85, 'A32', NULL),
('c', 157, 85, 'A32', NULL),
('c', 157, 85, 'A32', NULL),
('c', 157, 85, 'A32', NULL),
('c', 157, 85, 'A32', NULL),
('c', 157, 85, 'A32', NULL),
('c', 157, 85, 'A32', NULL),
('r', 598, 16, 'A32', 'c'),
('Z', 598, 44, 'A62', 'Q'),
('w', 598, 49, 'A32', 'r'),
('f', 598, 51, 'A62', 'Z'),
('T', 118, 17, 'A62', 'f'),
('s', 118, 18, 'A62', 'T'),
('v', 118, 22, 'A32', 'w'),
('H', 118, 23, 'A32', 'v'),
('H', 118, 23, 'A32', 'v'),
('H', 118, 23, 'A32', 'v'),
('H', 118, 23, 'A32', 'v'),
('H', 118, 23, 'A32', 'v'),
('H', 118, 23, 'A32', 'v'),
('H', 118, 23, 'A32', 'v'),
('H', 118, 23, 'A32', 'v'),
('H', 118, 23, 'A32', 'v'),
('H', 118, 23, 'A32', 'v'),
('H', 118, 23, 'A32', 'v'),
('y', 118, 33, 'A62', 's'),
('l', 118, 35, 'A32', 'H'),
('m', 118, 37, 'A62', 'y'),
('G', 118, 38, 'A32', 'I'),
('F', 118, 41, 'A32', 'G'),
('B', 118, 43, 'A62', 'm'),
('u', 118, 48, 'A62', 'B'),
('J', 118, 54, 'A32', 'F'),
('z', 118, 55, 'A32', 'J'),
('a', 118, 59, 'A32', 'z'),
('h', 118, 60, 'A62', 'u'),
('n', 118, 63, 'A32', 'a'),
('p', 118, 72, 'A32', 'n'),
('Y', 118, 75, 'A32', 'p'),
('N', 118, 79, 'A62', 'h'),
('R', 118, 86, 'A62', 'N'),
('R', 118, 86, 'A62', 'N'),
('R', 118, 86, 'A62', 'N'),
('R', 118, 86, 'A62', 'N'),
('R', 118, 86, 'A62', 'N'),
('R', 118, 86, 'A62', 'N'),
('R', 118, 86, 'A62', 'N'),
('R', 118, 86, 'A62', 'N'),
('R', 118, 86, 'A62', 'N'),
('R', 118, 86, 'A62', 'N'),
('R', 118, 86, 'A62', 'N'),
('I', 118, 89, 'A62', 'R'),
('U', 118, 90, 'A32', 'Y'),
('U', 118, 90, 'A32', 'Y'),
('U', 118, 90, 'A32', 'Y'),
('U', 118, 90, 'A32', 'Y'),
('U', 118, 90, 'A32', 'Y'),
('U', 118, 90, 'A32', 'Y'),
('U', 118, 90, 'A32', 'Y'),
('U', 118, 90, 'A32', 'Y'),
('U', 118, 90, 'A32', 'Y'),
('U', 118, 90, 'A32', 'Y'),
('U', 118, 90, 'A32', 'Y'),
('O', 118, 93, 'A32', 'U'),
('V', 783, 11, 'A62', 'I'),
('x', 783, 27, 'A32', 'O'),
('i', 783, 61, 'A62', 'V'),
('i', 783, 61, 'A62', 'V'),
('i', 783, 61, 'A62', 'V'),
('i', 783, 61, 'A62', 'V'),
('i', 783, 61, 'A62', 'V'),
('i', 783, 61, 'A62', 'V'),
('i', 783, 61, 'A62', 'V'),
('i', 783, 61, 'A62', 'V'),
('i', 783, 61, 'A62', 'V'),
('i', 783, 61, 'A62', 'V'),
('i', 783, 61, 'A62', 'V');

使用以下顺序可能更容易理解实际逻辑:

SELECT * FROM expected_output
ORDER BY tid, CASE
WHEN pid = 157 THEN 1
WHEN pid = 598 THEN 2
WHEN pid = 118 THEN 3
WHEN pid = 783 THEN 4
END, sid;

@Edit:我的 PostgreSQL 版本是 9.5。

最佳答案

Postgres 11 的主要新特性之一在 release notes 中这样宣传:

  • Window functions now support all framing options shown in the SQL:2011 standard, including RANGE distance PRECEDING/FOLLOWING, GROUPS mode, and frame exclusion options

使用这个,在 single SELECT 中有一个single 窗口函数的解决方案:

SELECT *, first_value(id) OVER (PARTITION BY tid
ORDER BY CASE pid
WHEN 157 THEN 1
WHEN 598 THEN 2
WHEN 118 THEN 3
WHEN 783 THEN 4
END, sid
GROUPS BETWEEN 1 PRECEDING AND 1 PRECEDING
) AS prev_id
FROM example_tbl;

db<> fiddle here

应该也表现不错。
也适用于您的扩展测试设置,但不适用于版本 11 之前的 Postgres。

The manual:

In GROUPS mode, the offset again must yield a non-null, non-negativeinteger, and the option means that the frame starts or ends thespecified number of peer groups before or after the current row's peergroup, where a peer group is a set of rows that are equivalent in theORDER BY ordering.

所选的框架定义 GROUPS BETWEEN 1 PRECEDING AND 1 PRECEDING 完全满足您的需求。

您的描述和测试数据似乎暗示 sidid 会同步变化。所以窗口框架只包含一个 id 值。 min()max() 也可以,但我选择了 first_value() 作为最便宜的。因为它适用于任何数据类型,即使 min()max() 会失败(如 json)。

此外,使用更短(并且稍微便宜)的“简单”或“转换”CASE。见:

对于 Postgres 9.5

这适用于您过时的 Postgres 版本 9.5:

SELECT e.*, p.prev_id
FROM example_tbl e
JOIN (
SELECT *
, lag(id) OVER (PARTITION BY tid
ORDER BY CASE pid
WHEN 157 THEN 1
WHEN 598 THEN 2
WHEN 118 THEN 3
WHEN 783 THEN 4
END, sid) AS prev_id
FROM (
SELECT DISTINCT ON (tid, pid, sid)
tid, pid, sid, id
FROM example_tbl
) dist
) p USING (tid, pid, sid);

db<> fiddle here

这是假设 tidpidsid 被定义为 NOT NULL。否则,您需要做更多。

  1. 使用 (tid, pid, sid) 的不同组合导出一个表。同样,id 只是一个负载列。关于 DISTINCT ON:

  2. 现在,带有默认窗口框架的 lag() 会完成从“前一”行获取 id 的工作。

  3. 加入完整的表格。

关于sql - 如何在 PostgreSQL 中移动整个组?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/65631321/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com