gpt4 book ai didi

sql-server-2008 - 在 SQL 中查找重复的系列

转载 作者:行者123 更新时间:2023-12-04 07:22:38 25 4
gpt4 key购买 nike

我有一个包含 3 列的表,其中包含基于第一列(外键)的可变数量的记录。我正在尝试确定是否可以检测到整个系列的多行是否存在重复项

declare @finddupseries table
(
portid int,
asset_id int,
allocation float
)
;
INSERT INTO @finddupseries
SELECT 250, 6, 0.05 UNION ALL
SELECT 250, 66, 0.8 UNION ALL
SELECT 250, 2, 0.105 UNION ALL
SELECT 250, 4, 0.0225 UNION ALL
SELECT 250, 5, 0.0225 UNION ALL
SELECT 251, 13, 0.6 UNION ALL
SELECT 251, 2, 0.3 UNION ALL
SELECT 251, 5, 0.1 UNION ALL
SELECT 252, 13, 0.8 UNION ALL
SELECT 252, 2, 0.15 UNION ALL
SELECT 252, 5, 0.05 UNION ALL
SELECT 253, 13, 0.4 UNION ALL
SELECT 253, 2, 0.45 UNION ALL
SELECT 253, 5, 0.15 UNION ALL
SELECT 254, 6, 0.05 UNION ALL
SELECT 254, 66, 0.8 UNION ALL
SELECT 254, 2, 0.105 UNION ALL
SELECT 254, 4, 0.0225 UNION ALL
SELECT 254, 5, 0.0225

select * from @finddupseries

portid 250 和 254 的记录匹配。

有什么方法可以编写查询来检测这一点吗?编辑:是的,整个系列必须匹配。此外,如果有一种方法可以确定它 DID 匹配哪一个,那将很有帮助,因为实际表有大约 10k 条记录。

谢谢!

最佳答案

此查询将为您提供转换为按 port_id 分组的字符串的所有值

SELECT fus1.portid, 
(
SELECT CONVERT (VARCHAR, fus2.asset_id) + CONVERT (VARCHAR, fus2.allocation) + ','
FROM @finddupseries fus2
WHERE 1=1
AND fus1.portid = fus2.portid
ORDER BY fus2.portid, fus2.asset_id, fus2.allocation
FOR XML PATH ('')
) AllValuesFromAllRows
FROM @finddupseries fus1
GROUP BY fus1.portid

输出应该是这样的

portid      AllValuesFromAllRows
----------- ------------------------------------------------------
250 20.105,40.0225,50.0225,60.05,660.8,
251 20.3,50.1,130.6,
252 20.15,50.05,130.8,
253 20.45,50.15,130.4,
254 20.105,40.0225,50.0225,60.05,660.8,

现在,让我们一起做个小组吧!

;With DuplicateFinder as
(
SELECT fus1.portid,
(
SELECT CONVERT (VARCHAR, fus2.asset_id) + CONVERT (VARCHAR, fus2.allocation) + ','
FROM @finddupseries fus2
WHERE 1=1
AND fus1.portid = fus2.portid
ORDER BY fus2.portid, fus2.asset_id, fus2.allocation
FOR XML PATH ('')
) AllValuesFromAllRows
FROM @finddupseries fus1
GROUP BY fus1.portid
)
SELECT AllValuesFromAllRows, COUNT (*) NumDups
FROM DuplicateFinder
GROUP BY AllValuesFromAllRows
Having COUNT (*) > 1

你应该得到

AllValuesFromAllRows                           NumDups
----------------------------------------------- -----------
20.105,40.0225,50.0225,60.05,660.8, 2





所有东西都放在一起了

SET NOCOUNT ON

declare @finddupseries table
(
portid int,
asset_id int,
allocation float
)
;
INSERT INTO @finddupseries
SELECT 250, 6, 0.05 UNION ALL
SELECT 250, 66, 0.8 UNION ALL
SELECT 250, 2, 0.105 UNION ALL
SELECT 250, 4, 0.0225 UNION ALL
SELECT 250, 5, 0.0225 UNION ALL
SELECT 251, 13, 0.6 UNION ALL
SELECT 251, 2, 0.3 UNION ALL
SELECT 251, 5, 0.1 UNION ALL
SELECT 252, 13, 0.8 UNION ALL
SELECT 252, 2, 0.15 UNION ALL
SELECT 252, 5, 0.05 UNION ALL
SELECT 253, 13, 0.4 UNION ALL
SELECT 253, 2, 0.45 UNION ALL
SELECT 253, 5, 0.15 UNION ALL
SELECT 254, 6, 0.05 UNION ALL
SELECT 254, 66, 0.8 UNION ALL
SELECT 254, 2, 0.105 UNION ALL
SELECT 254, 4, 0.0225 UNION ALL
SELECT 254, 5, 0.0225

;With PivotAssetIdAndAllocation as
(
SELECT fus1.portid,
(
SELECT CONVERT (VARCHAR, fus2.asset_id) + '_'+ CONVERT (VARCHAR, fus2.allocation) + '~~'
FROM @finddupseries fus2
WHERE 1=1
AND fus1.portid = fus2.portid
ORDER BY fus2.portid, fus2.asset_id, fus2.allocation
FOR XML PATH ('')
) AllValuesFromAllRows
FROM @finddupseries fus1
GROUP BY fus1.portid
)
,
ListOfDuplicates AS
(
SELECT AllValuesFromAllRows, COUNT (*) NumDups
FROM PivotAssetIdAndAllocation
GROUP BY AllValuesFromAllRows
Having COUNT (*) > 1
)
SELECT portid, AllValuesFromAllRows
FROM PivotAssetIdAndAllocation
WHERE AllValuesFromAllRows IN (SELECT AllValuesFromAllRows FROM ListOfDuplicates)

输出是

portid      AllValuesFromAllRows
----------- ----------------------------------------------------------------------
250 2_0.105~~4_0.0225~~5_0.0225~~6_0.05~~66_0.8~~
254 2_0.105~~4_0.0225~~5_0.0225~~6_0.05~~66_0.8~~

关于sql-server-2008 - 在 SQL 中查找重复的系列,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/4616785/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com