gpt4 book ai didi

sql - Jaro Winkler 距离算法在 Transact SQL 中的实现

转载 作者:塔克拉玛干 更新时间:2023-11-03 05:43:58 24 4
gpt4 key购买 nike

几个月来我一直在想如何在 Transact SQL 中实现这个算法,https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance

如何实现?

最佳答案

今天我终于偶然发现了this leebickmtu 的 Stack Overflow-answer 显示了 C# 中的实现,最初是从 Java 移植的。我冒昧地将它移植到 Transact SQL 函数,尽情享受吧!

IF OBJECT_ID (N'dbo.InlineMax', N'FN') IS NOT NULL
DROP FUNCTION dbo.InlineMax;
GO

CREATE FUNCTION dbo.InlineMax(@valueOne int, @valueTwo int)
RETURNS FLOAT
AS
BEGIN
IF @valueOne > @valueTwo
BEGIN
RETURN @valueOne
END

RETURN ISNULL(@valueTwo, @valueOne)
END;
GO

IF OBJECT_ID (N'dbo.InlineMin', N'FN') IS NOT NULL
DROP FUNCTION dbo.InlineMin;
GO

CREATE FUNCTION dbo.InlineMin(@valueOne int, @valueTwo int)
RETURNS FLOAT
AS
BEGIN
IF @valueOne < @valueTwo
RETURN @valueOne

RETURN @valueTwo
END;
GO

IF OBJECT_ID (N'dbo.JaroWinklerDistance', N'FN') IS NOT NULL
DROP FUNCTION dbo.JaroWinklerDistance;
GO

CREATE FUNCTION dbo.JaroWinklerDistance(@stringOne varchar(MAX), @stringTwo varchar(MAX))
RETURNS FLOAT
WITH EXECUTE AS CALLER
AS
BEGIN
DECLARE @mWeightThreshold FLOAT; SET @mWeightThreshold = 0.7;
DECLARE @mNuMChars INT; SET @mNumChars = 4;
DECLARE @lLen1 int; SET @lLen1 = LEN(@stringOne)
DECLARE @lLen2 int; SET @lLen2 = LEN(@stringTwo)

IF @lLen1 = 0
RETURN CASE WHEN @lLen2 = 0 THEN 1 ELSE 0 END

DECLARE @lSearchRange int; SET @lSearchRange = dbo.InlineMax(0,dbo.InlineMax(@lLen1, @lLen2)/2 - 1);

DECLARE @lMatched1 TABLE (position int not null, [status] bit not null)
DECLARE @lMatched2 TABLE (position int not null, [status] bit not null)

DECLARE @lNumCommon int; SET @lNumCommon = 0
DECLARE @i int; SET @i = 1; WHILE(@i <= @lLen1)
BEGIN
DECLARE @lStart int; SET @lStart = dbo.InlineMax(1, @i - @lSearchRange)
DECLARE @lEnd int; SET @lEnd = dbo.InlineMin(@i + @lSearchRange + 1, @lLen2)

DECLARE @j int; SET @j = @lStart; WHILE(@j <= @lEnd)
BEGIN
IF((SELECT [status] FROM @lMatched2 WHERE position = @j) = 1)
BEGIN
SET @j = @j + 1
CONTINUE
END

IF (SELECT SUBSTRING(@stringOne, @i, 1)) <> (SELECT SUBSTRING(@stringTwo, @j, 1))
BEGIN
SET @j = @j + 1
CONTINUE
END

INSERT INTO @lMatched1 (position, [status]) VALUES(@i, 1)
INSERT INTO @lMatched2 (position, [status]) VALUES(@j, 1)

SET @lNumCommon = @lNumCommon + 1
BREAK
END

SET @i = @i + 1
END

IF @lNumCommon = 0
BEGIN
RETURN 0.0;
END

DECLARE @lNumHalfTransposed int; SET @lNumHalfTransposed = 0
DECLARE @k INT; SET @k = 1;
DECLARE @stopLoop bit; SET @stopLoop = 0;
SET @i = 1; WHILE(@i <= @lLen1)
BEGIN
IF ((SELECT [status] FROM @lMatched1 WHERE position = @i) = 1)
BEGIN
SET @i = @i + 1
CONTINUE;
END

WHILE(@stopLoop = 0)
BEGIN
IF((SELECT [status] FROM @lMatched2 WHERE position = @k) = 0)
SET @k = @k + 1
ELSE
BREAK

IF((SELECT SUBSTRING(@stringOne, @i, 1)) <> (SELECT SUBSTRING(@stringTwo, @k, 1)))
SET @lNumHalfTransposed = @lNumHalfTransposed + 1

SET @k = @k + 1
END

SET @i = @i + 1
END

DECLARE @lNumTransposed INT; SET @lNumTransposed = @lNumHalfTransposed/2;

DECLARE @lNumCommonD FLOAT; SET @lNumCommonD = @lNumCommon
DECLARE @lWeight FLOAT; SET @lWeight = (@lNumCommonD / @lLen1 + @lNumCommonD / @lLen2 + (@lNumCommon - @lNumTransposed) / @lNumCommonD) / 3.0;

IF(@lWeight <= @mWeightThreshold)
RETURN @lWeight
DECLARE @lMax INT; SET @lMax = dbo.InlineMin(@mNumChars, dbo.InlineMin(@lLen1, @lLen2))
DECLARE @lPos INT; SET @lPos = 0
WHILE(@lPos < @lMax AND (SELECT SUBSTRING(@stringOne, @lPos, 1)) = (SELECT SUBSTRING(@stringTwo, @lPos, 1)))
BEGIN
SET @lPos = @lPos + 1
END

IF @lPos = 0
RETURN @lWeight

RETURN @lWeight + 0.1 * @lPos * (1.0 - @lWeight)
END;
GO

关于sql - Jaro Winkler 距离算法在 Transact SQL 中的实现,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/34879176/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com