gpt4 book ai didi

Delphi - 比较和标记字符串差异

转载 作者:行者123 更新时间:2023-12-03 18:34:20 25 4
gpt4 key购买 nike

我需要做的是比较两个字符串并用开始/结束标记标记差异以进行更改。例子:

this is string number one.
this string is string number two.

output would be

this [is|string is] string number [one|two].  

I've been trying to figure this out for some time now. And I found something I blieved would help me do this, but I am unable to make this happen.
http://www.angusj.com/delphi/textdiff.html

I have it about 80% working here, but I've got no idea how to get it to do exactly what I want it to do. Any help would be appreciated.


uses
Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
Dialogs, Diff, StdCtrls;

type
TForm2 = class(TForm)
Edit1: TEdit;
Edit2: TEdit;
Button1: TButton;
Memo1: TMemo;
Diff: TDiff;
procedure Button1Click(Sender: TObject);
private
{ Private declarations }
public
{ Public declarations }
end;

var
Form2: TForm2;
s1,s2:string;

implementation

{$R *.dfm}

procedure TForm2.Button1Click(Sender: TObject);
var
i: Integer;
lastKind: TChangeKind;

procedure AddCharToStr(var s: string; c: char; kind, lastkind: TChangeKind);
begin
if (kind lastkind) AND (lastkind = ckNone) and (kind ckNone) then s:=s+'[';
if (kind lastkind) AND (lastkind ckNone) and (kind = ckNone) then s:=s+']';
case kind of
ckNone: s := s + c;
ckAdd: s := s + c;
ckDelete: s := s + c;
ckModify: s := s + '|' + c;
end;
end;

begin
Diff.Execute(pchar(edit1.text), pchar(edit2.text), length(edit1.text), length(edit2.text));

//now, display the diffs ...
lastKind := ckNone;
s1 := ''; s2 := '';
form2.caption:= inttostr(diff.Count);
for i := 0 to Diff.count-1 do
begin

with Diff.Compares[i] do
begin
//show changes to first string (with spaces for adds to align with second string)
if Kind = ckAdd then
begin
AddCharToStr(s1,' ',Kind, lastKind);
end
else
AddCharToStr(s1,chr1,Kind,lastKind);
if Kind = ckDelete then
begin
AddCharToStr(s2,' ',Kind, lastKind)
end
else AddCharToStr(s2,chr2,Kind,lastKind);

lastKind := Kind;
end;
end;
memo1.Lines.Add(s1);
memo1.Lines.Add(s2);
end;

end.

我从 angusj.com 获取了 basicdemo1 并对其进行了修改以实现这一目标。

最佳答案

要解决您描述的问题,您基本上必须做一些类似于 biological sequence alignment 中所做的事情。 DNA 或蛋白质数据。如果您只有两个字符串(或一个常量引用字符串),可以通过 dynamic programming基于成对对齐算法,例如 Needleman Wunsch algorithm * 及相关算法。 (多序列比对变得更加复杂。)

[* 编辑:链接应该是:http://en.wikipedia.org/wiki/Needleman –Wunsch_algorithm ]

编辑 2:

由于您似乎对在单词而不是字符级别进行比较感兴趣,因此您可以 (1) 将输入字符串拆分为字符串数组,其中每个数组元素代表一个单词,然后 (2) 在级别执行对齐这些话。这样做的好处是对齐的搜索空间变得更小,因此您希望它总体上更快。我已经相应地改编并“补充”了维基百科文章中的伪代码示例:


program AlignWords;



{$APPTYPE CONSOLE}



function MaxChoice (C1, C2, C3: integer): integer;
begin
Result:= C1;
if C2 > Result then Result:= C2;
if C3 > Result then Result:= C3;
end;



function WordSim (const S1, S2: String): integer; overload;
//Case-sensitive!
var i, l1, l2, minL: integer;
begin
l1:= length(S1);
l2:= length(S2);
Result:= l1-l2;
if Result > 0 then Result:= -Result;
if (S1='') or (S2='') then exit;
minL:= l1;
if l2 < l1 then minL:= l2;
for i := 1 to minL do if S1[i]<>S2[i] then dec(Result);
end;



procedure AlignWordsNW (const A, B: Array of String; GapChar: Char; const Delimiter: ShortString; GapPenalty: integer; out AlignmentA, AlignmentB: string);
// Needleman-Wunsch alignment
// GapPenalty should be a negative value!
var
F: array of array of integer;
i, j,
Choice1, Choice2, Choice3,
Score, ScoreDiag, ScoreUp, ScoreLeft :integer;
function GapChars (const S: String): String;
var i: integer;
begin
assert (length(S)>0);
Result:='';
for i := 0 to length(S) - 1 do Result:=Result + GapChar;
end;
begin
SetLength (F, length(A)+1, length(B)+1);
for i := 0 to length(A) do F[i,0]:= GapPenaltyi;
for j := 0 to length(B) do F[0,j]:= GapPenalty
j;
for i:=1 to length(A) do begin
for j:= 1 to length(B) do begin
Choice1:= F[i-1,j-1] + WordSim(A[i-1], B[j-1]);
Choice2:= F[i-1, j] + GapPenalty;
Choice3:= F[i, j-1] + GapPenalty;
F[i,j]:= maxChoice (Choice1, Choice2, Choice3);
end;
end;
AlignmentA:= '';
AlignmentB:= '';
i:= length(A);
j:= length(B);
while (i > 0) and (j > 0) do begin
Score := F[i,j];
ScoreDiag:= F[i-1,j-1];
ScoreUp:= F[i,j-1];
ScoreLeft:= F[i-1,j];
if Score = ScoreDiag + WordSim(A[i-1], B[j-1]) then begin
AlignmentA:= A[i-1] + Delimiter + AlignmentA;
AlignmentB:= B[j-1] + Delimiter + AlignmentB;
dec (i);
dec (j);
end else if Score = ScoreLeft + GapPenalty then begin
AlignmentA:= A[i-1] + Delimiter + AlignmentA;
AlignmentB:= GapChars (A[i-1]) + Delimiter + AlignmentB;
dec(i);
end else begin
assert (Score = ScoreUp + GapPenalty);
AlignmentA:= GapChars(B[j-1]) + Delimiter + AlignmentA;
AlignmentB:= B[j-1] + Delimiter + AlignmentB;
dec (j);
end;
end;
while (i > 0) do begin
AlignmentA:= A[i-1] + Delimiter + AlignmentA;
AlignmentB:= GapChars(A[i-1]) + Delimiter + AlignmentB;
dec(i);
end;
while (j > 0) do begin
AlignmentA:= GapChars(B[j-1]) + Delimiter + AlignmentA;
AlignmentB:= B[j-1] + Delimiter + AlignmentB;
dec(j);
end;
end;



Type
TStringArray = Array Of String;



Var
as1, as2: TStringArray;
s1, s2: string;



BEGIN
as1:= TStringArray.create ('this','is','string','number','one.');
as2:= TStringArray.Create ('this','string','is','string','number','two.');



AlignWordsNW (as1, as2, '-',' ',-1, s1,s2);
writeln (s1);
writeln (s2);

结尾。

这个例子的输出是

这 ------ 是字符串编号---- 一个。
这个字符串是第二个字符串。 ----

它并不完美,但你明白了。从这种输出中,你应该能够做你想做的事。请注意,您可能需要调整 GapPenalty和相似度评分函数WordSim以满足您的需求。

关于Delphi - 比较和标记字符串差异,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/2141383/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com