gpt4 book ai didi

delphi - 查找并替换大型文本文件中的文本 (Delphi XE5)

转载 作者:行者123 更新时间:2023-12-03 15:06:06 27 4
gpt4 key购买 nike

我正在尝试查找并替换文本文件中的文本。我过去已经能够通过以下方法做到这一点:

procedure SmallFileFindAndReplace(FileName, Find, ReplaceWith: string);
begin
with TStringList.Create do
begin
LoadFromFile(FileName);
Text := StringReplace(Text, Find, ReplaceWith, [rfReplaceAll, rfIgnoreCase]);
SaveToFile(FileName);
Free;
end;
end;

但是,当文件相对较小时,上述方法可以正常工作;当文件大小约为 170 Mb 时,上述代码将导致以下错误:EOutOfMemory 带有消息“内存不足”enter image description here

我已成功尝试以下操作,但运行时间较长:

procedure Tfrm_Main.button_MakeReplacementClick(Sender: TObject);
var
fs : TFileStream;
s : AnsiString;
//s : string;
begin
fs := TFileStream.Create(edit_SQLFile.Text, fmOpenread or fmShareDenyNone);
try
SetLength(S, fs.Size);
fs.ReadBuffer(S[1], fs.Size);
finally
fs.Free;
end;
s := StringReplace(s, edit_Find.Text, edit_Replace.Text, [rfReplaceAll, rfIgnoreCase]);
fs := TFileStream.Create(edit_SQLFile.Text, fmCreate);
try
fs.WriteBuffer(S[1], Length(S));
finally
fs.Free;
end;
end;

我是“Streams”和使用缓冲区的新手。

有更好的方法吗?

谢谢。

最佳答案

第一个代码示例中有两个错误,第二个示例中有三个错误:

  1. 不要将整个大文件加载到内存中,尤其是在 32 位应用程序中。如果文件大小超过 ~1 Gb,您总是会收到“内存不足”
  2. StringReplace 由于重复的内存重新分配,大型字符串的速度会变慢
  3. 在第二个代码中,您不在文件中使用文本编码,因此(对于 Windows)您的代码“认为”该文件具有 UCS2 编码(每个字符两个字节)。但是,如果文件编码是 Ansi(每个字符一个字节)或 UTF8(可变大小的字符),您会得到什么?

因此,为了正确查找和替换,您必须使用文件编码和读/写文件部分,正如 LU RD 所说:

interface

uses
System.Classes,
System.SysUtils;

type
TFileSearchReplace = class(TObject)
private
FSourceFile: TFileStream;
FtmpFile: TFileStream;
FEncoding: TEncoding;
public
constructor Create(const AFileName: string);
destructor Destroy; override;

procedure Replace(const AFrom, ATo: string; ReplaceFlags: TReplaceFlags);
end;

implementation

uses
System.IOUtils,
System.StrUtils;

function Max(const A, B: Integer): Integer;
begin
if A > B then
Result := A
else
Result := B;
end;

{ TFileSearchReplace }

constructor TFileSearchReplace.Create(const AFileName: string);
begin
inherited Create;

FSourceFile := TFileStream.Create(AFileName, fmOpenReadWrite);
FtmpFile := TFileStream.Create(ChangeFileExt(AFileName, '.tmp'), fmCreate);
end;

destructor TFileSearchReplace.Destroy;
var
tmpFileName: string;
begin
if Assigned(FtmpFile) then
tmpFileName := FtmpFile.FileName;

FreeAndNil(FtmpFile);
FreeAndNil(FSourceFile);

TFile.Delete(tmpFileName);

inherited;
end;

procedure TFileSearchReplace.Replace(const AFrom, ATo: string;
ReplaceFlags: TReplaceFlags);
procedure CopyPreamble;
var
PreambleSize: Integer;
PreambleBuf: TBytes;
begin
// Copy Encoding preamble
SetLength(PreambleBuf, 100);
FSourceFile.Read(PreambleBuf, Length(PreambleBuf));
FSourceFile.Seek(0, soBeginning);

PreambleSize := TEncoding.GetBufferEncoding(PreambleBuf, FEncoding);
if PreambleSize <> 0 then
FtmpFile.CopyFrom(FSourceFile, PreambleSize);
end;

function GetLastIndex(const Str, SubStr: string): Integer;
var
i: Integer;
tmpSubStr, tmpStr: string;
begin
if not(rfIgnoreCase in ReplaceFlags) then
begin
i := Pos(SubStr, Str);
Result := i;
while i > 0 do
begin
i := PosEx(SubStr, Str, i + 1);
if i > 0 then
Result := i;
end;
if Result > 0 then
Inc(Result, Length(SubStr) - 1);
end
else
begin
tmpStr := UpperCase(Str);
tmpSubStr := UpperCase(SubStr);
i := Pos(tmpSubStr, tmpStr);
Result := i;
while i > 0 do
begin
i := PosEx(tmpSubStr, tmpStr, i + 1);
if i > 0 then
Result := i;
end;
if Result > 0 then
Inc(Result, Length(tmpSubStr) - 1);
end;
end;

var
SourceSize: int64;

procedure ParseBuffer(Buf: TBytes; var IsReplaced: Boolean);
var
i: Integer;
ReadedBufLen: Integer;
BufStr: string;
DestBytes: TBytes;
LastIndex: Integer;
begin
if IsReplaced and (not(rfReplaceAll in ReplaceFlags)) then
begin
FtmpFile.Write(Buf, Length(Buf));
Exit;
end;

// 1. Get chars from buffer
ReadedBufLen := 0;
for i := Length(Buf) downto 0 do
if FEncoding.GetCharCount(Buf, 0, i) <> 0 then
begin
ReadedBufLen := i;
Break;
end;
if ReadedBufLen = 0 then
raise EEncodingError.Create('Cant convert bytes to str');

FSourceFile.Seek(ReadedBufLen - Length(Buf), soCurrent);

BufStr := FEncoding.GetString(Buf, 0, ReadedBufLen);
if rfIgnoreCase in ReplaceFlags then
IsReplaced := ContainsText(BufStr, AFrom)
else
IsReplaced := ContainsStr(BufStr, AFrom);

if IsReplaced then
begin
LastIndex := GetLastIndex(BufStr, AFrom);
LastIndex := Max(LastIndex, Length(BufStr) - Length(AFrom) + 1);
end
else
LastIndex := Length(BufStr);

SetLength(BufStr, LastIndex);
FSourceFile.Seek(FEncoding.GetByteCount(BufStr) - ReadedBufLen, soCurrent);

BufStr := StringReplace(BufStr, AFrom, ATo, ReplaceFlags);
DestBytes := FEncoding.GetBytes(BufStr);
FtmpFile.Write(DestBytes, Length(DestBytes));
end;

var
Buf: TBytes;
BufLen: Integer;
bReplaced: Boolean;
begin
FSourceFile.Seek(0, soBeginning);
FtmpFile.Size := 0;
CopyPreamble;

SourceSize := FSourceFile.Size;
BufLen := Max(FEncoding.GetByteCount(AFrom) * 5, 2048);
BufLen := Max(FEncoding.GetByteCount(ATo) * 5, BufLen);
SetLength(Buf, BufLen);

bReplaced := False;
while FSourceFile.Position < SourceSize do
begin
BufLen := FSourceFile.Read(Buf, Length(Buf));
SetLength(Buf, BufLen);
ParseBuffer(Buf, bReplaced);
end;

FSourceFile.Size := 0;
FSourceFile.CopyFrom(FtmpFile, 0);
end;

使用方法:

procedure TForm2.btn1Click(Sender: TObject);
var
Replacer: TFileSearchReplace;
StartTime: TDateTime;
begin
StartTime:=Now;
Replacer:=TFileSearchReplace.Create('c:\Temp\123.txt');
try
Replacer.Replace('some текст', 'some', [rfReplaceAll, rfIgnoreCase]);
finally
Replacer.Free;
end;

Caption:=FormatDateTime('nn:ss.zzz', Now - StartTime);
end;

关于delphi - 查找并替换大型文本文件中的文本 (Delphi XE5),我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/35446733/

27 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com