gpt4 book ai didi

xml - 如何使用XML/SGML实体将UTF-16转换为ASCII/ANSI?

转载 作者:行者123 更新时间:2023-12-03 18:47:13 26 4
gpt4 key购买 nike

XML文件:

<?xml version="1.0" encoding="utf-8"?>
<response>
<center>
<b>Need to decode this -> 😉</b>
</center>
</response>


我当前的代码:

// Loads example.xml, reads the text of <response>/<center>/<b>, and shows it in
// Memo1. Every UTF-16 code unit above $04FF is replaced by its four-digit hex
// value followed by a space; all other code units are appended unchanged.
procedure TForm1.Button1Click(Sender: TObject);
const
  MaxPlainCodeUnit = $04FF;
var
  XmlDoc: IXMLDocument;
  NodeText: WideString;
  Output: AnsiString;
  Idx: Integer;
  Code: Integer;
begin
  XmlDoc := TXMLDocument.Create(nil);
  XmlDoc.LoadFromFile('example.xml');
  NodeText := XmlDoc.DocumentElement.ChildNodes['center'].ChildNodes['b'].NodeValue;

  Output := '';
  for Idx := 1 to Length(NodeText) do
  begin
    Code := Ord(NodeText[Idx]);
    if Code <= MaxPlainCodeUnit then
      Output := Output + NodeText[Idx]
    else
      // Surrogate halves land here too, so a pair shows up as two hex words.
      Output := Output + IntToHex(Code, 4) + ' ';
  end;

  Memo1.Text := Output;
end;


SW以UTF-16(宽字符串)编码,并且包含字符序列 #$D83D#$DE09,但是我需要像 '&#128521;'这样的XML / SGML实体来使用它。我该如何编码?

使用的字符是这样的: http://www.fileformat.info/info/unicode/char/1f609/index.htm

最佳答案

使用ANSI Delphi时,您必须手动处理UTF-16代理对(或使用某些第三方库)。

下面的代码在 ANSI 版和 Unicode 版的 Delphi 中应该都可以工作:

uses
{$IFDEF UNICODE}
Xml.XMLDoc, Xml.XMLIntf, System.AnsiStrings, System.Character;
{$ELSE}
XMLDoc, XMLIntf;
{$ENDIF}

{$R *.dfm}

type
// String type accepted by UnicodeToAsciiWithEntities: UnicodeString when the
// UNICODE symbol is defined, WideString otherwise.
{$IFDEF UNICODE}
ValueString = UnicodeString;
{$ELSE}
ValueString = WideString;
{$ENDIF}

// Assertion helper: does nothing when ATrue holds, otherwise raises an
// Exception carrying AMessage.
procedure Check(ATrue: Boolean; const AMessage: string);
begin
  if ATrue then
    Exit;
  raise Exception.Create(AMessage);
end;

// Returns True when AChar is a UTF-16 high (leading) surrogate, i.e. a code
// unit in the range $D800..$DBFF.
function IsHighSurrogate(AChar: WideChar): Boolean;
begin
{$IFDEF UNICODE}
  Result := TCharacter.IsHighSurrogate(AChar);
{$ELSE}
  // $D800..$DBFF are exactly the code units whose top six bits are 110110.
  Result := (Ord(AChar) and $FC00) = $D800;
{$ENDIF}
end;

// Combines a UTF-16 surrogate pair into the ordinal value of the Unicode
// character it represents.
//   AHigh - leading surrogate, expected in $D800..$DBFF
//   ALow  - trailing surrogate, expected in $DC00..$DFFF
// In the non-UNICODE build an Exception is raised (via Check) when either
// argument is outside its expected range.
function ConvertToUtf32(AHigh, ALow: WideChar): Integer;
begin
{$IFDEF UNICODE}
  Result := Ord(TCharacter.ConvertToUtf32(AHigh, ALow));
{$ELSE}
  Check((AHigh >= #$D800) and (AHigh <= #$DBFF), 'Invalid high surrogate code point');
  Check((ALow >= #$DC00) and (ALow <= #$DFFF), 'Invalid low surrogate code point');
  // Ten payload bits come from each surrogate; the combined 20-bit value is
  // offset by $010000 to land in the supplementary planes.
  Result := $010000 + (((Ord(AHigh) - $D800) shl 10) or (Ord(ALow) - $DC00));
{$ENDIF}
end;

// Formats AValue as a decimal numeric character reference, e.g. 128521
// becomes '&#128521;'.
function MakeEntity(AValue: Integer): AnsiString;
const
  EntityFormat: AnsiString = '&#%d;';
begin
  Result := Format(EntityFormat, [AValue]);
end;

// Converts a UTF-16 string to 7-bit ASCII, replacing every character at or
// above U+0080 with a decimal numeric character reference ('&#NNNN;').
//
//   AInput - UTF-16 text to convert.
//   Result - ASCII text; code units below $0080 are copied unchanged, surrogate
//            pairs are combined into a single reference for the character they
//            encode.
//
// Raises an Exception (via Check) when the input is not well-formed UTF-16:
// a high surrogate at the very end of the string, a high surrogate that is not
// followed by a valid low surrogate, or a low surrogate that is not preceded
// by a high surrogate. The last case used to be emitted as a reference to the
// surrogate itself (e.g. '&#56841;'), which is not a legal XML character.
function UnicodeToAsciiWithEntities(const AInput: ValueString): AnsiString;
var
  C: WideChar;
  I, Len: Integer;
begin
  Result := '';
  Len := Length(AInput);
  I := 1;
  while I <= Len do
  begin
    C := AInput[I];
    if C < #$0080 then
      Result := Result + AnsiChar(C)
    else if IsHighSurrogate(C) then
    begin
      Check((I + 1) <= Len, 'String truncated after high surrogate');
      Result := Result + MakeEntity(ConvertToUtf32(C, AInput[I + 1]));
      // Skip the low surrogate that was consumed together with C.
      Inc(I);
    end
    else
    begin
      // A low surrogate reaching this branch has no preceding high surrogate.
      Check((C < #$DC00) or (C > #$DFFF), 'Unpaired low surrogate code point');
      Result := Result + MakeEntity(Ord(C));
    end;
    Inc(I);
  end;
end;

// Loads example.xml, converts the text of <response>/<center>/<b> to ASCII with
// numeric character references, and shows the result in Memo1.
procedure TForm1.Button1Click(Sender: TObject);
var
  XmlDoc: IXMLDocument;
  NodeText: ValueString;
begin
  XmlDoc := LoadXMLDocument('example.xml');
  NodeText := XmlDoc.DocumentElement.ChildNodes['center'].ChildNodes['b'].NodeValue;
  Memo1.Lines.Text := string(UnicodeToAsciiWithEntities(NodeText));
end;


我手头没有 Delphi 7,因此在该版本下可能需要做一些细微的调整;这段代码已在 Delphi XE2 和 Delphi 2007 中验证可用。

关于xml - 如何使用XML/SGML实体将UTF-16转换为ASCII/ANSI?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/26111641/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com