gpt4 book ai didi

Delphi IFilter 实现

转载 作者:行者123 更新时间:2023-12-03 15:27:58 27 4
gpt4 key购买 nike

我需要在 Delphi 2010 中实现一个 IFilter,它可以搜索 Office 2007 docx 文件并返回在文档中找到的文本。

该 IFilter 还需要支持 IPersistStream 接口。

谢谢

最佳答案

您不需要自己实现一个 IFilter 来解析 Office 2007 docx 文件。您应该使用 Microsoft 已经编写好的 IFilter 对象来读取 docx 文件的内容。

然后使用标准的 IFilter 机制来解析文件内容:

{ Reads a document through its IFilter and logs every piece of text found.

  filename: path of the document; it is handed to LoadIFilter.

  Progress and results are reported through Log. Text chunks are read in
  pieces of TEXT_BUFFER_CHARS characters. Value chunks are only reported,
  their content is not decoded.

  Raises (via OleCheck) when filter.Init returns a failure HRESULT. }
procedure TForm1.ProcessFile(filename: string);
const
  TEXT_BUFFER_CHARS = 2048;
var
  filter: IFilter;
  hr: HRESULT;
  chunk: STAT_CHUNK; // local record: no New/Dispose needed per iteration
  flags: ULONG;
  c: ULONG;
  buffer: WideString;
  firstPiece: Boolean;
begin
  Log('Processing "'+filename+'"');

  Log('Calling LoadIFilter');
  filter := LoadIFilter(filename);
  if filter = nil then
  begin
    Log('filter is null; leaving');
    Exit;
  end;

  // The interface reference is released automatically when the procedure
  // exits (normally or through an exception), so no try..finally is needed.
  Log('Calling filter.Init(IFILTER_INIT_INDEXING_ONLY)');
  hr := filter.Init(IFILTER_INIT_INDEXING_ONLY, 0, nil, flags);
  OleCheck(hr);

  Log('Init returned sucessfully, looking for chunks...');
  while True do
  begin
    hr := filter.GetChunk(@chunk);

    // These two results only mean that the next chunk is an embedded or
    // linked object without a usable filter. The rest of the document is
    // still readable, so skip the chunk instead of stopping.
    if (hr = FILTER_E_EMBEDDING_UNAVAILABLE) or (hr = FILTER_E_LINK_UNAVAILABLE) then
    begin
      Log('Skipping unavailable embedded/linked chunk: '+IntToHex(hr, 8)+' ('+GetChunkHresultToStr(hr)+')');
      Continue;
    end;

    if Failed(hr) then
    begin
      Log('No more chunks: '+IntToHex(hr, 8)+' ('+GetChunkHresultToStr(hr)+')');
      Break;
    end;

    Log('== Got chunk. ChunkType='+IntToStr(chunk.flags)+' (1=text, 2=value) ==');

    if (chunk.flags and CHUNK_TEXT) = CHUNK_TEXT then
    begin
      firstPiece := True;
      repeat
        // On input c is the buffer capacity in characters; on output it is
        // the number of characters the filter actually wrote.
        c := TEXT_BUFFER_CHARS;
        SetLength(buffer, c);
        hr := filter.GetText(c, PWideChar(buffer));

        if Failed(hr) then
        begin
          // A failure on the very first call means the chunk gave no text
          // at all; a later failure is just the normal end of the text.
          if firstPiece then
          begin
            Log('Could not get text from chunk: '+IntToHex(hr, 8)+' ('+GetChunkHResultToStr(hr)+')');
            Log(' It might be a "Value" chunk, meaning i should call filter.GetValue rather than filter.GetText. But i''m too lazy');
          end;
          Break;
        end;

        if firstPiece then
          Log('=== Got text ===')
        else
          Log('==== Really long chunk, here''s the next 2048 characters ====');

        SetLength(buffer, c); // trim to what was actually returned
        Log(buffer);
        firstPiece := False;

        // FILTER_S_LAST_TEXT announces that the next GetText call would
        // fail with "no more text", so that extra call can be skipped.
      until hr = FILTER_S_LAST_TEXT;
    end
    else if (chunk.flags and CHUNK_VALUE) = CHUNK_VALUE then
    begin
      Log('This is a "VALUE" chunk. i''m not going to read anything out of it cause it''s too hard :(');
    end
    else
      Log('Unknown chunk type');
  end; //end while true getting chunks
end;

Windows 已经提供了为指定文件名加载 IFilter 的代码:

{ Returns the IFilter that ntQuery.LoadIFilter provides for the given file.

  filename: path of the file to load a filter for.

  Raises (via OleCheck) when ntQuery.LoadIFilter returns a failure HRESULT,
  and raises from the "as" cast when the returned object does not support
  IFilter. }
function TForm1.LoadIFilter(const filename: WideString): IFilter;
var
  loaded: IUnknown;
begin
  OleCheck(ntQuery.LoadIFilter(PWideChar(filename), nil, loaded));
  Result := loaded as IFilter;
end;

IFilter 声明单元:

unit Filter;

interface

uses
Windows, SysUtils, Classes, ActiveX;

type
  // Bit flags passed as the grfFlags argument of IFilter.Init.
  IFILTER_INIT = TOleEnum;
const
  IFILTER_INIT_CANON_PARAGRAPHS       = $0001;
  IFILTER_INIT_HARD_LINE_BREAKS       = $0002;
  IFILTER_INIT_CANON_HYPHENS          = $0004;
  IFILTER_INIT_CANON_SPACES           = $0008;
  IFILTER_INIT_APPLY_INDEX_ATTRIBUTES = $0010;
  IFILTER_INIT_APPLY_OTHER_ATTRIBUTES = $0020;
  IFILTER_INIT_INDEXING_ONLY          = $0040;
  IFILTER_INIT_SEARCH_LINKS           = $0080;

type
  // NOTE(review): presumably the value reported through the pFlags
  // argument of IFilter.Init - confirm against the SDK.
  IFILTER_FLAGS = TOleEnum;
const
  IFILTER_FLAGS_OLE_PROPERTIES = $0001;

type
  // Bit flags stored in STAT_CHUNK.flags.
  CHUNKSTATE = TOleEnum;
const
  CHUNK_TEXT  = 1;
  CHUNK_VALUE = 2;

type
  // Type of STAT_CHUNK.breakType.
  CHUNK_BREAKTYPE = TOleEnum;
const
  CHUNK_NO_BREAK = $00;
  CHUNK_EOW      = $01;
  CHUNK_EOS      = $02;
  CHUNK_EOP      = $03;
  CHUNK_EOC      = $04;

type
  // Region descriptor taken by IFilter.BindRegion.
  tagFILTERREGION = packed record
    idChunk: ULONG;
    cwcStart: ULONG;
    cwcExtent: ULONG;
  end;
  FILTERREGION = tagFILTERREGION;


const
  // Values of PROPSPEC.ulKind: they select which variant field is valid.
  PRSPEC_LPWSTR = 0; // lpws holds the property name
  PRSPEC_PROPID = 1; // prid holds the property id

type
  PROPID = ULONG;

// The records below are deliberately NOT declared "packed". The matching
// Windows SDK structures use natural alignment, so on a 64-bit target the
// pointer inside PROPSPEC must start at offset 8. With "packed" it would
// start at offset 4 and every structure containing it would be mis-sized.
// On 32-bit targets all fields are 4 bytes wide, so the layout is the same
// with or without "packed".
// NOTE(review): this relies on the default record alignment ({$A8}) being
// in effect for this unit.

type
  // Identifies a property either by id or by name, depending on ulKind.
  PROPSPEC = record
    ulKind: ULONG;
    case integer of
      0: (prid: PROPID);
      1: (lpws: PWideChar);
  end;
  tagPROPSPEC = PROPSPEC;

type
  // A property set GUID together with the property inside that set.
  FULLPROPSPEC = record
    guidPropSet: TGUID;
    psProperty: PROPSPEC;
  end;
  tagFULLPROPSPEC = FULLPROPSPEC;
  PFULLPROPSPEC = ^FULLPROPSPEC;

type
  // Chunk description filled in by IFilter.GetChunk.
  STAT_CHUNK = record
    idChunk: ULONG;
    breakType: CHUNK_BREAKTYPE;
    flags: CHUNKSTATE; // CHUNK_TEXT and/or CHUNK_VALUE
    locale: LCID;
    attribute: FULLPROPSPEC;
    idChunkSource: ULONG;
    cwcStartSource: ULONG;
    cwcLenSource: ULONG;
  end;
  tagSTAT_CHUNK = STAT_CHUNK;
  PSTAT_CHUNK = ^STAT_CHUNK;

// HRESULT codes returned by IFilter methods (Windows SDK header filterr.h).
// Codes starting with $8 are failures; codes starting with $0 are success
// or warning codes, so Succeeded() is True for them.
const
  // No more chunks of text available in object.
  FILTER_E_END_OF_CHUNKS = HRESULT($80041700);

  // No more text available in chunk.
  FILTER_E_NO_MORE_TEXT = HRESULT($80041701);

  // No more property values available in chunk.
  FILTER_E_NO_MORE_VALUES = HRESULT($80041702);

  // Unable to access object.
  FILTER_E_ACCESS = HRESULT($80041703);

  // Moniker doesn't cover entire region.
  // This is a warning, not an error: the severity bit must be clear.
  FILTER_W_MONIKER_CLIPPED = HRESULT($00041704);

  // No text in current chunk.
  FILTER_E_NO_TEXT = HRESULT($80041705);

  // No values in current chunk.
  FILTER_E_NO_VALUES = HRESULT($80041706);

  // Unable to bind IFilter for embedded object.
  FILTER_E_EMBEDDING_UNAVAILABLE = HRESULT($80041707);

  // Unable to bind IFilter for linked object.
  FILTER_E_LINK_UNAVAILABLE = HRESULT($80041708);

  // This is the last text in the current chunk.
  FILTER_S_LAST_TEXT = HRESULT($00041709);

  // This is the last value in the current chunk.
  FILTER_S_LAST_VALUES = HRESULT($0004170A);

  // File was not filtered due to password protection.
  FILTER_E_PASSWORD = HRESULT($8004170B);

  // The document format is not recognized by the filter.
  FILTER_E_UNKNOWNFORMAT = HRESULT($8004170C);


const
  IID_IFilter: TGUID = '{89BCB740-6119-101A-BCB7-00DD010655AF}';

type
  // Delphi declaration of the COM IFilter interface.
  IFilter = interface(IUnknown)
    ['{89BCB740-6119-101A-BCB7-00DD010655AF}']
    // Prepares the filter. grfFlags takes IFILTER_INIT_* values;
    // aAttributes points to cAttributes entries and may be nil when
    // cAttributes is 0.
    function Init(grfFlags: ULONG; cAttributes: ULONG; aAttributes: PFULLPROPSPEC; out pFlags: ULONG): HResult; stdcall;
    // Advances to the next chunk and fills in the record pStat points to.
    function GetChunk(pStat: PSTAT_CHUNK): HResult; stdcall;
    // Reads text of the current chunk. pcwcBuffer is the capacity of
    // awcBuffer in characters on input and the number of characters
    // written on output.
    function GetText(var pcwcBuffer: ULONG; awcBuffer: PWideChar): HResult; stdcall;
    // NOTE(review): the native method takes a PROPVARIANT** and returns a
    // pointer to a filter-allocated PROPVARIANT, so this parameter probably
    // should be "out ppPropValue: PPropVariant". It is left unchanged here
    // because changing the type would break existing callers - confirm
    // before using GetValue.
    function GetValue(out ppPropValue: PROPVARIANT): HResult; stdcall;
    // riid is a REFIID (a pointer to the IID) on the native side. It must
    // be a const parameter so it is passed by reference; a by-value TGUID
    // under stdcall would be copied onto the stack instead.
    function BindRegion(origPos: FILTERREGION; const riid: TGUID; out ppUnk): HResult; stdcall;
  end;

implementation

end.

关于Delphi IFilter 实现,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/3251935/

27 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com