gpt4 book ai didi

java - flexicapture 处理器只识别文档的第一页

转载 作者:行者123 更新时间:2023-11-30 09:07:20 25 4
gpt4 key购买 nike

我正在使用 flexicapture 处理器来识别我的文档。我有一个案例,我有一个包含多个页面的文档,即一个文档有多个图像,每个图像都需要识别。

我正在按照以下过程来完成我的一般任务,无论是文档中的一张图像还是文档中的多张图像;

  1. 创建处理器
  2. 添加文档定义文件或afl文件
  3. 运行识别:IDocument document = processor.RecognizeNextDocument(); 但是它返回的文档对象 document 只有一页,也就是文档的第一页,为什么会这样呢?

另一种情况是,如果我使用项目(IProject)而不是处理器,步骤如下:

  1. 创建一个项目
  2. 从项目 project.getBatches() 获取批处理,
  3. 向批处理中添加一个文档(多页)
  4. 识别它们之后,我就能得到文档的所有页面信息:IDocuments documents = batch.getDocuments()

如何使用处理器完成相同的任务?我希望处理器识别文档中的所有页面,并返回包含所有页面的文档。

如果有什么不清楚的地方,请询问更多信息。请尽快回复...

代码 1:使用 flexicapture 处理器

/**
 *
 */

/**
 * @author Nitin
 */

import java.sql.BatchUpdateException;

import com.abbyy.FCEngine.*;

public class FlexicaputreVerificationUsingProcessor {
/**
 * Placeholder that ignores its argument and always yields {@code null}.
 *
 * @param object ignored
 * @return always {@code null}
 */
private static Object verificationWorkSet(Object object) {
    final Object nothing = null;
    return nothing;
}
/**
 * Writes one line of progress output to standard output.
 *
 * @param txt the message to print
 */
private static void trace( String txt )
{
    final java.io.PrintStream console = System.out;
    console.println( txt );
}

// Folder and licensing settings shared by the sample; assigned once by the static initializer below.
static private String samplesFolder;
static private String projectFolder;
static private String serialNumber;
static private String dllPath;

/*
 * Sets the hard-coded sample/project folders, then reads the engine settings from
 * SamplesConfig.txt: the first line is stored as the serial number and the second
 * line as the DLL path. A read failure is reported on standard output and leaves
 * the unread settings null.
 */
static {

    samplesFolder = "C:\\ProgramData\\ABBYY\\SDK\\10\\FlexiCapture Engine\\Samples\\";
    projectFolder = "C:\\Users\\Nitin\\FlexicaptureTest\\flexiverificationtest" ;

    java.io.BufferedReader reader = null;
    try {

        reader = new java.io.BufferedReader( new java.io.InputStreamReader(
            new java.io.FileInputStream( samplesFolder + "SampleConfig\\SamplesConfig.txt" ) ) );

        serialNumber = reader.readLine();

        dllPath = reader.readLine();

        // readLine() returns null at end of file, so a short config file would
        // otherwise only surface later as an obscure engine-loading failure.
        if( serialNumber == null || dllPath == null ) {
            System.out.println( "SamplesConfig.txt must contain the serial number on line 1 and the DLL path on line 2" );
        }

    } catch( java.io.IOException e ) {
        System.out.println( e.getMessage() );
        e.printStackTrace();
    } finally {
        // Close on every path; the original leaked the stream whenever readLine() threw.
        if( reader != null ) {
            try {
                reader.close();
            } catch( java.io.IOException ignored ) {
                // Nothing useful can be done about a failed close of a read-only stream.
            }
        }
    }
}


/**
 * Recognizes every document the FlexiCapture processor can produce from the queued
 * image file and prints the fields offered for context verification, down to each
 * recognized character and its confidence.
 *
 * <p>Steps: load the engine, configure a processor with one document definition and
 * one image file, recognize documents until the processor has nothing left, then
 * walk a verification session created over the recognized documents. The engine is
 * unloaded in a {@code finally} block on every path.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {

    // Load Engine
    try {
        trace("Loading engine");
        IEngineLoader engineLoader = Engine.CreateEngineOutprocLoader();
        IEngine engine = engineLoader.Load(serialNumber, dllPath);

        try {
            // Create and configure FlexiCaptureProcessor
            trace("Creating and configureing FlexiCaptureProcessor");
            IFlexiCaptureProcessor processor = engine.CreateFlexiCaptureProcessor();
            processor.AddDocumentDefinitionFile(projectFolder + "\\Document_Definition_1.fcdot");

            trace("Adding images/pdf to processor");
            processor.AddImageFile(projectFolder + "\\don't upload to big .pdf");

            engine.EnableRecognitionVariants(true);

            trace("Creating Document collection");
            IDocumentsCollection documentsCollection = engine.CreateDocumentsCollection();

            trace("Reconizing Images/pdfs...");
            int recognizedCount = recognizeAllDocuments(processor, documentsCollection);
            if (recognizedCount == 0) {
                trace("Facing Error for all document while recongnization");
                return;
            }

            trace("Creaing Verification session");
            try {
                verifyDocuments(engine, documentsCollection);
            } catch (Exception e) {
                trace("Exception occured in creating verification sessions");
                // The original dropped the exception here, hiding the actual cause.
                e.printStackTrace();
            }

        } catch (Exception e) {
            trace("Exception occured in");
            // The original dropped the exception here, hiding the actual cause.
            e.printStackTrace();
        }

    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        trace("unloading Engine");
        Engine.Unload();
    }
}

/**
 * Calls {@code RecognizeNextDocument()} repeatedly and adds every returned document
 * to {@code documentsCollection}.
 *
 * <p>The original loop ran exactly once (hard-coded file count of 1), so only the
 * first recognized document was ever collected, and a null result was still added
 * to the collection. Here a null result with a pending processing error is logged
 * and skipped via {@code ResumeProcessing(false)}; a null result without an error
 * ends the loop.
 * NOTE(review): treating "null and no error" as "queue exhausted" follows the
 * engine's documented usage pattern - confirm against the installed SDK version.
 *
 * @return the number of documents added to the collection
 * @throws Exception declared because the engine API's checked exceptions are not
 *         visible from this file; callers already catch {@code Exception}
 */
private static int recognizeAllDocuments(IFlexiCaptureProcessor processor,
        IDocumentsCollection documentsCollection) throws Exception {
    int recognizedCount = 0;
    int documentNumber = 0;

    while (true) {
        documentNumber++;
        trace("Recongnizing image/pdf number: " + documentNumber);
        IDocument document = processor.RecognizeNextDocument();

        if (document != null) {
            trace("No error occured while recognization of document number : " + documentNumber);
            trace("Adding documents in Documents collection");
            documentsCollection.Add(document);
            recognizedCount++;
            continue;
        }

        trace("Getting last processing error for checksum");
        IProcessingError lastProcessingError = processor.GetLastProcessingError();
        if (lastProcessingError == null) {
            trace("No more documents in the processor queue");
            break;
        }

        String errormsg = lastProcessingError.MessageText();
        trace("Error occured while recognizeing document, Document number: " + documentNumber
                + " with Error msg: " + errormsg);
        // Errors are not handled (yet), so move on to the next document.
        processor.ResumeProcessing(false);
    }

    return recognizedCount;
}

/**
 * Creates a verification session over {@code documentsCollection}, configures it,
 * processes every work set it yields and closes the session in a {@code finally}
 * block.
 *
 * @throws Exception if the session cannot be created, configured or closed
 */
private static void verifyDocuments(IEngine engine, IDocumentsCollection documentsCollection)
        throws Exception {
    IVerificationSession verificationSession = engine.CreateVerificationSession(documentsCollection);

    try {
        //enabling context verification
        verificationSession.getOptions().setVerifyFields(true);

        //disabling group verification
        verificationSession.getOptions().setVerifyBaseSymbols(false);
        verificationSession.getOptions().setVerifyExtraSymbols(false);

        try {
            trace("Get NextWork Set");
            IVerificationWorkSet workSet = verificationSession.NextWorkSet();

            if (workSet == null) {
                trace("first verificationWork set is null");
            } else {
                trace("Processing Work Set");
                while (workSet != null) {
                    // A failure inside one work set must not stop the remaining ones.
                    try {
                        processWorkSet(engine, workSet);
                    } catch (Exception e) {
                        trace("Exception occured in getting next work group");
                        e.printStackTrace();
                    }

                    trace("Get next worksets");
                    workSet = verificationSession.NextWorkSet();
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }

    } finally {
        trace("closing Verification object");
        verificationSession.Close();
    }
}

/**
 * Walks every verification group of one work set and reports each verification
 * object it contains.
 *
 * @throws Exception propagated from the engine API; handled by the caller
 */
private static void processWorkSet(IEngine engine, IVerificationWorkSet workSet) throws Exception {
    trace("Geting Verification group");
    IVerificationGroup verificationGroup = workSet.NextGroup();

    if (verificationGroup == null) {
        trace("First verification group is null");
        return;
    }

    trace("processing each group of a workset");
    while (verificationGroup != null) {
        int objectCount = verificationGroup.getCount();
        trace("Total number of verification object: " + objectCount);

        for (int objectIndex = 0; objectIndex < objectCount; objectIndex++) {
            trace("getting and Processing " + (objectIndex + 1) + " verification object of A group");
            processVerificationObject(engine, verificationGroup.getElement(objectIndex));
        }

        trace("Geting next Verification group");
        verificationGroup = workSet.NextGroup();
    }
}

/**
 * Reports one verification object. Group objects are only checked for null;
 * context objects have their field names, recognized words/characters and field
 * value printed. Objects of any other type are ignored.
 *
 * @throws Exception propagated from the engine API; handled by the caller
 */
private static void processVerificationObject(IEngine engine, IVerificationObject verificationObject)
        throws Exception {
    if (verificationObject == null) {
        trace("verification object is null");
        return;
    }

    if (verificationObject.getType() == VerificationObjectTypeEnum.VOT_Group) {
        IGroupVerificationObject groupVerificationObject = verificationObject.AsGroupVerificationObject();
        if (groupVerificationObject == null) {
            System.out.println("group verification object is null ");
        }
        return;
    }

    if (verificationObject.getType() != VerificationObjectTypeEnum.VOT_Context) {
        return;
    }

    IContextVerificationObject contextVerificationObject = verificationObject.AsContextVerificationObject();
    if (contextVerificationObject == null) {
        trace("ContextVerification object is null");
        return;
    }

    IField field = contextVerificationObject.getField();
    if (field == null) {
        trace("field getting null");
        return;
    }
    System.out.println(" field full name: " + field.getFullName() + "\n Name: " + field.getName());

    IFieldValue fieldValue = field.getValue();
    if (fieldValue == null) {
        trace("Field Value is Null");
        return;
    }

    trace("getting text from field value");
    IText text = fieldValue.getAsText();
    if (text == null) {
        trace("text getting null in field value");
    } else {
        dumpRecognizedWords(engine, text);
    }
    System.out.println(" Field Value : " + fieldValue.getAsString());
}

/**
 * Prints every recognized character of every recognized word in {@code text},
 * together with the character's confidence value.
 *
 * @throws Exception propagated from the engine API; handled by the caller
 */
private static void dumpRecognizedWords(IEngine engine, IText text) throws Exception {
    int wordCount = text.getRecognizedWordsCount();
    trace("recognized word count: " + wordCount);

    for (int wordIndex = 0; wordIndex < wordCount; wordIndex++) {
        trace("processing word number :" + wordIndex);

        IRecognizedWordInfo recognizedWordInfo = engine.CreateRecognizedWordInfo();
        if (recognizedWordInfo == null) {
            trace("Can't create recognizedWordInfo object using engine");
            continue;
        }
        // The -1 argument is passed exactly as before.
        // NOTE(review): presumably a variant index - confirm in the engine reference.
        text.GetRecognizedWord(wordIndex, -1, recognizedWordInfo);

        // Read the length once instead of once per loop iteration.
        int characterCount = recognizedWordInfo.getText().length();
        for (int characterIndex = 0; characterIndex < characterCount; characterIndex++) {
            trace("processing character number : " + characterIndex);

            IRecognizedCharacterInfo recognizedCharacterInfo = engine.CreateRecognizedCharacterInfo();
            if (recognizedCharacterInfo == null) {
                trace("can't create recognizedCharacterInfo object");
                continue;
            }
            recognizedWordInfo.GetRecognizedCharacter(characterIndex, -1, recognizedCharacterInfo);

            System.out.println(" Character: " + recognizedCharacterInfo.getCharacter());
            System.out.println(" Confidence level : " + recognizedCharacterInfo.getCharConfidence());
        }
    }
}

代码 2:使用项目(IProject)

import java.io.File;
import java.io.IOException;
import java.sql.BatchUpdateException;

import com.abbyy.FCEngine.*;

/**
 * Project-based variant of the sample: opens a FlexiCapture project, adds an image
 * file to a batch, recognizes it, walks a verification session and finally prints
 * the fields of every document in the batch.
 */
public class VerificationStep {
//same as above

    /**
     * Runs the whole project-based flow. The batch, the project and the engine are
     * each released in a {@code finally} block.
     *
     * @param args command-line arguments; not used
     */
    public static void main( String[] args )
    {
        // Load Engine
        try {
            trace("Loading engine");
            IEngineLoader engineLoader = Engine.CreateEngineOutprocLoader();
            IEngine engine = engineLoader.Load(serialNumber, dllPath);

            try {
                IProject project = engine.OpenProject( projectFolder + "\\flexitest.fcproj" );

                try {
                    trace( "Creating Batch..." );
                    IBatches batchs = project.getBatches();
                    if (batchs == null) {
                        throw new IllegalStateException("Project returned no batch collection");
                    }

                    // Create a batch only when none exists. The original always overwrote
                    // the freshly created batch with element 0.
                    IBatch batch;
                    if (batchs.getCount() == 0) {
                        batch = batchs.AddNew("TestBatch");
                    } else {
                        batch = batchs.getElement(0);
                    }

                    // The original asserted "batch == null", which is inverted and aborts
                    // every valid run when assertions are enabled.
                    if (batch == null) {
                        throw new IllegalStateException("No batch could be obtained from the project");
                    }

                    try {
                        trace("opening batch");
                        batch.Open();

                        trace( "Adding pdfs..." );
                        batch.AddImage(projectFolder + "\\don't upload to big .pdf");

                        trace( "Reconizing pdfs..." );
                        batch.Recognize(null, RecognitionModeEnum.RM_ReRecognizeAll, null);

                        trace("Creating Verification object");
                        try {
                            verifyProject(engine, project);
                        } catch (Exception e) {
                            e.printStackTrace();
                        }

                        trace ("Getting Documents");
                        printDocuments(batch.getDocuments());

                    } finally {
                        trace("Closing Batch");
                        batch.Close();
                    }

                } catch (Exception e) {
                    System.out.println("Exception in creating Batch");
                    e.printStackTrace();
                } finally {
                    trace("closing project");
                    project.Close();
                }

            } catch (Exception e) {
                System.out.println("Exception occured while loading project");
                e.printStackTrace();
            }

        } catch (Exception e) {
            System.out.println("Exception occured while loading engine");
            e.printStackTrace();
        } finally {
            trace("unloading Engine");
            Engine.Unload();
        }
    }

    /**
     * Starts a verification session on {@code project}, configures it, processes
     * every work set it yields and closes the session in a {@code finally} block.
     *
     * @throws Exception declared because the engine API's checked exceptions are not
     *         visible from this file; the caller catches {@code Exception}
     */
    private static void verifyProject(IEngine engine, IProject project) throws Exception {
        IVerificationSession verificationSession = project.StartVerification(null);

        try {
            //enabling context verification
            verificationSession.getOptions().setVerifyFields(true);

            //disabling group verification
            verificationSession.getOptions().setVerifyBaseSymbols(false);
            verificationSession.getOptions().setVerifyExtraSymbols(false);

            try {
                trace("Get NextWork Set");
                IVerificationWorkSet workSet = verificationSession.NextWorkSet();

                if (workSet == null) {
                    trace("first verificationWork set is null");
                } else {
                    trace("Processing Work Set");
                    while (workSet != null) {
                        // A failure inside one work set must not stop the remaining ones.
                        try {
                            processWorkSet(engine, workSet);
                        } catch (Exception e) {
                            e.printStackTrace();
                        }

                        //get next work set
                        workSet = verificationSession.NextWorkSet();
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            }

        } finally {
            verificationSession.Close();
        }
    }

    /**
     * Walks every verification group of one work set and reports each verification
     * object it contains.
     *
     * @throws Exception propagated from the engine API; handled by the caller
     */
    private static void processWorkSet(IEngine engine, IVerificationWorkSet workSet) throws Exception {
        trace("Geting Verification group");
        IVerificationGroup verificationGroup = workSet.NextGroup();

        if (verificationGroup == null) {
            trace("First verification group is null");
            return;
        }

        trace("processing each group of a workset");
        while (verificationGroup != null) {
            int objectCount = verificationGroup.getCount();
            trace("Total number of verification object: " + objectCount);

            for (int objectIndex = 0; objectIndex < objectCount; objectIndex++) {
                trace("getting and Processing " + (objectIndex + 1) + " verification object of A group");
                processVerificationObject(engine, verificationGroup.getElement(objectIndex));
            }

            verificationGroup = workSet.NextGroup();
        }
    }

    /**
     * Reports one verification object. Group objects are only checked for null;
     * context objects have their field names, recognized words/characters and field
     * value printed. Objects of any other type are ignored.
     *
     * @throws Exception propagated from the engine API; handled by the caller
     */
    private static void processVerificationObject(IEngine engine, IVerificationObject verificationObject)
            throws Exception {
        if (verificationObject == null) {
            trace("verification object is null");
            return;
        }

        if (verificationObject.getType() == VerificationObjectTypeEnum.VOT_Group) {
            IGroupVerificationObject groupVerificationObject = verificationObject.AsGroupVerificationObject();
            if (groupVerificationObject == null) {
                System.out.println("group verification object is null ");
            }
            return;
        }

        if (verificationObject.getType() != VerificationObjectTypeEnum.VOT_Context) {
            return;
        }

        IContextVerificationObject contextVerificationObject = verificationObject.AsContextVerificationObject();
        if (contextVerificationObject == null) {
            trace("ContextVerification object is null");
            return;
        }

        IField field = contextVerificationObject.getField();
        if (field == null) {
            trace("field getting null");
            return;
        }
        System.out.println(" field full name: " + field.getFullName() + "\n Name: " + field.getName());

        IFieldValue fieldValue = field.getValue();
        if (fieldValue == null) {
            trace("Field Value is Null");
            return;
        }

        trace("getting text from field value");
        IText text = fieldValue.getAsText();
        if (text == null) {
            trace("text getting null in field value");
        } else {
            dumpRecognizedWords(engine, text);
        }
        System.out.println(" Field Value : " + fieldValue.getAsString());
    }

    /**
     * Prints every recognized character of every recognized word in {@code text},
     * together with the character's confidence value.
     *
     * @throws Exception propagated from the engine API; handled by the caller
     */
    private static void dumpRecognizedWords(IEngine engine, IText text) throws Exception {
        int wordCount = text.getRecognizedWordsCount();
        trace("recognized word count: " + wordCount);

        for (int wordIndex = 0; wordIndex < wordCount; wordIndex++) {
            trace("processing word number :" + wordIndex);

            IRecognizedWordInfo recognizedWordInfo = engine.CreateRecognizedWordInfo();
            if (recognizedWordInfo == null) {
                trace("Can't create recognizedWordInfo object using engine");
                continue;
            }
            // The -1 argument is passed exactly as before.
            // NOTE(review): presumably a variant index - confirm in the engine reference.
            text.GetRecognizedWord(wordIndex, -1, recognizedWordInfo);

            // Read the length once instead of once per loop iteration.
            int characterCount = recognizedWordInfo.getText().length();
            for (int characterIndex = 0; characterIndex < characterCount; characterIndex++) {
                trace("processing character number : " + characterIndex);

                IRecognizedCharacterInfo recognizedCharacterInfo = engine.CreateRecognizedCharacterInfo();
                if (recognizedCharacterInfo == null) {
                    trace("can't create recognizedCharacterInfo object");
                    continue;
                }
                recognizedWordInfo.GetRecognizedCharacter(characterIndex, -1, recognizedCharacterInfo);

                System.out.println(" Character: " + recognizedCharacterInfo.getCharacter());
                System.out.println(" Confidence level : " + recognizedCharacterInfo.getCharConfidence());
            }
        }
    }

    /**
     * Prints the document type, page count and first-section fields of every
     * document in {@code documents}. Each document is opened for reading its fields
     * and closed again in a {@code finally} block.
     *
     * <p>Only the children of section 0 are printed, exactly as before; fields in
     * further sections are not listed.
     *
     * @throws Exception propagated from the engine API; handled by the caller
     */
    private static void printDocuments(IDocuments documents) throws Exception {
        trace ("Getting Fields and printing");
        int documentCount = documents.getCount();

        for (int j = 0; j < documentCount; j++) {
            trace ("Getting documnets:" + (j + 1));

            IDocument document = documents.getElement(j);
            IDocumentDefinition definition = document.getDocumentDefinition();
            if (definition == null) {
                // Previously only an assert guarded this, so with assertions off the
                // next line threw a NullPointerException and aborted the whole listing.
                trace("Document " + (j + 1) + " has no document definition, skipping");
                continue;
            }

            // The original asserted exactly one page, which fails for the multi-page
            // documents this sample is meant to process; report the count instead.
            trace("Page count: " + document.getPages().getCount());

            trace( "DocumentType: " + definition.getName() );

            try {
                trace("opening document");
                document.Open(true);
                IFields fields = document.getSections().Item( 0 ).getChildren();

                for (int i = 0; i < fields.getCount(); i++) {
                    IField field = fields.getElement( i );
                    IFieldValue value = field.getValue();
                    trace( field.getName() + ": " + ( value != null ? value.getAsString() : "." ) );
                }
            } finally {
                trace("closing document");
                document.Close(true);
            }
        }
    }

}

最佳答案

最后我找到了解决方案:实际上识别结果是正确的,只是我处理识别结果的方式不对......

关于java - flexicapture 处理器只识别文档的第一页,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/24034598/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com