Setting a Paragraph, Recognizing It, and Getting the Recognition Results
Paragraph recognition is performed through an instance of the paragraph recognition class (CJocrBlock in the example below). Set the document image and the paragraph, create the normalized image, then run recognition and retrieve the results.
// Perform paragraph recognition
int mrecognize(unsigned long chartype1, unsigned long chartype2);
// Force each paragraph to be recognized as a single character
int mrecognize1char(unsigned long chartype1, unsigned long chartype2);
// Input:
unsigned long chartype1; // Character type filter 1
// CHAR_SET_ALL, CHAR_SET_ENGLISH, CHAR_SET_NUMBERALL, or a combination of these and other character set constants
unsigned long chartype2; // Character type filter 2
// Specify 0 or CHAR_SET_ASCII
// Characters outside the types selected by chartype1 are not included in the result.
// Specifying CHAR_SET_ASCII for chartype2 restricts recognition to ASCII characters,
// which also increases the recognition rate and speed.
// Return value:
// 0.................Normal termination
// MEMORY_SHORTAGE...Insufficient memory
// Get recognition results
int mgetresult(int& resultnum, OCRResult* pocrresult);
// Input:
// int& resultnum; Size (number of elements) of the OCRResult structure array passed in pocrresult
// OCRResult* pocrresult; OCRResult structure array that receives the results
// Output:
// int& resultnum; Number of elements in the result array
// OCRResult* pocrresult; OCRResult structure array holding the results
// Return value:
// 0...................Normal termination
// BUFFER_OVERFLOW.....The supplied array is too small
// In this case no results are stored, but the number of elements the structure array needs is returned in resultnum.
// If NULL is specified for pocrresult, only the number of results is returned.
// BUFFER_OVERFLOW has no side effects, so mgetresult can be called again as many times as needed.
Example
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include "ocrdef.h"
#include "ocrco.h"
#include "cjocrstock.h"
#include "cjocrdict98.h"
#include "cjocrpat98.h"
#include "cjocrrec98.h"
#include "cjocrline98.h"
#include "cjocrlang.h"
#include "cjocrblock.h"
#include "errcode.h"
...
....
// Create pattern class
// mallocmemory() is called right after construction. A negative return value is
// treated as failure: the object is released and the program exits with status 1.
CJocrPattern* pattern = new CJocrPattern;
int ret = pattern->mallocmemory();
if (ret < 0) {
printf("Pattern class memory allocation error");
delete pattern;
exit(1);
}
// Create dictionary class
// The dictionary paths are registered first and then loaded with mloaddict().
// NOTE(review): msetsystemdict() is called twice; presumably each call registers an
// additional system dictionary rather than replacing the previous one — confirm.
CJocrDict* pjocrdict = new CJocrDict;
pjocrdict->msetsystemdict("c:\\dic\\feature\\system");
pjocrdict->msetsystemdict("c:\\dic\\feature\\systemfat");
pjocrdict->msetuserdict("c:\\dic\\feature\\user");
ret = pjocrdict->mloaddict();
if (ret < 0) {
// Loading failed (negative return): release every object created so far and exit.
printf("Error");
delete pjocrdict;
delete pattern;
exit(1);
}
// Create single character recognition class
// The recognizer is handed the pattern and dictionary objects before it is initialized.
// NOTE(review): the setter is spelled msetpatter (not msetpattern) here — confirm
// the spelling against the class header.
CJocrRecognize* precognize = new CJocrRecognize;
precognize->msetpatter(pattern);
precognize->msetdict(pjocrdict);
// Initialize single character recognition class
ret = precognize->mallocmemory();
if (ret < 0) {
// Initialization failed (negative return): release every object created so far and exit.
printf("Error");
delete precognize;
delete pjocrdict;
delete pattern;
exit(1);
}
// Create line recognition class with language processing
// It is given the same pattern object and the single character recognizer.
// NOTE(review): no mallocmemory() call is shown for CJocrLang or CJocrBlock —
// confirm that none is required.
CJocrLang* pjocrlang = new CJocrLang;
pjocrlang->msetpatter(pattern);
pjocrlang->msetrecognize(precognize);
// Create paragraph recognition class
// The paragraph recognizer works through the line recognizer set with msetlang().
CJocrBlock* pjocrblock = new CJocrBlock;
pjocrblock->msetlang(pjocrlang);
pjocrblock->msetprocess(PREPROCESS_INSIDE); // Exclude primitives that touch the frame
// Set document
// NOTE(review): mdata, mwidth and mheight are not defined in this excerpt; presumably
// the document image buffer and its width and height — confirm.
pjocrblock->msetdocument(mdata, mwidth, mheight);
pjocrblock->msetdpi(400); // Resolution 400dpi
// Set paragraph
// Set block
OCRBlock ocrblock;
ocrblock.baseline.x1 = 100;
ocrblock.baseline.y1 = 100;
ocrblock.baseline.x2 = 300;
ocrblock.baseline.y2 = 200;
ocrblock.thickness = 100;
// Baseline is (100,100)-(300,200)
// Line height is 100
// Background 0
// Set horizontal block
// The second argument is atan2(dy, dx) of the baseline end points, i.e. the angle
// of the baseline in radians.
// NOTE(review): the meaning of the two trailing 0 arguments is not shown here; the
// "Background 0" note above presumably refers to one of them — confirm.
pjocrblock->msetblock(ocrblock, atan2(ocrblock.baseline.y2 - ocrblock.baseline.y1, ocrblock.baseline.x2 - ocrblock.baseline.x1), 0, 0);
// Create normalized image
// This step runs after the document and block have been set and before recognition.
ret = pjocrblock->mregularize();
if (ret < 0) {
// Normalization failed (negative return): release all objects in reverse order of creation and exit.
printf("Error");
delete pjocrblock;
delete pjocrlang;
delete precognize;
delete pjocrdict;
delete pattern;
exit(1);
}
// Recognize paragraph
// chartype1 = CHAR_SET_ALL and chartype2 = 0, so no character type filtering is requested.
ret = pjocrblock->mrecognize(CHAR_SET_ALL, 0); // Recognize all character types
if (ret < 0) {
// Recognition failed (negative return): release all objects in reverse order of creation and exit.
printf("Error");
delete pjocrblock;
delete pjocrlang;
delete precognize;
delete pjocrdict;
delete pattern;
exit(1);
}
// Get recognition result
int resultnum;
mgetresult(resultnum, NULL);
// Allocate a buffer of size result × sizeof(OCRResult)
OCRResult* pocrresult = malloc(sizeof(OCRResult) * resultnum);
if (pocrresult) {
pjocrblock->mgetresult(resultnum, pocrresult);
}
// pocrresult[i].cand[0].code contains the recognition result of each character or
// line break code 0x0d, 0x0a as line separator.
....
...
..
delete pjocrblock;
delete pjocrlang;
delete precognize;
delete pjocrdict;
delete pattern;