New Version of Recognition Method
Improvements over the conventional mrecognize method:
- Improvement of recognition algorithm
- Significant improvement in language processing function through language dictionary
- Improved discrimination of voiced and semi-voiced sound marks (dakuten / handakuten)
- Automatic detection of characters split apart when a radical is recognized as a separate character
- Automatic processing of bracket matching
- Optional underline processing (disabled by default)
// Paragraph recognition
int mrecognizecluster(unsigned long chartype1,unsigned long chartype2);
Input:
unsigned long chartype1; Character type filter 1
CHAR_SET_ALL, CHAR_SET_ENGLISH, CHAR_SET_NUMBERALL
and combinations of these
unsigned long chartype2; Character type filter 2
0 or CHAR_SET_ASCII
Characters of types not specified in chartype1 are excluded from the result.
If CHAR_SET_ASCII is specified in chartype2, recognition is further restricted to ASCII characters,
which also improves the recognition rate and speed.
Return:
0..................Normal completion
MEMORY_SHORTAGE...Memory shortage
// Adding underline processing
void msetunderlineflag(int flag = 1);
Input:
int flag; 0 or 1 (1 enables underline processing)
Example
#include "ocrdef.h"
#include "ocrco.h"
#include "cjocrstock.h"
#include "cjocrdict98.h"
#include "cjocrpat98.h"
#include "cjocrrec98.h"
#include "cjocrline98.h"
#include "cjocrlang.h"
#include "cjocrblock.h"
#include "errcode.h"
...
....
// Step 1: create the pattern object and allocate its memory.
CJocrPattern* pattern = new CJocrPattern;
int ret = pattern->mallocmemory();
if (ret < 0) {
    // A negative return value is treated as failure: release and abort.
    printf("Pattern class memory allocation error");
    delete pattern;
    exit(1);
}

// Step 2: create the dictionary object, register the system and user
// dictionary paths, then load them.
CJocrDict* pjocrdict = new CJocrDict;
pjocrdict->msetsystemdict("c:\\dic\\feature\\system");
pjocrdict->msetsystemdict("c:\\dic\\feature\\systemfat");
pjocrdict->msetuserdict("c:\\dic\\feature\\user");
ret = pjocrdict->mloaddict();
if (ret < 0) {
    printf("Error");
    delete pjocrdict;
    delete pattern;
    exit(1);
}

// Step 3: create the single-character recognizer and hand it the pattern
// and dictionary objects created above.
CJocrRecognize* precognize = new CJocrRecognize;
precognize->msetpatter(pattern);
precognize->msetdict(pjocrdict);

// Step 4: initialize the single-character recognizer.
ret = precognize->mallocmemory();
if (ret < 0) {
    // Release everything created so far, newest object first.
    printf("Error");
    delete precognize;
    delete pjocrdict;
    delete pattern;
    exit(1);
}
// Line recognizer with language processing; it is given the pattern object
// and the single-character recognizer.
CJocrLang* pjocrlang = new CJocrLang;
pjocrlang->msetpatter(pattern);
pjocrlang->msetrecognize(precognize);

// Paragraph recognizer, driven by the line recognizer.
CJocrBlock* pjocrblock = new CJocrBlock;
pjocrblock->msetlang(pjocrlang);
pjocrblock->msetprocess(PREPROCESS_INSIDE);  // Exclude primitives touching the frame

// Document image: mdata / mwidth / mheight come from the caller (not shown here).
pjocrblock->msetdocument(mdata, mwidth, mheight);
pjocrblock->msetdpi(400);  // Resolution 400dpi

// Paragraph block to recognize:
//   baseline    (100,100)-(300,200)
//   line height 100
//   background  0
OCRBlock ocrblock;
ocrblock.baseline.x1 = 100;
ocrblock.baseline.y1 = 100;
ocrblock.baseline.x2 = 300;
ocrblock.baseline.y2 = 200;
ocrblock.thickness = 100;

// Set the horizontal block; the second argument is the baseline angle
// computed from the two baseline end points.
pjocrblock->msetblock(
    ocrblock,
    atan2(ocrblock.baseline.y2 - ocrblock.baseline.y1,
          ocrblock.baseline.x2 - ocrblock.baseline.x1),
    0,
    0);
// Build the normalized image of the block.
ret = pjocrblock->mregularize();
if (ret < 0) {
    // Failure: release every object, newest first, then abort.
    printf("Error");
    delete pjocrblock;
    delete pjocrlang;
    delete precognize;
    delete pjocrdict;
    delete pattern;
    exit(1);
}

// Recognize the paragraph.
pjocrblock->msetunderlineflag(1);  // Underline processing enabled
ret = pjocrblock->mrecognizecluster(CHAR_SET_ALL, 0);  // Recognize all character types
if (ret < 0) {
    // Same cleanup as above.
    printf("Error");
    delete pjocrblock;
    delete pjocrlang;
    delete precognize;
    delete pjocrdict;
    delete pattern;
    exit(1);
}
// Get recognition results
int resultnum;
mgetresult(resultnum,NULL);
// Allocate buffer for result×sizeof(OCRResult)
OCRResult* pocrresult = malloc(sizeof(OCRResult) * resultnum);
if(pocrresult) {
pjocrblock->mgetresult(resultnum,pocrresult);
}
// pocrresult[i].cand[0].code contains recognition results for each character or
// line separation newline codes 0x0d,0x0a.
....
...
..
delete pjocrblock;
delete pjocrlang;
delete precognize;
delete pjocrdict;
delete pattern;