Classes |
| struct | TESS_CHAR |
| class | TessBaseAPI |
| class | PageIterator |
| class | ResultIterator |
| class | ChoiceIterator |
| class | CubeRecoContext |
| struct | DocQualCallbacks |
| class | TesseractCubeCombiner |
| struct | TesseractStats |
| class | Tesseract |
| class | ImageThresholder |
| class | BoxWord |
| class | CCStruct |
| class | DetLineFit |
| class | DPPoint |
| class | UnicharIdArrayUtils |
| class | AmbigSpec |
| class | UnicharAmbigs |
| class | CCUtilMutex |
| class | CCUtil |
| class | PointerVector |
| struct | ParamsVectors |
| class | ParamUtils |
| class | Param |
| class | IntParam |
| class | BoolParam |
| class | StringParam |
| class | DoubleParam |
| class | TessdataManager |
| class | Classify |
| class | AltList |
| class | BeamSearch |
| class | Bmp8 |
| class | CachedFile |
| class | CharAltList |
| struct | Bigram |
| struct | CharBigram |
| struct | CharBigramTable |
| class | CharBigrams |
| class | CharSamp |
| class | CharSampEnum |
| class | CharSampSet |
| class | CharSet |
| class | CharClassifier |
| class | CharClassifierFactory |
| class | ConCompPt |
| class | ConComp |
| class | ConvNetCharClassifier |
| class | CubeLineObject |
| class | CubeLineSegmenter |
| class | CubeObject |
| class | CubeSearchObject |
| class | CubeTuningParams |
| class | CubeUtils |
| class | FeatureBase |
| class | FeatureBmp |
| class | FeatureChebyshev |
| class | FeatureHybrid |
| class | HybridNeuralNetCharClassifier |
| class | LangModEdge |
| class | LangModel |
| class | SearchColumn |
| class | SearchNode |
| class | SearchNodeHashTable |
| class | SearchObject |
| class | TessLangModEdge |
| class | TessLangModel |
| class | TuningParams |
| class | WordAltList |
| class | WordListLangModel |
| struct | PairSizeInfo |
| struct | FontPairSizeInfo |
| class | WordSizeModel |
| class | WordUnigrams |
| class | CUtil |
| struct | NodeChild |
| class | Dawg |
| struct | DawgInfo |
| class | DawgInfoVector |
| class | SquishedDawg |
| struct | DawgArgs |
| class | Dict |
| class | PermuterState |
| class | Trie |
| class | Image |
| class | InputFileBuffer |
| class | NeuralNet |
| class | Neuron |
| struct | AlignedBlobParams |
| class | AlignedBlob |
| class | GridBase |
| class | IntGrid |
| class | BBGrid |
| class | GridSearch |
| class | TabEventHandler |
| class | ColumnFinder |
| class | ColPartition |
| class | ColPartitionGrid |
| class | ColPartitionSet |
| class | PixelHistogram |
| class | ShiroRekhaSplitter |
| class | ImageFinder |
| class | LineFinder |
| class | StrokeWidth |
| class | TabFind |
| class | ColSegment |
| class | TableFinder |
| class | StructuredTable |
| class | TableRecognizer |
| class | TabConstraint |
| class | TabVector |
| class | Textord |
| class | WorkingPartSet |
| struct | AssociateStats |
| class | AssociateUtils |
| struct | LanguageModelConsistencyInfo |
| struct | LanguageModelDawgInfo |
| struct | LanguageModelNgramInfo |
| struct | ViterbiStateEntry |
| struct | LanguageModelState |
| struct | BestChoiceBundle |
| struct | BestPathByColumn |
| class | LanguageModel |
| struct | _MATCH_ |
| class | BlobMatchTable |
| class | FRAGMENT |
| class | Wordrec |
Typedefs |
| typedef int(Dict::* | DictFunc )(void *void_dawg_args, UNICHAR_ID unichar_id, bool word_end) |
| typedef double(Dict::* | ProbabilityInContextFunc )(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes) |
| typedef TessCallback2< int, PAGE_RES * > | TruthCallback |
| typedef GenericVector< UNICHAR_ID > | UnicharIdVector |
| typedef GenericVector< AmbigSpec_LIST * > | UnicharAmbigsVector |
| typedef signed int | char_32 |
| typedef basic_string< char_32 > | string_32 |
| typedef GenericVector< NodeChild > | NodeChildVector |
| typedef GenericVector< int > | SuccessorList |
| typedef GenericVector< SuccessorList * > | SuccessorListsVector |
| typedef GenericVector< Dawg * > | DawgVector |
| typedef GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > | ColPartitionGridSearch |
| typedef GenericVector< ColPartitionSet * > | PartSetVector |
| typedef TessResultCallback1< bool, int > | WidthCallback |
| typedef BBGrid< ColSegment, ColSegment_CLIST, ColSegment_C_IT > | ColSegmentGrid |
| typedef GridSearch< ColSegment, ColSegment_CLIST, ColSegment_C_IT > | ColSegmentGridSearch |
| typedef BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > | BlobGrid |
| typedef GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > | BlobGridSearch |
| typedef unsigned char | LanguageModelFlagsType |
| typedef struct tesseract::_MATCH_ | MATCH |
Enumerations |
| enum | CMD_EVENTS { ACTION_1_CMD_EVENT,
RECOG_WERDS,
RECOG_PSEUDO,
ACTION_2_CMD_EVENT
} |
| enum | ScriptPos { SP_NORMAL,
SP_SUBSCRIPT,
SP_SUPERSCRIPT,
SP_DROPCAP
} |
| enum | Orientation { ORIENTATION_PAGE_UP = 0,
ORIENTATION_PAGE_RIGHT = 1,
ORIENTATION_PAGE_DOWN = 2,
ORIENTATION_PAGE_LEFT = 3
} |
| enum | WritingDirection { WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
WRITING_DIRECTION_TOP_TO_BOTTOM = 2
} |
| enum | TextlineOrder { TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
TEXTLINE_ORDER_TOP_TO_BOTTOM = 2
} |
| enum | PageSegMode {
PSM_OSD_ONLY,
PSM_AUTO_OSD,
PSM_AUTO_ONLY,
PSM_AUTO,
PSM_SINGLE_COLUMN,
PSM_SINGLE_BLOCK_VERT_TEXT,
PSM_SINGLE_BLOCK,
PSM_SINGLE_LINE,
PSM_SINGLE_WORD,
PSM_CIRCLE_WORD,
PSM_SINGLE_CHAR,
PSM_COUNT
} |
| enum | PageIteratorLevel {
RIL_BLOCK,
RIL_PARA,
RIL_TEXTLINE,
RIL_WORD,
RIL_SYMBOL
} |
| enum | OcrEngineMode { OEM_TESSERACT_ONLY,
OEM_CUBE_ONLY,
OEM_TESSERACT_CUBE_COMBINED,
OEM_DEFAULT
} |
| enum | AmbigType {
NOT_AMBIG,
REPLACE_AMBIG,
DEFINITE_AMBIG,
SIMILAR_AMBIG,
CASE_AMBIG,
AMBIG_TYPE_COUNT
} |
| enum | TessdataType {
TESSDATA_LANG_CONFIG,
TESSDATA_UNICHARSET,
TESSDATA_AMBIGS,
TESSDATA_INTTEMP,
TESSDATA_PFFMTABLE,
TESSDATA_NORMPROTO,
TESSDATA_PUNC_DAWG,
TESSDATA_SYSTEM_DAWG,
TESSDATA_NUMBER_DAWG,
TESSDATA_FREQ_DAWG,
TESSDATA_FIXED_LENGTH_DAWGS,
TESSDATA_CUBE_UNICHARSET,
TESSDATA_CUBE_SYSTEM_DAWG,
TESSDATA_NUM_ENTRIES
} |
| enum | CharSegmentationType { CST_FRAGMENT,
CST_WHOLE,
CST_IMPROPER,
CST_NGRAM
} |
| enum | DawgType {
DAWG_TYPE_PUNCTUATION,
DAWG_TYPE_WORD,
DAWG_TYPE_NUMBER,
DAWG_TYPE_PATTERN,
DAWG_TYPE_COUNT
} |
| enum | ColumnSpanningType {
CST_NOISE,
CST_FLOWING,
CST_HEADING,
CST_PULLOUT,
CST_COUNT
} |
| enum | ColSegType {
COL_UNKNOWN,
COL_TEXT,
COL_TABLE,
COL_MIXED,
COL_COUNT
} |
| enum | TabAlignment {
TA_LEFT_ALIGNED,
TA_LEFT_RAGGED,
TA_CENTER_JUSTIFIED,
TA_RIGHT_ALIGNED,
TA_RIGHT_RAGGED,
TA_SEPARATOR,
TA_COUNT
} |
Functions |
| int | CubeAPITest (Boxa *boxa_blocks, Pixa *pixa_blocks, Boxa *boxa_words, Pixa *pixa_words, const FCOORD &reskew, Pix *page_pix, PAGE_RES *page_res) |
| TBLOB * | make_tesseract_blob (float baseline, float xheight, float descender, float ascender, bool numeric_mode, Pix *pix) |
| TBOX | char_box_to_tbox (Box *char_box, TBOX word_box, int x_offset) |
| bool | read_t (PAGE_RES_IT *page_res_it, TBOX *tbox) |
| bool | read_b (int applybox_page, int *line_number, FILE *box_file, char *label, TBOX *bbox) |
| ICOORD | ComputeEndFromGradient (const ICOORD &start, double m) |
| void | OtsuThreshold (const unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height, int **thresholds, int **hi_values) |
| void | HistogramRect (const unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height, int *histogram) |
| int | OtsuStats (const int *histogram, int *H_out, int *omega0_out) |
| | ELISTIZE (AmbigSpec) |
| | ELISTIZEH (AmbigSpec) |
| template<typename T > |
| bool | cmp_eq (T const &t1, T const &t2) |
| template<typename T > |
| int | sort_cmp (const void *t1, const void *t2) |
| template<typename T > |
| int | sort_ptr_cmp (const void *t1, const void *t2) |
| void | ClearCharNormArray (INT_TEMPLATES Templates, CLASS_NORMALIZATION_ARRAY CharNormArray) |
| void | ClearFeatureSpaceWindow (NORM_METHOD norm_method, ScrollView *window) |
| WERD_CHOICE * | get_best_delete_other (WERD_CHOICE *choice1, WERD_CHOICE *choice2) |
| BLOB_CHOICE * | get_nth_choice (BLOB_CHOICE_LIST *blob_list, int n) |
| UNICHAR_ID | get_top_choice_uid (BLOB_CHOICE_LIST *blob_list) |
| int | find_choice_by_uid (BLOB_CHOICE_LIST *blob_list, UNICHAR_ID target_uid) |
| WERD_CHOICE * | get_choice_from_posstr (const BLOB_CHOICE_LIST_VECTOR &char_choices, int start_pos, const char *pos_str, float *certainties) |
| void | get_posstr_from_choice (const BLOB_CHOICE_LIST_VECTOR &char_choices, WERD_CHOICE *word_choice, int start_pos, char *pos_str) |
| BLOB_CHOICE * | find_choice_by_type (BLOB_CHOICE_LIST *blob_choices, char target_type, const UNICHARSET &unicharset) |
| BLOB_CHOICE * | find_choice_by_script (BLOB_CHOICE_LIST *blob_choices, int target_sid, int backup_sid, int secondary_sid) |
| Pix * | GridReducedPix (const TBOX &box, int gridsize, ICOORD bleft, int *left, int *bottom) |
| Pix * | TraceOutlineOnReducedPix (C_OUTLINE *outline, int gridsize, ICOORD bleft, int *left, int *bottom) |
| Pix * | TraceBlockOnReducedPix (BLOCK *block, int gridsize, ICOORD bleft, int *left, int *bottom) |
| template<class BBC > |
| int | SortByBoxLeft (const void *void1, const void *void2) |
| template<class BBC > |
| int | SortByBoxBottom (const void *void1, const void *void2) |
| template<typename T > |
| void | DeleteObject (T *object) |
| | ELISTIZE (ViterbiStateEntry) |
| | ELISTIZEH (ViterbiStateEntry) |
Variables |
| const int | kMinRectSize = 10 |
| const char | kTesseractReject = '~' |
| const char | kUNLVReject = '~' |
| const char | kUNLVSuspect = '^' |
| const char * | kInputFile = "noname.tif" |
| const char * | kOldVarsFile = "failed_vars.txt" |
| const int | kMaxIntSize = 22 |
| const int | kNumbersPerBlob = 5 |
| const int | kBytesPerNumber = 5 |
| const int | kBytesPerBlob = kNumbersPerBlob * (kBytesPerNumber + 1) + 1 |
| const int | kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1 |
| const int | kBytesPer64BitNumber = 20 |
| const int | kMaxBytesPerLine |
| const int | kUniChs [] |
| const int | kLatinChs [] |
| const int | kMaxCharTopRange = 48 |
| const int | kMinCredibleResolution = 70 |
| | Minimum believable resolution.
|
| const int | kDefaultResolution = 300 |
| | Default resolution used if input is not believable.
|
| const int | kMaxCircleErosions = 8 |
| const inT16 | kMaxBoxEdgeDiff = 2 |
| const int | kBoxClipTolerance = 2 |
| const int | kMinSubscriptOffset = 20 |
| const int | kMinSuperscriptOffset = 20 |
| const int | kMaxDropCapBottom = -128 |
| const int | kNumEndPoints = 3 |
| const int | kHistogramSize = 256 |
| CCUtilMutex | tprintfMutex |
| const int | kStateCnt = 4 |
| const int | kNumLiteralCnt = 5 |
| const int | case_state_table [6][4] |
| const double | kAlignedFraction = 0.03125 |
| const double | kRaggedFraction = 0.5 |
| const double | kAlignedGapFraction = 0.75 |
| const double | kRaggedGapFraction = 3.0 |
| const int | kVLineAlignment = 3 |
| const int | kVLineGutter = 1 |
| const int | kVLineSearchSize = 150 |
| const int | kMinRaggedTabs = 5 |
| const int | kMinAlignedTabs = 4 |
| const int | kVLineMinLength = 500 |
| const double | kMinTabGradient = 4.0 |
| const int | kMaxSkewFactor = 15 |
| const char * | kTextordDebugPix = "psdebug_pix" |
| const int | kMinColumnWidth = 100 |
| const int | kMaxIncompatibleColumnCount = 2 |
| const double | kMarginOverlapFraction = 0.25 |
| const double | kHorizontalGapMergeFraction = 0.5 |
| const double | kMinNonNoiseFraction = 0.5 |
| const double | kMinGutterWidthGrid = 0.5 |
| const int | kSmallBlobSearchRadius = 2 |
| bool | textord_tabfind_show_initial_partitions = false |
| int | textord_tabfind_show_partitions = 0 |
| bool | textord_tabfind_show_columns = false |
| bool | textord_tabfind_show_blocks = false |
| bool | textord_tabfind_find_tables = false |
| const int | kMaxPartnerDepth = 4 |
| const double | kMaxSpacingDrift = 1.0 / 72 |
| const double | kMaxTopSpacingFraction = 0.25 |
| const double | kMaxSizeRatio = 1.5 |
| const double | kMaxLeaderGapFractionOfMax = 0.25 |
| const double | kMaxLeaderGapFractionOfMin = 0.5 |
| const int | kMinLeaderCount = 5 |
| const int | kLeaderCutCost = 8 |
| const int | kRGBRMSColors = 4 |
| bool | textord_tabfind_show_color_fit = false |
| const int | kMaxCaptionLines = 7 |
| const double | kMinCaptionGapRatio = 2.0 |
| const double | kMinCaptionGapHeightRatio = 0.5 |
| const double | kTinyEnoughTextlineOverlapFraction = 0.25 |
| const double | kMaxPartitionSpacing = 1.75 |
| const double | kMinRectangularFraction = 0.125 |
| const double | kMaxRectangularFraction = 0.75 |
| const double | kMaxRectangularGradient = 0.1 |
| const int | kMinImageFindSize = 100 |
| const int | kThinLineFraction = 30 |
| | The resolution is divided by this value to give the maximum pixel width allowed for thin lines.
|
| const int | kMinLineLengthFraction = 8 |
| | The resolution is divided by this value to give the minimum length, in pixels, required of a line.
|
| const int | kCrackSpacing = 100 |
| | Spacing of cracks across the page to break up tall vertical lines.
|
| const int | kLineFindGridSize = 50 |
| | Grid size used by line finder. Not very critical.
|
| int | textord_tabfind_show_strokewidths = 0 |
| bool | textord_tabfind_only_strokewidths = false |
| double | textord_strokewidth_minsize = 0.25 |
| double | textord_strokewidth_maxsize = 4.0 |
| bool | textord_tabfind_vertical_text = true |
| bool | textord_tabfind_force_vertical_text = false |
| bool | textord_tabfind_vertical_horizontal_mix = true |
| double | textord_tabfind_vertical_text_ratio = 0.5 |
| const double | kStrokeWidthFractionTolerance = 0.125 |
| const double | kStrokeWidthTolerance = 1.5 |
| const double | kStrokeWidthFractionCJK = 0.25 |
| const double | kStrokeWidthCJK = 2.0 |
| const int | kCJKRadius = 2 |
| const double | kCJKBrokenDistanceFraction = 0.25 |
| const int | kCJKMaxComponents = 8 |
| const double | kCJKAspectRatio = 1.25 |
| const double | kCJKAspectRatioIncrease = 1.0625 |
| const int | kMaxCJKSizeRatio = 5 |
| const int | kMinDiacriticSizeRatio = 2 |
| const int | kSearchRadius = 2 |
| const int | kLineTrapLongest = 4 |
| const int | kLineTrapShortest = 2 |
| const int | kMostlyOneDirRatio = 3 |
| const double | kMaxSmallNeighboursPerPix = 3.0 / 128 |
| const float | kSizeRatioToReject = 2.0 |
| const double | kMaxTextSize = 2.0 |
| const int | kTabRadiusFactor = 5 |
| const int | kMinVerticalSearch = 3 |
| const int | kMaxVerticalSearch = 12 |
| const int | kMaxRaggedSearch = 25 |
| const int | kMinLinesInColumn = 10 |
| const double | kMinFractionalLinesInColumn = 0.125 |
| const double | kMinGutterWidthAbsolute = 0.02 |
| const double | kMaxGutterWidthAbsolute = 2.00 |
| const double | kLineFragmentAspectRatio = 10.0 |
| const double | kSmoothFactor = 0.25 |
| const double | kMinBaselineCoverage = 0.5 |
| const double | kCharVerticalOverlapFraction = 0.375 |
| const double | kMaxHorizontalGap = 3.0 |
| const double | kMaxBaselineError = 0.4375 |
| const int | kMinEvaluatedTabs = 3 |
| const int | kMaxTextLineBlobRatio = 5 |
| const int | kMinTextLineBlobRatio = 3 |
| const double | kMinImageArea = 0.5 |
| const double | kCosMaxSkewAngle = 0.866025 |
| bool | textord_tabfind_show_initialtabs = false |
| bool | textord_tabfind_show_finaltabs = false |
| double | textord_tabfind_aligned_gap_fraction = 0.75 |
| const int | kColumnWidthFactor = 20 |
| const int | kMaxVerticalSpacing = 500 |
| const int | kMaxBlobWidth = 500 |
| const double | kSplitPartitionSize = 2.0 |
| const double | kAllowTextHeight = 0.5 |
| const double | kAllowTextWidth = 0.6 |
| const double | kAllowTextArea = 0.8 |
| const double | kAllowBlobHeight = 0.3 |
| const double | kAllowBlobWidth = 0.4 |
| const double | kAllowBlobArea = 0.05 |
| const int | kMinBoxesInTextPartition = 10 |
| const int | kMaxBoxesInDataPartition = 20 |
| const double | kMaxGapInTextPartition = 4.0 |
| const double | kMinMaxGapInTextPartition = 0.5 |
| const double | kMaxBlobOverlapFactor = 4.0 |
| const double | kMaxTableCellXheight = 2.0 |
| const int | kMaxColumnHeaderDistance = 4 |
| const double | kTableColumnThreshold = 3.0 |
| const int | kRulingVerticalMargin = 3 |
| const double | kMinOverlapWithTable = 0.6 |
| const int | kSideSpaceMargin = 10 |
| const double | kSmallTableProjectionThreshold = 0.35 |
| const double | kLargeTableProjectionThreshold = 0.45 |
| const int | kLargeTableRowCount = 6 |
| const int | kMinRowsInTable = 3 |
| const double | kRequiredFullJustifiedSpacing = 4.0 |
| const int | kAdjacentLeaderSearchPadding = 2 |
| const double | kParagraphEndingPreviousLineRatio = 1.3 |
| const double | kMaxParagraphEndingLeftSpaceMultiple = 3.0 |
| const double | kMinParagraphEndingTextToWhitespaceRatio = 3.0 |
| const double | kMaxXProjectionGapFactor = 2.0 |
| const double | kStrokeWidthFractionalTolerance = 0.25 |
| const double | kStrokeWidthConstantTolerance = 2.0 |
| bool | textord_dump_table_images = false |
| bool | textord_show_tables = false |
| bool | textord_tablefind_show_mark = false |
| bool | textord_tablefind_show_stats = false |
| bool | textord_tablefind_recognize_tables = false |
| const double | kHorizontalSpacing = 0.30 |
| const double | kVerticalSpacing = -0.2 |
| const int | kCellSplitRowThreshold = 0 |
| const int | kCellSplitColumnThreshold = 0 |
| const int | kLinedTableMinVerticalLines = 3 |
| const int | kLinedTableMinHorizontalLines = 3 |
| const double | kRequiredColumns = 0.7 |
| const double | kMarginFactor = 1.1 |
| const double | kMaxRowSize = 2.5 |
| const double | kGoodRowNumberOfColumnsSmall [] = { 2, 2, 2, 2, 2, 3, 3 } |
| const int | kGoodRowNumberOfColumnsSmallSize |
| const double | kGoodRowNumberOfColumnsLarge = 0.7 |
| const double | kMinFilledArea = 0.35 |
| const int | kGutterMultiple = 4 |
| const int | kGutterToNeighbourRatio = 3 |
| const int | kSimilarVectorDist = 10 |
| const int | kSimilarRaggedDist = 50 |
| const int | kMaxFillinMultiple = 11 |
| const double | kMinGutterFraction = 0.5 |
| const double | kLineCountReciprocal = 4.0 |
| const double | kMinAlignedGutter = 0.25 |
| const double | kMinRaggedGutter = 2.0 |
| double | textord_tabvector_vertical_gap_fraction = 0.5 |
| double | textord_tabvector_vertical_box_ratio = 0.5 |
| const char * | kAlignmentNames [] |
recog_pseudo_word
Make a word from the selected blobs and run Tess on them.
Parameters:
| page_res | recognise blobs |
| selection_box | within this box |
fp_eval_word_spacing() Evaluation function for fixed pitch word lists.
Basically, count the number of "nice" characters - those which are in tess acceptable words or in dict words and are not rejected. Penalise any potential noise chars.
process_selected_words()
Walk the current block list applying the specified word processor function to each word that overlaps the selection_box.
build_menu()
Construct the menu tree used by the command window
process_cmd_win_event()
Process a command returned from the command window (Just call the appropriate command handler)
word_blank_and_set_display() Word processor
Blank display of word then redisplay word according to current display mode settings
---------------------------------------------------------------------------- Public Code ----------------------------------------------------------------------------
---------------------------------------------------------------------------- Include Files and Type Defines ---------------------------------------------------------------------------- ---------------------------------------------------------------------------- Public Code ----------------------------------------------------------------------------