From 894f3035fdf3fbd35f155b93142f5972199de4e4 Mon Sep 17 00:00:00 2001 From: Erik Auerswald Date: Sat, 22 Jun 2024 20:04:08 +0200 Subject: [PATCH] fix a special case for decimal point recognition When the widest digit found in the image is a one, it is likely that a decimal separator is nearly as wide as this digit. Thus it cannot be recognized, because the decimal separator needs to be at most half as wide as the widest digit (before this commit). Thus add an additional pass over the digits for this special case. This pass comes after the existing recognition passes for the digit one, decimal separator, and minus sign. In the new pass, the width of the digit is ignored. This addresses GitHub issue #26. --- NEWS | 1 + ssocr.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/NEWS b/NEWS index b29771f..b5041fd 100644 --- a/NEWS +++ b/NEWS @@ -3,6 +3,7 @@ Noteworthy Changes in ssocr Releases Upcoming Version ?.??.? (????-??-??): ------------------------------------- + * Fix decimal separator recognition when widest digit is a one * Add recognition of lower case variant of character 'h' to full charset * Add recognition of lower case variant of character 'r' to full charset * Documentation improvements diff --git a/ssocr.c b/ssocr.c index ba2339c..638df20 100644 --- a/ssocr.c +++ b/ssocr.c @@ -278,6 +278,7 @@ int main(int argc, char **argv) int dig_w; /* width of digit part of image */ int dig_h; /* height of digit part of image */ int max_dig_h=0, max_dig_w=0; /* maximum height & width of digits found */ + int widest_dig_is_one=0; /* set to one if the widest digit is a one */ Imlib_Color color; /* Imlib2 RGBA color structure */ /* state of search */ int state = (ssocr_foreground == SSOCR_BLACK) ? FIND_DARK : FIND_LIGHT; @@ -1521,6 +1522,56 @@ int main(int argc, char **argv) } } + /* If the widest digit is a one, decimal points may be of the same width, + * and may thus not be detected. 
Now that minus signs have been selected, + * if the widest digit still is a one (i.e., no minus signs), then decimal + * separators may also be recognized by checking only the height, not the + * width. */ + if(flags & DEBUG_OUTPUT) + fputs("checking for special case of a one as widest character\n",stderr); + /* check if the widest digit is a one */ + for(d=0; d= max_dig_w)) { + widest_dig_is_one = 1; + if(flags & DEBUG_OUTPUT) + fputs(" widest digit is a one -> additional decimal point search\n", + stderr); + break; + } + } + if(!widest_dig_is_one) { + fputs(" widest digit is not a one, skipping extra decimal point search\n", + stderr); + } else { + /* widest digit is a one, thus decimal separators may have been missed: + * identify a decimal point (or thousands separator) by relative height */ + if(flags & DEBUG_OUTPUT) + fputs("looking for decimal points again\n",stderr); + for(d=0; d dec_h_ratio)) { + digits[d].digit = D_DECIMAL; + if(flags & DEBUG_OUTPUT) + fprintf(stderr, " digit %d is a decimal point\n", d); + } + } + } + /* now the digits are located and they have to be identified */ if(flags & DEBUG_OUTPUT) fputs("starting scanline based recognition for remaining digits\n", stderr);