diff --git a/NEWS b/NEWS index dcae67a..198a4d5 100644 --- a/NEWS +++ b/NEWS @@ -5,6 +5,7 @@ Upcoming Version ?.??.? (????-??-??): ------------------------------------- * New option -N, --min-segment to specify the minimum width and height of a segment, for both scanline based and ratio based character recognition + * New option -M, --min-char-dims to specify minimum character dimensions Version 2.22.2 (2023-04-24): ---------------------------- diff --git a/defines.h b/defines.h index 03500c9..8cfa094 100644 --- a/defines.h +++ b/defines.h @@ -122,6 +122,12 @@ * used for decimal separators */ #define MIN_SEGMENT 1 +/* minimum width of a character respectively digit */ +#define MIN_CHAR_W 1 + +/* minimum height of a character respectively digit */ +#define MIN_CHAR_H 1 + /* ignore # of pixels when checking a column fo black or white */ #define IGNORE_PIXELS 0 diff --git a/help.c b/help.c index 34f1378..405a851 100644 --- a/help.c +++ b/help.c @@ -123,6 +123,7 @@ void usage(char *name, FILE *f) fprintf(f, " -N, --min-segment=SIZE minimum width and height of a segment\n"); fprintf(f, " -i, --ignore-pixels=# number of pixels ignored when searching digit\n"); fprintf(f, " boundaries\n"); + fprintf(f, " -M, --min-char-dims=WxH minimum width and height of a character/digit\n"); fprintf(f, " -d, --number-digits=# number of digits in image (-1 for auto)\n"); fprintf(f, " -r, --one-ratio=# height/width ratio to recognize a \'one\'\n"); fprintf(f, " -m, --minus-ratio=# width/height ratio to recognize a minus sign\n"); @@ -185,23 +186,25 @@ void usage(char *name, FILE *f) fprintf(f, " pixels set (including checked position)\n"); fprintf(f, " keep_pixels_filter MASK keeps pixels that have at least MASK neighbor\n"); fprintf(f, " pixels set (not counting the checked pixel)\n"); - fprintf(f, "\nDefaults: needed pixels = %2d\n", NEED_PIXELS); - fprintf(f, " minimum segment size = %2d\n", MIN_SEGMENT); - fprintf(f, " ignored pixels = %2d\n", IGNORE_PIXELS); - fprintf(f, " no. 
of digits = %2d\n", NUMBER_OF_DIGITS); - fprintf(f, " threshold = %5.2f\n", THRESHOLD); - fprintf(f, " foreground = %s\n", + fprintf(f, "\nDefaults: needed pixels = %2d\n", NEED_PIXELS); + fprintf(f, " minimum segment size = %2d\n", MIN_SEGMENT); + fprintf(f, " minimum character width = %2d\n", MIN_CHAR_W); + fprintf(f, " minimum character height = %2d\n", MIN_CHAR_H); + fprintf(f, " ignored pixels = %2d\n", IGNORE_PIXELS); + fprintf(f, " no. of digits = %2d\n", NUMBER_OF_DIGITS); + fprintf(f, " threshold = %5.2f\n", THRESHOLD); + fprintf(f, " foreground = %s\n", (SSOCR_DEFAULT_FOREGROUND == SSOCR_BLACK) ? "black" : "white"); - fprintf(f, " background = %s\n", + fprintf(f, " background = %s\n", (SSOCR_DEFAULT_BACKGROUND == SSOCR_BLACK) ? "black" : "white"); - fprintf(f, " luminance = "); + fprintf(f, " luminance = "); print_lum_key(DEFAULT_LUM_FORMULA, f); fprintf(f, "\n"); fprintf(f, " height/width threshold for digit one = %2d\n", ONE_RATIO); fprintf(f, " width/height threshold for minus sign = %2d\n", MINUS_RATIO); fprintf(f, " max_dig_h/h threshold for decimal sep = %2d\n", DEC_H_RATIO); fprintf(f, " max_dig_w/w threshold for decimal sep = %2d\n", DEC_W_RATIO); - fprintf(f, " space width factor = %.2f\n", SPC_FAC); - fprintf(f, " character set = "); + fprintf(f, " space width factor = %.2f\n", SPC_FAC); + fprintf(f, " character set = "); print_cs_key(DEFAULT_CHARSET, f); fputs("\n", f); fprintf(f, "\nOperation: The IMAGE is read, the COMMANDs are processed in the sequence\n"); fprintf(f, " they are given, in the resulting image the given number of digits\n"); diff --git a/ssocr.1.in b/ssocr.1.in index f1d74d6..d2365d7 100644 --- a/ssocr.1.in +++ b/ssocr.1.in @@ -75,6 +75,18 @@ Can be used to ignore some noise in the picture. See the web page of .BR ssocr (1) for a description of the algorithm. +.SS \-M, \-\-min\-char\-dims WIDTHxHEIGHT +Specify the minimum dimensions of characters respectively digits. 
+When the segmentation step finds potential digits,
+those with a width less than
+.B WIDTH
+or a height less than
+.B HEIGHT
+are ignored.
+Can be used to ignore some noise in the picture.
+See the web page of
+.BR ssocr (1)
+for a description of the algorithm.
 .SS \-d, \-\-number\-digits NUMBER
 Specifies the number of digits shown in the image. Default value is
 .IR 6 .
diff --git a/ssocr.c b/ssocr.c
index 8877f58..b028e19 100644
--- a/ssocr.c
+++ b/ssocr.c
@@ -27,7 +27,7 @@
 #include /* exit */
 
 /* string manipulation */
-#include /* memcpy, strdup, strlen */
+#include /* memcpy, strchr, strdup, strlen */
 
 /* option parsing */
 #include /* getopt */
@@ -146,6 +146,52 @@ static void print_spaces(FILE *f, int n)
   }
 }
 
+/* parse dimensions given as a string in the format "WxH" and store them in
+ * *d; W and H must both be complete decimal integers >= 1 fitting an int;
+ * returns 0 on success, returns 1 and leaves *d unmodified on any error */
+static int parse_width_height(const char *s, dimensions_struct *d)
+{
+  const char *sep; /* position of the 'x' separating width and height */
+  char *w_end, *h_end; /* first character not consumed by strtol() */
+  long w, h;
+  if (!s || !d) {
+    fputs(PROG ": error: parse_width_height() called with NULL pointer\n",
+          stderr);
+    return 1;
+  }
+  if (*s == '\0') {
+    fputs(PROG ": error: parse_width_height() called with empty string\n",
+          stderr);
+    return 1;
+  }
+  sep = strchr(s, 'x');
+  if (!sep) {
+    fputs(PROG ": error: no 'x' in dimension specification\n", stderr);
+    return 1;
+  }
+  if (sep == s) {
+    fputs(PROG ": error: width missing from dimension specification\n", stderr);
+    return 1;
+  }
+  if (sep[1] == '\0') {
+    fputs(PROG ": error: height missing from dimension specification\n",stderr);
+    return 1;
+  }
+  /* unlike atoi(), strtol() shows where parsing stopped, so trailing garbage
+   * is rejected; the (int) round trip catches values that do not fit an int */
+  w = strtol(s, &w_end, 10);
+  h = strtol(sep + 1, &h_end, 10);
+  if (w_end != sep || *h_end != '\0' ||
+      w < 1 || h < 1 || w != (int) w || h != (int) h) {
+    fprintf(stderr,
+            PROG ": warning: ignoring minimum character dimensions %s\n", s);
+    return 1;
+  }
+  d->w = (int) w;
+  d->h = (int) h;
+  return 0;
+}
+
 /*** main() ***/
 
 int main(int argc, char **argv)
@@ -161,6 +207,7 @@ int main(int argc, char **argv)
   int unknown_digit=0; /* was one of the 6
found digits an unknown one? */ int need_pixels = NEED_PIXELS; /* pixels needed to set segment in scanline */ int min_segment = MIN_SEGMENT; /* minimum pixels needed for a segment */ + dimensions_struct min_char_dims; /* minimum character dimensions (W x H) */ int number_of_digits = NUMBER_OF_DIGITS; /* look for this many digits */ int potential_digits; /* number of potential digits after segmentation */ int ignore_pixels = IGNORE_PIXELS; /* pixels to ignore when checking column */ @@ -192,6 +239,10 @@ int main(int argc, char **argv) int found_pixels=0; /* how many pixels are already found */ color_struct d_color = {0, 0, 0, 0}; /* drawing color */ + /* initialize minimum character dimensions structure */ + min_char_dims.w = MIN_CHAR_W; + min_char_dims.h = MIN_CHAR_H; + /* if we provided no arguments to the program exit */ if (argc < 2) { usage(PROG, stderr); @@ -211,6 +262,7 @@ int main(int argc, char **argv) {"iter-threshold", 0, 0, 'T'}, /* use treshold value as provided */ {"number-pixels", 1, 0, 'n'}, /* pixels needed to regard segment as set */ {"min-segment", 1, 0, 'N'}, /* minimum pixels needed for a segment */ + {"min-char-dims", 1, 0, 'M'}, /* minimum character (digit) dimensions */ {"ignore-pixels", 1, 0, 'i'}, /* pixels ignored when searching digits */ {"number-digits", 1, 0, 'd'}, /* number of digits in image */ {"one-ratio", 1, 0, 'r'}, /* height/width threshold to recognize a one */ @@ -237,7 +289,7 @@ int main(int argc, char **argv) {0, 0, 0, 0} /* terminate long options */ }; c = getopt_long (argc, argv, - "hVt:vaTn:N:i:d:r:m:o:O:D::pPf:b:Igl:SXCc:H:W:sA:G", + "hVt:vaTn:N:i:d:r:m:M:o:O:D::pPf:b:Igl:SXCc:H:W:sA:G", long_options, &option_index); if (c == -1) break; /* leaves while (1) loop */ switch (c) { @@ -305,6 +357,19 @@ int main(int argc, char **argv) } } break; + case 'M': + if(optarg) { + int ret; + ret = parse_width_height(optarg, &min_char_dims); + if (ret) { + fprintf(stderr, "warning: ignoring --min-char-dims=%s\n", optarg); + } + 
if(flags & DEBUG_OUTPUT) { + fprintf(stderr, "min_char_dims = %dx%d\n", min_char_dims.w, + min_char_dims.h); + } + } + break; case 'i': if(optarg) { ignore_pixels = atoi(optarg); @@ -516,6 +581,7 @@ int main(int argc, char **argv) fprintf(stderr, "flags & SPC_USE_AVG_DST=%d\n", flags & SPC_USE_AVG_DST); fprintf(stderr, "need_pixels = %d\n", need_pixels); fprintf(stderr, "min_segment = %d\n", min_segment); + fprintf(stderr, "min_char_dims = %dx%d\n",min_char_dims.w,min_char_dims.h); fprintf(stderr, "ignore_pixels = %d\n", ignore_pixels); fprintf(stderr, "number_of_digits = %d\n", number_of_digits); fprintf(stderr, "foreground = %d (%s)\n", ssocr_foreground, @@ -1152,6 +1218,56 @@ int main(int argc, char **argv) } } } + if (flags & DEBUG_OUTPUT) { + fprintf(stderr, "image segmentation found %d potential digits\n", + potential_digits); + } + + /* image has been segmented into potential digits, ignore too small ones */ + if (min_char_dims.w > 1 || min_char_dims.h > 1) { + int digit_count = 0, pos; + digit_struct *tmp; + if (flags & DEBUG_OUTPUT) { + fputs("dropping too small potential digits\n", stderr); + } + /* count sufficiently large digits */ + for (d = 0; d < potential_digits; d++) { + if (digits[d].x2 - digits[d].x1 >= min_char_dims.w && + digits[d].y2 - digits[d].y1 >= min_char_dims.h) { + if (flags & DEBUG_OUTPUT) { + fprintf(stderr, " keeping sufficiently large digit %d\n", d); + } + digit_count += 1; + } else if (flags & DEBUG_OUTPUT) { + fprintf(stderr, " dropping too small potential digit %d\n", d); + } + } + if (flags & DEBUG_OUTPUT) { + fprintf(stderr, "keeping %d of %d potential digits\n", digit_count, + potential_digits); + } + /* allocate memory for sufficiently large digits */ + if(!(tmp = calloc(digit_count, sizeof(digit_struct)))) { + perror(PROG ": tmp = calloc()"); + exit(99); + } + /* keep only sufficiently large digits */ + pos = 0; + for (d = 0; d < potential_digits; d++) { + if (digits[d].x2 - digits[d].x1 >= min_char_dims.w && + digits[d].y2 
- digits[d].y1 >= min_char_dims.h) { + if (pos >= digit_count) { + fputs(PROG ": error copying digit information", stderr); + exit(99); + } + memcpy(tmp + pos, digits + d, sizeof(digit_struct)); + pos++; + } + } + free(digits); + digits = tmp; + potential_digits = digit_count; + } /* check if expected number of digits have been found */ if ((number_of_digits > -1) && (number_of_digits != potential_digits)) { @@ -1166,13 +1282,13 @@ int main(int argc, char **argv) exit(1); } - /* accept all potential characters / digits */ + /* continue to work with the accepted number of characters / digits */ number_of_digits = potential_digits; if (flags & DEBUG_OUTPUT) { fprintf(stderr, "image segmentation found %d digits\n", number_of_digits); } - /* draw rectangles around digits */ + /* draw rectangles around accepted digits */ if(flags & USE_DEBUG_IMAGE) { imlib_context_set_image(debug_image); imlib_context_set_color(128,128,128,255); /* gray line */ @@ -1204,7 +1320,7 @@ int main(int argc, char **argv) /* debug: write digit info to stderr */ if(flags & DEBUG_OUTPUT) { - fprintf(stderr, "found %d digits\n", d); + fprintf(stderr, "found %d digits\n", number_of_digits); for(d=0; d (%d,%d), width: %d (%5.2f%%) " "height: %d (%5.2f%%)\n", diff --git a/ssocr.h b/ssocr.h index c84e0b4..19a9b48 100644 --- a/ssocr.h +++ b/ssocr.h @@ -27,4 +27,8 @@ typedef struct { int R, G, B, A; } color_struct; +typedef struct { + int w, h; +} dimensions_struct; + #endif /* SSOCR2_H */