Ah, I see what you mean. Yes, different font sizes would need different kernels.
My fragment above is a building block to remove the unfilled pixels near the bottom of characters. The kernel can be inverted for unfilled pixels near the top of characters. That leaves a few isolated pixels, which a third pass can remove.
A complete Windows Bat script, that gives perfect results for your sample file, is below. It isn't fast, because of the repeated sub-image search for white pixels. Performance would be greatly improved by dumping w2.tiff to a text file and looping through it, floodfilling w1.tiff for each white-ish pixel in w2.tiff.
If your files have different font sizes, other morphology methods may be better. See
http://www.imagemagick.org/Usage/morphology/ .
Code: Select all
"%IMG%convert" wollte.jpg ^
-fuzz 50%% ^
-fill Black ^
-floodfill 0x0 White ^
-alpha off ^
-threshold 50%% ^
-depth 8 ^
w1.tiff
rem Find unfilled pixels near the bottom of characters.
"%IMG%convert" w1.tiff ^
-morphology Hit-and-Miss "1x8:1,0,1,1,0,0,0,0" ^
w2.tiff
:Loop1
rem Find a white pixel
"%IMG%compare" ^
-metric pae -dissimilarity-threshold 1 ^
w2.tiff ^
-size 1x1 xc:white ^
-subimage-search ^
null: 2>wollteWhite.lis
type wollteWhite.lis
for /f "tokens=2,3,4 delims=()@, " %%a ^
in (wollteWhite.lis) ^
do (
set score=%%a
set foundX=%%b
set foundY=%%c
)
if /I "%score%" gtr "0.1" goto noMore1
set /A imgY=%foundY%-3
"%IMG%convert" w1.tiff -fuzz 25%% -fill Black -draw ^"color %foundX%,%imgY% floodfill^" w1.tiff
"%IMG%convert" w2.tiff -fuzz 25%% -fill Black -draw ^"color %foundX%,%foundY% floodfill^" w2.tiff
goto Loop1
:noMore1
rem Find unfilled pixels near the top of characters.
"%IMG%convert" w1.tiff ^
-threshold 50%% ^
-morphology Hit-and-Miss "1x8:0,0,0,0,1,1,0,1" ^
-threshold 50%% ^
-depth 8 ^
w2.tiff
:Loop2
rem Find a white pixel
"%IMG%compare" ^
-metric pae -dissimilarity-threshold 1 ^
w2.tiff ^
-size 1x1 xc:white ^
-subimage-search ^
null: 2>wollteWhite.lis
type wollteWhite.lis
for /f "tokens=2,3,4 delims=()@, " %%a ^
in (wollteWhite.lis) ^
do (
set score=%%a
set foundX=%%b
set foundY=%%c
)
if /I "%score%" gtr "0.1" goto noMore2
set /A imgY=%foundY%+4
"%IMG%convert" w1.tiff ^
-fuzz 50%% -fill Black -draw ^"color %foundX%,%imgY% floodfill^" ^
-threshold 50%% ^
-depth 8 ^
w1.tiff
"%IMG%convert" w2.tiff ^
-fuzz 50%% -fill Black -draw ^"color %foundX%,%foundY% floodfill^" ^
-threshold 50%% ^
-depth 8 ^
w2.tiff
goto Loop2
:noMore2
rem Eliminate single white pixels
"%IMG%convert" w1.tiff ^
-threshold 50%% ^
-morphology Hit-and-Miss "3x3:-,0,-,0,1,0,-,0,-" ^
-threshold 50%% ^
-depth 8 ^
w2.tiff
:Loop3
rem Find a white pixel
"%IMG%compare" ^
-metric pae -dissimilarity-threshold 1 ^
w2.tiff ^
-size 1x1 xc:white ^
-subimage-search ^
null: 2>wollteWhite.lis
type wollteWhite.lis
for /f "tokens=2,3,4 delims=()@, " %%a ^
in (wollteWhite.lis) ^
do (
set score=%%a
set foundX=%%b
set foundY=%%c
)
if /I "%score%" gtr "0.1" goto noMore3
set /A imgY=%foundY%
"%IMG%convert" w1.tiff ^
-fuzz 50%% -fill Black -draw ^"color %foundX%,%imgY% floodfill^" ^
-threshold 50%% ^
-depth 8 ^
w1.tiff
"%IMG%convert" w2.tiff ^
-fuzz 50%% -fill Black -draw ^"color %foundX%,%foundY% floodfill^" ^
-threshold 50%% ^
-depth 8 ^
w2.tiff
goto Loop3
:noMore3
rem Finished. w1.tiff contains the result.