Skip to content

Commit

Permalink
recognize: more robust polygon handling
Browse files Browse the repository at this point in the history
  • Loading branch information
bertsky committed May 2, 2024
1 parent 4e42262 commit e67ffe7
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 16 deletions.
43 changes: 28 additions & 15 deletions ocrd_tesserocr/recognize.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
import numpy as np
from scipy.sparse.csgraph import minimum_spanning_tree
from shapely.geometry import Polygon, LineString
from shapely.ops import unary_union, nearest_points
from shapely.ops import unary_union, nearest_points, orient
from shapely import set_precision

from tesserocr import (
RIL, PSM, PT, OEM,
Expand Down Expand Up @@ -1444,15 +1445,17 @@ def join_segments(segments):

def join_polygons(polygons, scale=20):
"""construct concave hull (alpha shape) from input polygons by connecting their pairwise nearest points"""
return make_join([Polygon(poly) for poly in polygons], scale=scale).exterior.coords[:-1]
return make_join([make_valid(Polygon(poly)) for poly in polygons], scale=scale).exterior.coords[:-1]

def make_join(polygons, scale=20):
"""construct concave hull (alpha shape) from input polygons by connecting their pairwise nearest points"""
# ensure input polygons are simply typed
polygons = list(itertools.chain.from_iterable([
poly.geoms if poly.geom_type in ['MultiPolygon', 'GeometryCollection']
else [poly]
for poly in polygons]))
# ensure input polygons are simply typed and all oriented equally
polygons = [orient(poly)
for poly in itertools.chain.from_iterable(
[poly.geoms
if poly.geom_type in ['MultiPolygon', 'GeometryCollection']
else [poly]
for poly in polygons])]
npoly = len(polygons)
if npoly == 1:
return polygons[0]
Expand All @@ -1461,7 +1464,7 @@ def make_join(polygons, scale=20):
dists = np.zeros((npoly, npoly), dtype=float)
for i, j in pairs:
dist = polygons[i].distance(polygons[j])
if dist == 0:
if dist < 1e-5:
dist = 1e-5 # if pair merely touches, we still need to get an edge
dists[i, j] = dist
dists[j, i] = dist
Expand All @@ -1475,12 +1478,14 @@ def make_join(polygons, scale=20):
polygons.append(bridgep)
jointp = unary_union(polygons)
assert jointp.geom_type == 'Polygon', jointp.wkt
if jointp.minimum_clearance < 1.0:
# follow-up calculations will necessarily be integer;
# so anticipate rounding here and then ensure validity
jointp = Polygon(np.round(jointp.exterior.coords))
jointp = make_valid(jointp)
return jointp
# follow-up calculations will necessarily be integer;
# so anticipate rounding here and then ensure validity
jointp2 = set_precision(jointp, 1.0)
if jointp2.geom_type != 'Polygon' or not jointp2.is_valid:
jointp2 = Polygon(np.round(jointp.exterior.coords))
jointp2 = make_valid(jointp2)
assert jointp2.geom_type == 'Polygon', jointp2.wkt
return jointp2

def pad_image(image, padding):
# TODO: input padding can create extra edges if not binarized; at least try to smooth
Expand Down Expand Up @@ -1558,11 +1563,19 @@ def make_valid(polygon):
# simplification may not be possible (at all) due to ordering
# in that case, try another starting point
polygon = Polygon(points[-split:]+points[:-split])
for tolerance in range(int(polygon.area)):
# try by simplification
for tolerance in range(int(polygon.area + 1.5)):
if polygon.is_valid:
break
# simplification may require a larger tolerance
polygon = polygon.simplify(tolerance + 1)
# try by enlarging
for tolerance in range(1, int(polygon.area + 2.5)):
if polygon.is_valid:
break
# enlargement may require a larger tolerance
polygon = polygon.buffer(tolerance)
assert polygon.is_valid, polygon.wkt
return polygon

def iterate_level(it, ril, parent=None):
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
ocrd >= 2.53
click
tesserocr >= 2.5.2
shapely >= 1.7.1
shapely >= 2.0
scipy

0 comments on commit e67ffe7

Please sign in to comment.