-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtable-coords.py
65 lines (49 loc) · 2.15 KB
/
table-coords.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import json
import os
import sys
import cv2
# Contours whose perimeter (in pixels) lies strictly between these two bounds
# are treated as table cells; everything else is ignored.
MIN_PERIMETER = 500
MAX_PERIMETER = 1500


def contours_from_result(result):
    """Return the contour list from a ``cv2.findContours`` return value.

    OpenCV 3.x returns ``(image, contours, hierarchy)`` whereas OpenCV 2.x and
    4.x return ``(contours, hierarchy)``. The contours are the second-to-last
    item in both shapes, so indexing from the end works for every version.
    """
    return result[-2]


def to_pdf_y(y, h, img_h):
    """Convert a bounding box's image-space ``y`` to a bottom-up ``y``.

    Image rows are counted downwards from the top edge, while PDF coordinates
    are conventionally measured upwards from the bottom edge. The result is
    the distance from the bottom of the image to the bottom edge of the box.

    :param y: top edge of the box, in image rows from the top.
    :param h: height of the box in pixels.
    :param img_h: height of the whole image in pixels.
    """
    return abs(y - img_h + h)


def cell_sort_key(cell):
    """Return the ordering key for a cell: by ``y`` first, then by ``x``.

    A numeric tuple is used instead of a zero-padded string so that
    coordinates of any magnitude compare correctly.
    """
    return (cell['y'], cell['x'])


def find_cells(img, min_perimeter=MIN_PERIMETER, max_perimeter=MAX_PERIMETER):
    """Find cell-sized contours in ``img`` and annotate them in place.

    Each accepted contour's bounding rectangle is drawn on ``img`` in green
    together with red debugging text showing its position and size.

    :param img: BGR image as returned by ``cv2.imread``; it is modified.
    :param min_perimeter: exclusive lower bound on the contour perimeter.
    :param max_perimeter: exclusive upper bound on the contour perimeter.
    :returns: list of dicts with the keys ``h``, ``w``, ``x`` and ``y``, where
        ``y`` is already converted with :func:`to_pdf_y`.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # shape is (rows, columns, channels); only the row count is needed.
    img_h = img.shape[0]
    contours = contours_from_result(
        cv2.findContours(gray, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE))

    cells = []
    for contour in contours:
        perimeter = cv2.arcLength(contour, True)
        if not min_perimeter < perimeter < max_perimeter:
            continue

        # Calculate a bounding rectangle from the contour and get the dimensions
        x, y, w, h = cv2.boundingRect(contour)
        # Add the rectangle in green on the original image
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)

        pdf_y = to_pdf_y(y, h, img_h)
        cells.append({
            'h': h,
            'w': w,
            'x': x,
            'y': pdf_y,
        })

        # Add some debugging info in each rectangle so we can see which text
        # part in the XML file relates to this.
        cv2.putText(img, 'x:{0}, y:{1} ({2})'.format(x, y, pdf_y),
                    (x + 10, y + 15), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 255))
        cv2.putText(img, 'w:{0}, h:{1}'.format(w, h),
                    (x + 10, y + 30), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 255))
    return cells


def main(argv):
    """Process the image named in ``argv[1]``.

    Writes ``<name>-processed.<ext>`` (the annotated image) and
    ``<name>-coords.json`` (the sorted cell coordinates) next to the input.
    Exits with a message when the argument is missing, the file does not
    exist, or the file cannot be decoded as an image.
    """
    if len(argv) < 2:
        sys.exit('Usage: {0} Image file name'.format(argv[0]))

    path = argv[1]
    shortname, extension = os.path.splitext(path)
    extension = extension.lstrip('.')

    if not os.path.exists(path):
        sys.exit('ERROR: File {0} was not found'.format(path))

    img = cv2.imread(path)
    # imread signals failure by returning None rather than raising.
    if img is None:
        sys.exit('ERROR: File {0} could not be read as an image'.format(path))

    # List of cells that have been found
    cells = find_cells(img)

    # Debug: Uncomment to view image
    # cv2.imshow("Output", img)
    # cv2.waitKey(0)
    # cv2.destroyAllWindows()

    filename = '{0}-processed.{1}'.format(shortname, extension)
    cv2.imwrite(filename, img)
    print('Saved processed {0} as {1}'.format(extension.upper(), filename))

    cells.sort(key=cell_sort_key)

    coords_filename = '{0}-coords.json'.format(shortname)
    with open(coords_filename, 'w') as coords_file:
        json.dump(cells, coords_file)
    print('Saved coordinates to {0}'.format(coords_filename))


if __name__ == '__main__':
    main(sys.argv)