You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
54 lines
1.9 KiB
Python
54 lines
1.9 KiB
Python
5 years ago
|
import math
|
||
5 years ago
|
|
||
|
import cv2
|
||
5 years ago
|
|
||
5 years ago
|
def crop_to_text(image):
    """Crop a single-channel image down to the region that contains text.

    The image is inverted and binarized, long horizontal and vertical strokes
    (presumably table/cell borders -- confirm against callers) are removed,
    small specks are opened away, and the union of the bounding boxes of the
    remaining sufficiently large contours becomes the crop region. A constant
    border is then added around the result.

    Args:
        image: 2-D image array (``image.shape`` must unpack to exactly
            ``(height, width)``). ``cv2.adaptiveThreshold`` only accepts
            8-bit single-channel input, so this is expected to be ``uint8``.

    Returns:
        The cropped image surrounded by a 5-pixel border of value 255. If no
        contour with a bounding-box area above ``MIN_CHAR_AREA`` survives the
        cleanup, the whole input image is bordered and returned instead.
    """
    MAX_COLOR_VAL = 255
    BLOCK_SIZE = 15
    SUBTRACT_FROM_MEAN = -2
    # Extra rows kept below the detected text; going by the name, this keeps
    # commas/descenders from being cut off -- TODO confirm.
    NUM_PX_COMMA = 6
    # Bounding boxes at or below this area are treated as noise, not glyphs.
    MIN_CHAR_AREA = 5 * 9

    # Invert before thresholding so that (dark) ink ends up as foreground.
    img_bin = cv2.adaptiveThreshold(
        ~image,
        MAX_COLOR_VAL,
        cv2.ADAPTIVE_THRESH_MEAN_C,
        cv2.THRESH_BINARY,
        BLOCK_SIZE,
        SUBTRACT_FROM_MEAN,
    )

    img_h, img_w = image.shape

    # Opening with a long, 1-pixel-thick kernel keeps only strokes at least
    # that long. Clamp to 1 so tiny images never request a zero-size kernel.
    horizontal_kernel = cv2.getStructuringElement(
        cv2.MORPH_RECT, (max(1, int(img_w * 0.5)), 1)
    )
    vertical_kernel = cv2.getStructuringElement(
        cv2.MORPH_RECT, (1, max(1, int(img_h * 0.7)))
    )
    horizontal_lines = cv2.morphologyEx(img_bin, cv2.MORPH_OPEN, horizontal_kernel)
    vertical_lines = cv2.morphologyEx(img_bin, cv2.MORPH_OPEN, vertical_kernel)

    # Combine and remove the lines without uint8 wrap-around: plain NumPy
    # ``+`` turns 255 + 255 into 254, and the following ``-`` would then
    # leave non-zero residue wherever a horizontal and a vertical line cross.
    both = cv2.bitwise_or(horizontal_lines, vertical_lines)
    cleaned = cv2.subtract(img_bin, both)

    # Get rid of little noise.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    opened = cv2.morphologyEx(cleaned, cv2.MORPH_OPEN, kernel)

    contours, _ = cv2.findContours(opened, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

    # Filter *before* deciding whether anything is left; otherwise an image
    # containing only sub-threshold specks would produce infinite bounds and
    # crash on the slice below instead of taking the fallback.
    char_rects = [
        (x, y, w, h)
        for x, y, w, h in map(cv2.boundingRect, contours)
        if w * h > MIN_CHAR_AREA
    ]

    if char_rects:
        left = min(x for x, _, _, _ in char_rects)
        top = min(y for _, y, _, _ in char_rects)
        right = max(x + w for x, _, w, _ in char_rects)
        bottom = max(y + h for _, y, _, h in char_rects)
        cropped = image[
            top:min(img_h, bottom + NUM_PX_COMMA),
            left:min(img_w, right),
        ]
    else:
        # If we morphed out all of the text, fallback to using the unmorphed image.
        cropped = image

    bordered = cv2.copyMakeBorder(cropped, 5, 5, 5, 5, cv2.BORDER_CONSTANT, None, 255)
    return bordered
|
||
|
def ocr_image(image, config):
    """Run Tesseract OCR on ``image`` and return the recognized text.

    Args:
        image: Image passed as-is to ``pytesseract.image_to_string``.
        config: Value forwarded as the ``config`` keyword argument of
            ``pytesseract.image_to_string``.

    Returns:
        Whatever ``pytesseract.image_to_string`` returns for the given image
        and configuration.
    """
    recognized_text = pytesseract.image_to_string(image, config=config)
    return recognized_text
|