Strip whitespace when reading OCR text for CSV output

main
Eric Ihli 4 years ago
parent 01406752d4
commit df50db1fbd

Binary file not shown.

@ -794,7 +794,7 @@ Requires binaries for tesseract, ImageMagick, and pdfimages (from Poppler).
""" """
setuptools.setup( setuptools.setup(
name="table_ocr", name="table_ocr",
version="0.2.0", version="0.2.1",
author="Eric Ihli", author="Eric Ihli",
author_email="eihli@owoga.com", author_email="eihli@owoga.com",
description="Extract text from tables in images.", description="Extract text from tables in images.",
@ -1113,7 +1113,7 @@ def text_files_to_csv(files):
for f in files: for f in files:
directory, filename = os.path.split(f) directory, filename = os.path.split(f)
with open(f) as of: with open(f) as of:
txt = of.read() txt = of.read().strip()
row, column = map(int, filename.split(".")[0].split("-")) row, column = map(int, filename.split(".")[0].split("-"))
if row == len(rows): if row == len(rows):
rows.append([]) rows.append([])

@ -7,7 +7,7 @@ Requires binaries for tesseract, ImageMagick, and pdfimages (from Poppler).
""" """
setuptools.setup( setuptools.setup(
name="table_ocr", name="table_ocr",
version="0.2.0", version="0.2.1",
author="Eric Ihli", author="Eric Ihli",
author_email="eihli@owoga.com", author_email="eihli@owoga.com",
description="Extract text from tables in images.", description="Extract text from tables in images.",

@ -15,7 +15,7 @@ def text_files_to_csv(files):
for f in files: for f in files:
directory, filename = os.path.split(f) directory, filename = os.path.split(f)
with open(f) as of: with open(f) as of:
txt = of.read() txt = of.read().strip()
row, column = map(int, filename.split(".")[0].split("-")) row, column = map(int, filename.split(".")[0].split("-"))
if row == len(rows): if row == len(rows):
rows.append([]) rows.append([])

Loading…
Cancel
Save