Strip whitespace when reading OCR text for CSV

5 years ago · df50db1fbd
parent 01406752d4
commit df50db1fbd
4 changed files with 4 additions and 4 deletions
--- a/dist/table_ocr-0.2.1-py3-none-any.whl
+++ b/dist/table_ocr-0.2.1-py3-none-any.whl
--- a/pdf_table_extraction_and_ocr.org
+++ b/pdf_table_extraction_and_ocr.org
@@ -794,7 +794,7 @@ Requires binaries for tesseract, ImageMagick, and pdfimages (from Poppler).
 """
 setuptools.setup(
    name="table_ocr",
-    version="0.2.0",
+    version="0.2.1",
    author="Eric Ihli",
    author_email="eihli@owoga.com",
    description="Extract text from tables in images.",
@@ -1113,7 +1113,7 @@ def text_files_to_csv(files):
    for f in files:
        directory, filename = os.path.split(f)
        with open(f) as of:
-            txt = of.read()
+            txt = of.read().strip()
        row, column = map(int, filename.split(".")[0].split("-"))
        if row == len(rows):
            rows.append([])
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@ Requires binaries for tesseract, ImageMagick, and pdfimages (from Poppler).
 """
 setuptools.setup(
    name="table_ocr",
-    version="0.2.0",
+    version="0.2.1",
    author="Eric Ihli",
    author_email="eihli@owoga.com",
    description="Extract text from tables in images.",
--- a/table_ocr/ocr_to_csv/__init__.py
+++ b/table_ocr/ocr_to_csv/__init__.py
@@ -15,7 +15,7 @@ def text_files_to_csv(files):
    for f in files:
        directory, filename = os.path.split(f)
        with open(f) as of:
-            txt = of.read()
+            txt = of.read().strip()
        row, column = map(int, filename.split(".")[0].split("-"))
        if row == len(rows):
            rows.append([])