|
|
@ -794,7 +794,7 @@ Requires binaries for tesseract, ImageMagick, and pdfimages (from Poppler).
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
setuptools.setup(
|
|
|
|
setuptools.setup(
|
|
|
|
name="table_ocr",
|
|
|
|
name="table_ocr",
|
|
|
|
version="0.2.0",
|
|
|
|
version="0.2.1",
|
|
|
|
author="Eric Ihli",
|
|
|
|
author="Eric Ihli",
|
|
|
|
author_email="eihli@owoga.com",
|
|
|
|
author_email="eihli@owoga.com",
|
|
|
|
description="Extract text from tables in images.",
|
|
|
|
description="Extract text from tables in images.",
|
|
|
@ -1113,7 +1113,7 @@ def text_files_to_csv(files):
|
|
|
|
for f in files:
|
|
|
|
for f in files:
|
|
|
|
directory, filename = os.path.split(f)
|
|
|
|
directory, filename = os.path.split(f)
|
|
|
|
with open(f) as of:
|
|
|
|
with open(f) as of:
|
|
|
|
txt = of.read()
|
|
|
|
txt = of.read().strip()
|
|
|
|
row, column = map(int, filename.split(".")[0].split("-"))
|
|
|
|
row, column = map(int, filename.split(".")[0].split("-"))
|
|
|
|
if row == len(rows):
|
|
|
|
if row == len(rows):
|
|
|
|
rows.append([])
|
|
|
|
rows.append([])
|
|
|
|