@ -849,9 +849,32 @@ if __name__ == "__main__":
#+NAME: extract_tables/__init__.py
#+HEADER: :tangle table_ocr/extract_tables/__init__.py
#+BEGIN_SRC python
import os
import cv2
<<detect-tables>>
def main(files):
    """Extract every detected table from each input image and save it as a PNG.

    Each path in ``files`` is read as a grayscale image and handed to
    ``find_tables``. Every table image returned is written to
    ``<image dir>/<image name without extension>/table-NNN.png``, where
    ``NNN`` is the zero-padded index of the table within that image.

    Args:
        files: Iterable of image file paths.

    Returns:
        A list of ``(input_path, [table_image_paths])`` tuples, one for each
        input image in which at least one table was found. Images with no
        tables, and images that could not be read, are omitted.
    """
    # Local import keeps this block self-contained; only needed on the
    # unreadable-file path below.
    import warnings

    results = []
    for f in files:
        image = cv2.imread(f, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread reports a missing or undecodable file by returning
            # None instead of raising; skip it rather than pass None on.
            warnings.warn("Could not read image, skipping: {}".format(f))
            continue

        tables = find_tables(image)
        if not tables:
            continue

        directory, filename = os.path.split(f)
        output_dir = os.path.join(directory, os.path.splitext(filename)[0])
        os.makedirs(output_dir, exist_ok=True)

        # Use a distinct name so the ``files`` argument is not rebound while
        # it is being iterated.
        table_paths = []
        for i, table in enumerate(tables):
            table_path = os.path.join(output_dir, "table-{:03d}.png".format(i))
            cv2.imwrite(table_path, table)
            table_paths.append(table_path)
        results.append((f, table_paths))

    # results is [(<input image>, [<images of detected tables>]), ...]
    return results
#+END_SRC
**** table_ocr/extract_tables/__main__.py
@ -876,39 +899,11 @@ For each image path given as an argument, outputs:
#+NAME: extract_tables/__main__.py
#+BEGIN_SRC python :tangle table_ocr/extract_tables/__main__.py :results none
import argparse
import os
import cv2
from table_ocr.extract_tables import find_tables
from table_ocr.extract_tables import main
# Command-line interface: one or more positional image paths, collected
# into the ``files`` attribute of the parsed namespace.
parser = argparse.ArgumentParser()
parser.add_argument(
    "files",
    nargs=argparse.ONE_OR_MORE,
)
def main(files):
    """Extract every detected table from each input image and save it as a PNG.

    Each path in ``files`` is read as a grayscale image and handed to
    ``find_tables``. Every table image returned is written to
    ``<image dir>/<image name without extension>/table-NNN.png``, where
    ``NNN`` is the zero-padded index of the table within that image.

    NOTE(review): this definition rebinds the ``main`` name that is also
    imported from ``table_ocr.extract_tables`` at the top of this module;
    confirm which of the two is meant to be used.

    Args:
        files: Iterable of image file paths.

    Returns:
        A list of ``(input_path, [table_image_paths])`` tuples, one for each
        input image in which at least one table was found. Images with no
        tables, and images that could not be read, are omitted.
    """
    # Local import keeps this block self-contained; only needed on the
    # unreadable-file path below.
    import warnings

    results = []
    for f in files:
        image = cv2.imread(f, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread reports a missing or undecodable file by returning
            # None instead of raising; skip it rather than pass None on.
            warnings.warn("Could not read image, skipping: {}".format(f))
            continue

        tables = find_tables(image)
        if not tables:
            continue

        directory, filename = os.path.split(f)
        output_dir = os.path.join(directory, os.path.splitext(filename)[0])
        os.makedirs(output_dir, exist_ok=True)

        # Use a distinct name so the ``files`` argument is not rebound while
        # it is being iterated.
        table_paths = []
        for i, table in enumerate(tables):
            table_path = os.path.join(output_dir, "table-{:03d}.png".format(i))
            cv2.imwrite(table_path, table)
            table_paths.append(table_path)
        results.append((f, table_paths))

    # results is [(<input image>, [<images of detected tables>]), ...]
    return results
if __name__ == "__main__":
args = parser.parse_args()
files = args.files
results = main(files)