Move `main` to __init__ for extract_tables

Since this function is the meat and potatoes, it's nice to be able to
import it as typical, which you can't really do if it only resides in
__main__.py.

Also, __main__.py doesn't need `if __name__ == "__main__"`. The whole
point of __main__.py is that it only gets run when that condition is true.
main
Eric Ihli 5 years ago
parent 85f864cd17
commit 962abb7a02

@ -849,9 +849,32 @@ if __name__ == "__main__":
#+NAME: extract_tables/__init__.py #+NAME: extract_tables/__init__.py
#+HEADER: :tangle table_ocr/extract_tables/__init__.py #+HEADER: :tangle table_ocr/extract_tables/__init__.py
#+BEGIN_SRC python #+BEGIN_SRC python
import os
import cv2 import cv2
<<detect-tables>> <<detect-tables>>
def main(files):
results = []
for f in files:
directory, filename = os.path.split(f)
image = cv2.imread(f, cv2.IMREAD_GRAYSCALE)
tables = find_tables(image)
files = []
filename_sans_extension = os.path.splitext(filename)[0]
if tables:
os.makedirs(os.path.join(directory, filename_sans_extension), exist_ok=True)
for i, table in enumerate(tables):
table_filename = "table-{:03d}.png".format(i)
table_filepath = os.path.join(
directory, filename_sans_extension, table_filename
)
files.append(table_filepath)
cv2.imwrite(table_filepath, table)
if tables:
results.append((f, files))
# Results is [[<input image>, [<images of detected tables>]]]
return results
#+END_SRC #+END_SRC
**** table_ocr/extract_tables/__main__.py **** table_ocr/extract_tables/__main__.py
@ -876,43 +899,15 @@ For each image path given as an agument, outputs:
#+NAME: extract_tables/__main__.py #+NAME: extract_tables/__main__.py
#+BEGIN_SRC python :tangle table_ocr/extract_tables/__main__.py :results none #+BEGIN_SRC python :tangle table_ocr/extract_tables/__main__.py :results none
import argparse import argparse
import os
import cv2 from table_ocr.extract_tables import main
from table_ocr.extract_tables import find_tables
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("files", nargs="+") parser.add_argument("files", nargs="+")
args = parser.parse_args()
files = args.files
def main(files): results = main(files)
results = [] for image, tables in results:
for f in files:
directory, filename = os.path.split(f)
image = cv2.imread(f, cv2.IMREAD_GRAYSCALE)
tables = find_tables(image)
files = []
filename_sans_extension = os.path.splitext(filename)[0]
if tables:
os.makedirs(os.path.join(directory, filename_sans_extension), exist_ok=True)
for i, table in enumerate(tables):
table_filename = "table-{:03d}.png".format(i)
table_filepath = os.path.join(
directory, filename_sans_extension, table_filename
)
files.append(table_filepath)
cv2.imwrite(table_filepath, table)
if tables:
results.append((f, files))
# Results is [[<input image>, [<images of detected tables>]]]
return results
if __name__ == "__main__":
args = parser.parse_args()
files = args.files
results = main(files)
for image, tables in results:
print("\n".join(tables)) print("\n".join(tables))
#+END_SRC #+END_SRC

@ -1,3 +1,4 @@
import os
import cv2 import cv2
def find_tables(image): def find_tables(image):
@ -47,3 +48,25 @@ def find_tables(image):
# Leaving that step as a future TODO if it is ever necessary. # Leaving that step as a future TODO if it is ever necessary.
images = [image[y:y+h, x:x+w] for x, y, w, h in bounding_rects] images = [image[y:y+h, x:x+w] for x, y, w, h in bounding_rects]
return images return images
def main(files):
results = []
for f in files:
directory, filename = os.path.split(f)
image = cv2.imread(f, cv2.IMREAD_GRAYSCALE)
tables = find_tables(image)
files = []
filename_sans_extension = os.path.splitext(filename)[0]
if tables:
os.makedirs(os.path.join(directory, filename_sans_extension), exist_ok=True)
for i, table in enumerate(tables):
table_filename = "table-{:03d}.png".format(i)
table_filepath = os.path.join(
directory, filename_sans_extension, table_filename
)
files.append(table_filepath)
cv2.imwrite(table_filepath, table)
if tables:
results.append((f, files))
# Results is [[<input image>, [<images of detected tables>]]]
return results

@ -1,39 +1,11 @@
import argparse import argparse
import os
import cv2 from table_ocr.extract_tables import main
from table_ocr.extract_tables import find_tables
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("files", nargs="+") parser.add_argument("files", nargs="+")
args = parser.parse_args()
files = args.files
def main(files): results = main(files)
results = [] for image, tables in results:
for f in files:
directory, filename = os.path.split(f)
image = cv2.imread(f, cv2.IMREAD_GRAYSCALE)
tables = find_tables(image)
files = []
filename_sans_extension = os.path.splitext(filename)[0]
if tables:
os.makedirs(os.path.join(directory, filename_sans_extension), exist_ok=True)
for i, table in enumerate(tables):
table_filename = "table-{:03d}.png".format(i)
table_filepath = os.path.join(
directory, filename_sans_extension, table_filename
)
files.append(table_filepath)
cv2.imwrite(table_filepath, table)
if tables:
results.append((f, files))
# Results is [[<input image>, [<images of detected tables>]]]
return results
if __name__ == "__main__":
args = parser.parse_args()
files = args.files
results = main(files)
for image, tables in results:
print("\n".join(tables)) print("\n".join(tables))

Loading…
Cancel
Save