image-table-ocr/table_ocr/extract_tables/__main__.py

import argparse
import os

import cv2

from table_ocr.extract_tables import find_tables

# Command-line interface: one or more image paths as positional arguments.
parser = argparse.ArgumentParser(
    description=(
        "Find tables in images and save each detected table as a PNG in a "
        "directory named after the source image."
    )
)
parser.add_argument(
    "files",
    nargs="+",
    help="path of an image file to search for tables (one or more)",
)


def main(files):
    """Extract the tables found in each input image and save them as PNGs.

    Each path in ``files`` is read as a grayscale image and handed to
    ``find_tables``. When tables are found, a directory named after the image
    (its file name without the extension) is created next to the image, and
    every table is written there as ``table-NNN.png``, numbered from 000 in
    the order ``find_tables`` produced them.

    Args:
        files: Iterable of image file paths.

    Returns:
        A list of ``(input_path, [table_image_paths])`` tuples, one for each
        input image in which at least one table was found. Images without
        tables are omitted.
    """
    results = []
    for f in files:
        directory, filename = os.path.split(f)
        # NOTE(review): cv2.imread reports an unreadable file by returning
        # None instead of raising; the value is passed on unchecked.
        image = cv2.imread(f, cv2.IMREAD_GRAYSCALE)
        tables = find_tables(image)
        if not tables:
            # Nothing detected: create no directory and report nothing.
            continue

        output_dir = os.path.join(directory, os.path.splitext(filename)[0])
        os.makedirs(output_dir, exist_ok=True)

        # Kept under its own name so the ``files`` argument is not rebound
        # while it is being iterated.
        table_paths = []
        for i, table in enumerate(tables):
            table_path = os.path.join(output_dir, "table-{:03d}.png".format(i))
            table_paths.append(table_path)
            cv2.imwrite(table_path, table)
        results.append((f, table_paths))
    return results

if __name__ == "__main__":
    # Script mode only: extract tables from the files named on the command
    # line, then print the path of every table image written, one per line
    # (one print call per input image that had tables).
    cli_args = parser.parse_args()
    for _source_image, table_paths in main(cli_args.files):
        print("\n".join(table_paths))
Refactor table extraction into module 5 years ago			`import argparse`
			`import os`

			`import cv2`

			`from table_ocr.extract_tables import find_tables`

			`parser = argparse.ArgumentParser()`
			`parser.add_argument("files", nargs="+")`


			`def main(files):`
			`results = []`
			`for f in files:`
			`directory, filename = os.path.split(f)`
			`image = cv2.imread(f, cv2.IMREAD_GRAYSCALE)`
			`tables = find_tables(image)`
			`files = []`
			`filename_sans_extension = os.path.splitext(filename)[0]`
			`if tables:`
			`os.makedirs(os.path.join(directory, filename_sans_extension), exist_ok=True)`
			`for i, table in enumerate(tables):`
			`table_filename = "table-{:03d}.png".format(i)`
			`table_filepath = os.path.join(`
			`directory, filename_sans_extension, table_filename`
			`)`
			`files.append(table_filepath)`
			`cv2.imwrite(table_filepath, table)`
			`if tables:`
			`results.append((f, files))`
Return value from main rather than print We only really want to print if we are running the module as a script. It's nice to allow `main` to be imported and used from other code, and that code probably wants a returned value rather than having to read from stdout. 5 years ago			`# Results is [[<input image>, [<images of detected tables>]]]`
			`return results`
Refactor table extraction into module 5 years ago
			`if __name__ == "__main__":`
			`args = parser.parse_args()`
			`files = args.files`
Return value from main rather than print We only really want to print if we are running the module as a script. It's nice to allow `main` to be imported and used from other code, and that code probably wants a returned value rather than having to read from stdout. 5 years ago			`results = main(files)`
			`for image, tables in results:`
			`print("\n".join(tables))`