diff --git a/ocr_tables b/ocr_tables
new file mode 100755
index 0000000..d8c3217
--- /dev/null
+++ b/ocr_tables
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+PDF=$1
+
+python -m table_ocr.prepare_pdfs $PDF | grep .png > /tmp/pdf-images.txt
+cat /tmp/pdf-images.txt | xargs -I{} python -m table_ocr.extract_tables {} | grep table > /tmp/extracted-tables.txt
+cat /tmp/extracted-tables.txt | xargs -I{} python -m table_ocr.extract_cells_from_table {} | grep cells > /tmp/extracted-cells.txt
+cat /tmp/extracted-cells.txt | xargs -I{} python -m table_ocr.ocr_image {} --psm 7 -l data-table
+
+for image in $(cat /tmp/extracted-tables.txt); do
+    dir=$(dirname $image)
+    python -m table_ocr.ocr_to_csv $(find $dir/cells -name "*.txt")
+done
diff --git a/pdf_table_extraction_and_ocr.org b/pdf_table_extraction_and_ocr.org
index 6004b31..04007c0 100644
--- a/pdf_table_extraction_and_ocr.org
+++ b/pdf_table_extraction_and_ocr.org
@@ -26,16 +26,33 @@
 output~ to a code block will minimize that noise.
 
 #+BEGIN_SRC shell :results none :session *Shell*
 TABLES=("/tmp/example-1/example-1.pdf" "/tmp/example-2/example-2.pdf")
-python -m table_ocr.prepare_pdfs $TABLES | grep .png > /tmp/pdf_images.txt
-# All pngs that don't have "table" in their name. Assume "table" has already been found for files with table in name.
-cat /tmp/pdf-images.txt | xargs -I{} python -m table_ocr.extract_tables {} # | grep tables > /tmp/extracted-tables.txt
-cat /tmp/extracted-tables.txt | xargs -I{} python -m table_ocr.extract_cells_from_table {} # | grep cells > /tmp/extracted-cells.txt
+python -m table_ocr.prepare_pdfs $TABLES | grep .png > /tmp/pdf-images.txt
+cat /tmp/pdf-images.txt | xargs -I{} python -m table_ocr.extract_tables {} | grep table > /tmp/extracted-tables.txt
+cat /tmp/extracted-tables.txt | xargs -I{} python -m table_ocr.extract_cells_from_table {} | grep cells > /tmp/extracted-cells.txt
 cat /tmp/extracted-cells.txt | xargs -I{} python -m table_ocr.ocr_image {}
 # This next one needs to be run on each subdirectory one at a time.
 python -m table_ocr.ocr_to_csv $(find . -iregex ".*cells.*ocr_data.*\.txt" 2>/dev/null)
 #+END_SRC
 
+Or, as a shell script.
+
+#+BEGIN_SRC shell :results none :tangle ocr_tables :tangle-mode (identity #o755)
+#!/bin/sh
+
+PDF=$1
+
+python -m table_ocr.prepare_pdfs $PDF | grep .png > /tmp/pdf-images.txt
+cat /tmp/pdf-images.txt | xargs -I{} python -m table_ocr.extract_tables {} | grep table > /tmp/extracted-tables.txt
+cat /tmp/extracted-tables.txt | xargs -I{} python -m table_ocr.extract_cells_from_table {} | grep cells > /tmp/extracted-cells.txt
+cat /tmp/extracted-cells.txt | xargs -I{} python -m table_ocr.ocr_image {} --psm 7 -l data-table
+
+for image in $(cat /tmp/extracted-tables.txt); do
+    dir=$(dirname $image)
+    python -m table_ocr.ocr_to_csv $(find $dir/cells -name "*.txt")
+done
+#+END_SRC
+
 * Preparing data
 
 ** Converting PDFs to images
@@ -506,30 +523,32 @@ def crop_to_text(image):
         SUBTRACT_FROM_MEAN,
     )
 
-    # Get rid of littl noise.
-    kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
-    opened = cv2.morphologyEx(img_bin, cv2.MORPH_OPEN, kernel)
+    img_h, img_w = image.shape
+    horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (int(img_w * 0.5), 1))
+    vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, int(img_h * 0.7)))
+    horizontal_lines = cv2.morphologyEx(img_bin, cv2.MORPH_OPEN, horizontal_kernel)
+    vertical_lines = cv2.morphologyEx(img_bin, cv2.MORPH_OPEN, vertical_kernel)
+    both = horizontal_lines + vertical_lines
+    cleaned = img_bin - both
 
-    # Dilate so each digit is connected, so we can get a bounding rectangle
-    # around all of the digits as one contour. This will make the bounding
-    # rectangle 8 pixels wider on the left and right, so we'll need to crop that
-    # out at the end so that we don't pick up stray border pixels.
-    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (16, 1))
-    dilated = cv2.dilate(opened, kernel)
-
-    contours, hierarchy = cv2.findContours(dilated, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
+    # Get rid of little noise.
+    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
+    opened = cv2.morphologyEx(cleaned, cv2.MORPH_OPEN, kernel)
+    contours, hierarchy = cv2.findContours(opened, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
     bounding_rects = [cv2.boundingRect(c) for c in contours]
-
+    NUM_PX_COMMA = 6
     if bounding_rects:
-        # The largest contour is certainly the text that we're looking for.
-        largest_rect = max(bounding_rects, key=lambda r: r[2] * r[3])
-        x, y, w, h = largest_rect
-        # Commas sometimes go a little below the bounding box and we don't want
-        # to lost them or turn them into periods.
-        img_h, img_w = image.shape
-        cropped = image[y:min(img_h, y+h+6), x+8:x+w-8]
+        minx, miny, maxx, maxy = math.inf, math.inf, 0, 0
+        for x, y, w, h in bounding_rects:
+            minx = min(minx, x)
+            miny = min(miny, y)
+            maxx = max(maxx, x + w)
+            maxy = max(maxy, y + h)
+        x, y, w, h = minx, miny, maxx - minx, maxy - miny
+        cropped = image[y:min(img_h, y+h+NUM_PX_COMMA), x:min(img_w, x+w)]
     else:
+        # If we morphed out all of the text, fall back to using the unmorphed image.
        cropped = image
     bordered = cv2.copyMakeBorder(cropped, 5, 5, 5, 5, cv2.BORDER_CONSTANT, None, 255)
     return bordered
@@ -549,7 +568,6 @@ cv2.imwrite("resources/examples/example-table-cell-1-1-cropped.png", image)
 #+ATTR_HTML: :width 200px :height 100%
 [[file:resources/examples/example-table-cell-1-1-cropped.png]]
 
-
 ** OCR each cell
 
 If we cleaned up the images well enough, we might get some accurate OCR!
@@ -813,31 +831,44 @@ python -m table_ocr.ocr_cell resources/examples/cells/000-000.png
 : PRIZE
 
 #+BEGIN_SRC python :tangle table_ocr/ocr_image.py :mkdirp yes :results none
+import argparse
+import math
 import os
 import sys
 
 import cv2
 import pytesseract
 
+description="""Takes a single argument that is the image to OCR.
+Remaining arguments are passed directly to Tesseract.
+
+Attempts to make OCR more accurate by performing some modifications on the image.
+Saves the modified image and the OCR text in an `ocr_data` directory.
+Filenames are in the format expected for training with tesstrain."""
+parser = argparse.ArgumentParser(description=description)
+parser.add_argument("image", help="filepath of image to perform OCR")
+
 <>
 <>
 
-def main(f):
-    directory, filename = os.path.split(f)
+def main(image_file, tess_args):
+    directory, filename = os.path.split(image_file)
     filename_sans_ext, ext = os.path.splitext(filename)
-    image = cv2.imread(f, cv2.IMREAD_GRAYSCALE)
+    image = cv2.imread(image_file, cv2.IMREAD_GRAYSCALE)
     cropped = crop_to_text(image)
     ocr_data_dir = os.path.join(directory, "ocr_data")
     os.makedirs(ocr_data_dir, exist_ok=True)
     out_imagepath = os.path.join(ocr_data_dir, filename)
     out_txtpath = os.path.join(ocr_data_dir, "{}.gt.txt".format(filename_sans_ext))
     cv2.imwrite(out_imagepath, cropped)
-    txt = ocr_image(cropped, "--psm 7")
+    txt = ocr_image(cropped, " ".join(tess_args))
+    print(txt)
     with open(out_txtpath, "w") as txt_file:
         txt_file.write(txt)
 
 if __name__ == "__main__":
-    main(sys.argv[1])
+    args, tess_args = parser.parse_known_args()
+    main(args.image, tess_args)
 #+END_SRC
 
 *** table_ocr/ocr_to_csv.py
@@ -854,6 +885,13 @@ parser = argparse.ArgumentParser()
 parser.add_argument("files", nargs="+")
 
 def main(files):
+    """Files must be sorted lexicographically.
+    Filenames must be <row>-<column>.txt.
+    000-000.txt
+    000-001.txt
+    001-000.txt
+    etc...
+    """
     rows = []
     for f in files:
         directory, filename = os.path.split(f)
@@ -871,9 +909,9 @@ def main(files):
 
 if __name__ == "__main__":
     args = parser.parse_args()
-    main(args.files)
-
-
+    files = args.files
+    files.sort()
+    main(files)
 #+END_SRC
 
 * Utils
@@ -899,10 +937,6 @@ with ~advice-add~.
   (concat "#+ATTR_HTML: :width " width " :height " height "\n[[file:" text "]]")
 #+END_SRC
 
-#+RESULTS: html-image-size
-#+ATTR_HTML: :width 100% :height 100%
-[[file:]]
-
 #+BEGIN_SRC emacs-lisp :results none
 (defun remove-attributes-from-src-block-result (&rest args)
   (let ((location (org-babel-where-is-src-block-result))
diff --git a/resources/examples/example-table-cell-1-1-cropped.png b/resources/examples/example-table-cell-1-1-cropped.png
index 4aba6ec..5bbcbe9 100644
Binary files a/resources/examples/example-table-cell-1-1-cropped.png and b/resources/examples/example-table-cell-1-1-cropped.png differ
diff --git a/table_ocr/ocr_image.py b/table_ocr/ocr_image.py
index e2c886b..f92e786 100644
--- a/table_ocr/ocr_image.py
+++ b/table_ocr/ocr_image.py
@@ -1,9 +1,20 @@
+import argparse
+import math
 import os
 import sys
 
 import cv2
 import pytesseract
 
+description="""Takes a single argument that is the image to OCR.
+Remaining arguments are passed directly to Tesseract.
+
+Attempts to make OCR more accurate by performing some modifications on the image.
+Saves the modified image and the OCR text in an `ocr_data` directory.
+Filenames are in the format expected for training with tesstrain."""
+parser = argparse.ArgumentParser(description=description)
+parser.add_argument("image", help="filepath of image to perform OCR")
+
 def crop_to_text(image):
     MAX_COLOR_VAL = 255
     BLOCK_SIZE = 15
@@ -18,30 +29,32 @@ def crop_to_text(image):
         SUBTRACT_FROM_MEAN,
     )
 
-    # Get rid of littl noise.
-    kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
-    opened = cv2.morphologyEx(img_bin, cv2.MORPH_OPEN, kernel)
+    img_h, img_w = image.shape
+    horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (int(img_w * 0.5), 1))
+    vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, int(img_h * 0.7)))
+    horizontal_lines = cv2.morphologyEx(img_bin, cv2.MORPH_OPEN, horizontal_kernel)
+    vertical_lines = cv2.morphologyEx(img_bin, cv2.MORPH_OPEN, vertical_kernel)
+    both = horizontal_lines + vertical_lines
+    cleaned = img_bin - both
 
-    # Dilate so each digit is connected, so we can get a bounding rectangle
-    # around all of the digits as one contour. This will make the bounding
-    # rectangle 8 pixels wider on the left and right, so we'll need to crop that
-    # out at the end so that we don't pick up stray border pixels.
-    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (16, 1))
-    dilated = cv2.dilate(opened, kernel)
-
-    contours, hierarchy = cv2.findContours(dilated, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
+    # Get rid of little noise.
+    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
+    opened = cv2.morphologyEx(cleaned, cv2.MORPH_OPEN, kernel)
+    contours, hierarchy = cv2.findContours(opened, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
     bounding_rects = [cv2.boundingRect(c) for c in contours]
-
+    NUM_PX_COMMA = 6
    if bounding_rects:
-        # The largest contour is certainly the text that we're looking for.
-        largest_rect = max(bounding_rects, key=lambda r: r[2] * r[3])
-        x, y, w, h = largest_rect
-        # Commas sometimes go a little below the bounding box and we don't want
-        # to lost them or turn them into periods.
-        img_h, img_w = image.shape
-        cropped = image[y:min(img_h, y+h+6), x+8:x+w-8]
+        minx, miny, maxx, maxy = math.inf, math.inf, 0, 0
+        for x, y, w, h in bounding_rects:
+            minx = min(minx, x)
+            miny = min(miny, y)
+            maxx = max(maxx, x + w)
+            maxy = max(maxy, y + h)
+        x, y, w, h = minx, miny, maxx - minx, maxy - miny
+        cropped = image[y:min(img_h, y+h+NUM_PX_COMMA), x:min(img_w, x+w)]
     else:
+        # If we morphed out all of the text, fall back to using the unmorphed image.
         cropped = image
     bordered = cv2.copyMakeBorder(cropped, 5, 5, 5, 5, cv2.BORDER_CONSTANT, None, 255)
     return bordered
@@ -51,19 +64,21 @@ def ocr_image(image, config):
         config=config
     )
 
-def main(f):
-    directory, filename = os.path.split(f)
+def main(image_file, tess_args):
+    directory, filename = os.path.split(image_file)
     filename_sans_ext, ext = os.path.splitext(filename)
-    image = cv2.imread(f, cv2.IMREAD_GRAYSCALE)
+    image = cv2.imread(image_file, cv2.IMREAD_GRAYSCALE)
     cropped = crop_to_text(image)
     ocr_data_dir = os.path.join(directory, "ocr_data")
     os.makedirs(ocr_data_dir, exist_ok=True)
     out_imagepath = os.path.join(ocr_data_dir, filename)
     out_txtpath = os.path.join(ocr_data_dir, "{}.gt.txt".format(filename_sans_ext))
     cv2.imwrite(out_imagepath, cropped)
-    txt = ocr_image(cropped, "--psm 7")
+    txt = ocr_image(cropped, " ".join(tess_args))
+    print(txt)
     with open(out_txtpath, "w") as txt_file:
         txt_file.write(txt)
 
 if __name__ == "__main__":
-    main(sys.argv[1])
+    args, tess_args = parser.parse_known_args()
+    main(args.image, tess_args)
diff --git a/table_ocr/ocr_to_csv.py b/table_ocr/ocr_to_csv.py
index d390bd6..2560233 100644
--- a/table_ocr/ocr_to_csv.py
+++ b/table_ocr/ocr_to_csv.py
@@ -9,6 +9,13 @@ parser = argparse.ArgumentParser()
 parser.add_argument("files", nargs="+")
 
 def main(files):
+    """Files must be sorted lexicographically.
+    Filenames must be <row>-<column>.txt.
+    000-000.txt
+    000-001.txt
+    001-000.txt
+    etc...
+    """
     rows = []
     for f in files:
         directory, filename = os.path.split(f)
@@ -26,4 +33,6 @@ def main(files):
 
 if __name__ == "__main__":
     args = parser.parse_args()
-    main(args.files)
+    files = args.files
+    files.sort()
+    main(files)
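
A note on the ~files.sort()~ call added above: ~ocr_to_csv~ assembles the CSV one row at a time from files named by row and column (the ~000-000.txt~, ~000-001.txt~ pattern in the docstring), so a plain lexicographic sort hands it the cells grouped by row and ordered by column within each row. The sketch below only illustrates that grouping; it is not part of the diff, and the filenames in it are hypothetical.

#+BEGIN_SRC python
import os

# Hypothetical cell-text filenames following the <row>-<column>.txt naming
# scheme described in the ocr_to_csv docstring.
files = ["001-000.txt", "000-001.txt", "000-000.txt"]

# Lexicographic order groups cells by row and orders columns within each row,
# which is the precondition the docstring states.
files.sort()

rows = []
for f in files:
    row, _col = os.path.splitext(os.path.basename(f))[0].split("-")
    if not rows or rows[-1][0] != row:
        rows.append((row, []))
    rows[-1][1].append(f)

print(rows)  # [('000', ['000-000.txt', '000-001.txt']), ('001', ['001-000.txt'])]
#+END_SRC

Relatedly, the reworked ~ocr_image~ entry point uses ~parse_known_args~ so that any arguments it does not recognize are forwarded to Tesseract, which is what lets the ~ocr_tables~ script pass ~--psm 7 -l data-table~ straight through.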