From 3b31888a55979f12ec00308617e884fbd7b1f0d9 Mon Sep 17 00:00:00 2001 From: Eric Ihli Date: Wed, 14 Oct 2020 21:07:25 -0700 Subject: [PATCH] Include tesseract traineddata files Includes a english model and a model trained specifically on cells extracted from tables. --- .gitignore | 1 - dist/table_ocr-0.0.1-py3-none-any.whl | Bin 0 -> 8006 bytes dist/table_ocr-0.1.0-py3-none-any.whl | Bin 0 -> 33349187 bytes dist/table_ocr-0.1.1-py3-none-any.whl | Bin 0 -> 18711 bytes dist/table_ocr-0.2.0-py3-none-any.whl | Bin 0 -> 33349187 bytes pdf_table_extraction_and_ocr.org | 38 +++++++++++++++++------ setup.py | 13 ++++---- table_ocr/ocr_image/__init__.py | 5 +++ table_ocr/tessdata/eng.traineddata | Bin 0 -> 23466654 bytes table_ocr/tessdata/table-ocr.traineddata | Bin 0 -> 11704518 bytes 10 files changed, 39 insertions(+), 18 deletions(-) create mode 100644 dist/table_ocr-0.0.1-py3-none-any.whl create mode 100644 dist/table_ocr-0.1.0-py3-none-any.whl create mode 100644 dist/table_ocr-0.1.1-py3-none-any.whl create mode 100644 dist/table_ocr-0.2.0-py3-none-any.whl create mode 100644 table_ocr/tessdata/eng.traineddata create mode 100644 table_ocr/tessdata/table-ocr.traineddata diff --git a/.gitignore b/.gitignore index 0cbdb7d..6dbad59 100644 --- a/.gitignore +++ b/.gitignore @@ -7,5 +7,4 @@ tmp/ *.egg build htmlcov -dist *.egg-info diff --git a/dist/table_ocr-0.0.1-py3-none-any.whl b/dist/table_ocr-0.0.1-py3-none-any.whl new file mode 100644 index 0000000000000000000000000000000000000000..267920e655e33e63ba48b293fa35426e29981eaa GIT binary patch literal 8006 zcmbW6bx>UC(uW6kcMBRMxO)g1+$}hR2N__Hf#9wI211ZPa2ebQK|>(HEw~2>F2RC- zWbf|HE_ZkT_|DX+si`{s%zNJM`gOnE>PiTRga7~l72sG9rJjg9$%AnB3-0d5xx1Y$ ztvO9i?VRk~OiekQy;k4>>i>N{r#w>K60M7(>Fzn9JA+KfrfJ zI;sQDJGn4hFIi+%(Lypk$XBcNrs`;n*eEKh)_SphuY19rDXJHK`6=j12&~!q#g<>& zMZ52F8F!LzPigz&IHv>A8Aq9f#thx z6*moC=c|5E!v?8p&ku6N%%JY$eR(s4fn~r^PBVSQY09^?v4EGEC0iozD=qkM6!F37 zh4z^o5~YzG>@A%#I1$r~QX!7PXjOW2s=46fbdq21`g3pAVc`^%$N&Hc7XYCC{avjf zZm#AQZl)Gi4h~>bYgdq?shc^_!Rqd{GxQz7GeWpGgB8Q8kKfEU5!!+?%+qi}8bj^y zNyA%>!$wdI&G_rr5@hikz-P0d6QBb^s!2LdfQR1up&A2)l1jdBQgkM6Dd###vV|8^ z3plX1UMzMR>XK5^n=)>(njd&Wtv`om_pl2$1;E zQRg2CD)7GqOmDUws^7*&dek(wliV;}A(<*8_&28vmUrY$j{1t{7(O(lwY6m35}zhU zqMsI)SAp!)RC}@|d$L7HdhlNaeX8ku79bJJK|8o~{DmhiRzln8LeAsp&<8K38{>*2 z#dw+3KQEw_)HdTrJ-Rx`mCIQ{#zTe*(+PM8rd5 z{4w%)@f!AaAhmwpRzD`NSD?zlYm1n>r#Mp`S2Wmk)nTIUZedh1Nws_dfR??_+%eebbW_jL_BjNgCzT8S&Ndgvods`Gx7~x zdvyERfU_8xO2wh=V;pN!vo_-a&g=E^QEFi#wtOf$=C>WW?u)2#lu%U8x{qk$c@=cN zZ6RCM#kgrDDeFi|kOG_2rWUPZ%(WQ-y?PY5eg^2bAc?PUn5?KY`_^fiQc8+yiAcM9 z7d?|j-NRs0d({QUDBhp&Q6qln?q$)D5ky90O~1u2W=*wBca)MLVF#KF7k-(bWnR+! z%Gzk5nB<~6?35Uqmi3Ge(tujc4}Yq}^27)q|afnvTH&N?FTWIYWf9M(no;J$bHDAL3=d>=-b}7F*W1m|atz zf$*Mxjn7CD6WO(dY`jdiK~vDflLXp+KTrmHy#0Kj$mHQJR>j&Swo*;>Ey!92< z0Gf^7EzEA<5!VUB*l6G)j3M|CNsLWiW-%4s&8oGAjG}p4-xi{U*=ou@f^m6`kM(>T z&cuaZQd8;lWa2;GQZDqm+X3py&Fzlfv2<%SFo62;<(4Td&pL!%oia z7a|n79oE{o^D{LDM6#S$>-2{zbPbf8`pnNMo3=kKuN;NvUuyRod+qe>CeAwjXc5l9 z{sPy%bsb*SB%;QpdrRE@!_xMW72-|&P>3Y`g^=Xjp#NVJ3k0zE7z+si;KKv}i0&np z-?GWiESjO~2$~hZz1cIBgqY^Zjzi0z1p>&jrVK2;L}GOfjJm^h$k%qs;v+#{t{~5Y z77t~iRyNcI2rm8aSHMFp@nX6YM$9rFVT@9va5eTEC<8;so4aOaN$?pBVdf0F+fd!- zPJEgB%2H6P6rl`3qw1hu^MFUVJx9wU!_=6_WX3tr%3fZBTZe!p`XVC|#h_@;Gr7uD zcy_#!_^N8Ti$rcot*$NsuC%BmLID@D37NI*QjCJ;{Kd2yXlX562wt_OI@GJLy_8W; zfim_moZ-TUH>$v;;b{VjGg^9bX>X;m7jjVzcd1A{mId+De40=h4Mq_2)-%*Lj2y|~ zohpV`ACI4nDDs%>q9CAGaG$hME}n5=axL_99<0q$(EA*@dR*C`vX_>Ob0Jzk+%!8C zLykDV4Tjn6;9i7Fk9(jGd+t+*FiC4p*7i0(1}+ZkTGQ(%7EKHaZ}b}{@)b2f>%+6k zX3#f8>fW3RD;krcjJ?VwR`%TPYiMJ=o@8{Owl8vLM0-)xYMpg?QE5^7I(oAZ(LW|} z8xjR#^zVw--e2_%P+geaa3Z!4dkH$EdY}e{)TZ>8cdRxRUS37*W$R-|@=h*Me>PB* zgJHxQoOJ|#TjDqEuvcZPDcQh|vmSdE1j=Rk;q}6Mi!Lqpu|U-u{po?iXj%MN;vaFA z9-$Rqc8de6bgr>&GhkQrb@mTzA?Xqk~Jr~IV;}U+o z8cV-O_jCBw&E$o{NWUr{A6t>dMXBUN@?sf1WUb}P)3$xMnZlZapjX-&fy;X)#dAShv+{Xb^oOGW_K#^7;hAiRWe-^) zRvr(+nKo>+DaH)aA1uv^A6TN@#MqlSh{nkYpo~DuGe$E__3fK0N>7^}@s;~VU$jr^ zAUg(}B+h*m7)1I5{(JhH)uA7Y-tMav9X zqOg6Hzx1#>V!P^yH0WA3u=!rxYXuEE4osW1_6Z~xi@;7k zmq!4jHC#TW<=KEE*wjqR{>8BKb3Ba*d@dDXBZ{1C!TeQ)BVFMQm8p2vpk#ZYOhR_+ zl~gCy0$fCe<(TZK0&PXQdh;{pln<$y$H=c})c4 zIjYFHyb}9764cyIJwZd|MYGg3CjE>^wv?a?Jg}3oU8u2jK+12HsGMSJ$WdLMmqDIa z$Uj2!9p_jNy~pHs1zG1keYC4oY{Mmw&#%0D#Im@UNO-&W#_bBS)bM2Fxi+Y zzr#{?T8AhvEjOT?QCRGwAwsF-2wU&)k}g?|NNP69VrvZJEoZ1ZI`{e2T5z|7)skS8 zx}08AkUwTSbPjlpS90BPfOc3(XT;-?Gzz)4cTGdB4Mz{^8Q)RP8mLC=Zux?z%`S%q zp%~(#Jn~RCe@p8vUgA8>Q)Ki(dwUZo@c3z$fK*lmQ3ls#TWfm_$#|%*5KvXnSR~BS~fm&ROJlUya~N zNw^CpQxr|(WQ{rWatnvAF7ftN6IdH&zdsm)bAgvoipIb1r7# z)QTe}-FB0lg!?VPl4(!ZUGnJ{O?8Yao~kX6!Ys?g)O-~dDXtl);$wVnpw`@FT$ES4 zQ=qMu)Z029BSZ?=f-Nsvo0h2B{GzU}z5$-#UK0v1wE_Q+BPGtvXBpiEgZP*YiLyY?nI~fbX?_l1P@h^roFE zR?|PB6Hxkkfbb((&}4D=)}tO!*MR#d+IZ%D@up|uZ z{aL9Yjnr#VHp!2Jqi*jtBvO~sl=$zO5q9tZ0LJfH;O=JUa92S& zCUiJ;(qM+|d!Z+3(YXun@)eKP>P;cY@TCwEaq-I8@$b%~DD*Vh$;&S--dvmWWfKs1 zhp`%h@w<^2%JG<5-UOz-&}Ne0&WYtWbIFW-yG`U7*n#B8#uv+hrF!^Iq%Ba{!L52f zB`BWTzAj>kUUGFRNvAaMqeg%GipS=Kj@LCp;ge{jO{k=tRZ$-W599pBYq`}>OP;*i z6=}`WE+NZpJB{i3Jns4Wt2##PvGa!M@_9`ChaTp!+oSC_HV39>-(O0VIUf?|Z{be| zGasQthddpENQa+(E;{RoNo~k;Vpgm+s$+N5FZ}8?PTXk|2S&5w?$3Nu z8E?K3r5Eb@9p2)+O$wx4&o=|u)qeAN|5mgy`q4KITwAbs{aa