From 9ae847039b26b5c3b6f52be03d1614a63a6f21a8 Mon Sep 17 00:00:00 2001 From: Trenton Holmes Date: Wed, 14 Sep 2022 14:00:37 -0700 Subject: [PATCH] Fixes the separation of files by barcode, during the case where 2 barcodes appear back to back --- src/documents/barcodes.py | 76 ++++++++++-------- .../samples/barcodes/patch-code-t-double.pdf | Bin 0 -> 36146 bytes src/documents/tests/test_barcodes.py | 20 +++++ 3 files changed, 63 insertions(+), 33 deletions(-) create mode 100644 src/documents/tests/samples/barcodes/patch-code-t-double.pdf diff --git a/src/documents/barcodes.py b/src/documents/barcodes.py index ccfae37cbe..d8a73e2777 100644 --- a/src/documents/barcodes.py +++ b/src/documents/barcodes.py @@ -8,6 +8,7 @@ from typing import List # for type hinting. Can be removed, if only Python >3.8 import magic from django.conf import settings from pdf2image import convert_from_path +from pikepdf import Page from pikepdf import Pdf from PIL import Image from PIL import ImageSequence @@ -122,47 +123,56 @@ def separate_pages(filepath: str, pages_to_split_on: List[int]) -> List[str]: Returns a list of (temporary) filepaths to consume. These will need to be deleted later. 
""" + + document_paths = [] + + if not pages_to_split_on: + logger.warning("No pages to split on!") + return document_paths + os.makedirs(settings.SCRATCH_DIR, exist_ok=True) tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR) fname = os.path.splitext(os.path.basename(filepath))[0] pdf = Pdf.open(filepath) - document_paths = [] - logger.debug(f"Temp dir is {str(tempdir)}") - if not pages_to_split_on: - logger.warning("No pages to split on!") - else: - # go from the first page to the first separator page + + # A list of documents, ie a list of lists of pages + documents: List[List[Page]] = [] + # A single document, ie a list of pages + document: List[Page] = [] + + for idx, page in enumerate(pdf.pages): + # Keep building the new PDF as long as it is not a + # separator index + if idx not in pages_to_split_on: + document.append(page) + # Make sure to append the very last document to the documents + if idx == (len(pdf.pages) - 1): + documents.append(document) + document = [] + else: + # This is a split index, save the current PDF pages, and restart + # a new destination page listing + logger.debug(f"Starting new document at idx {idx}") + documents.append(document) + document = [] + + documents = [x for x in documents if len(x)] + + logger.debug(f"Split into {len(documents)} new documents") + + # Write the new documents out + for doc_idx, document in enumerate(documents): dst = Pdf.new() - for n, page in enumerate(pdf.pages): - if n < pages_to_split_on[0]: - dst.pages.append(page) - output_filename = f"{fname}_document_0.pdf" + dst.pages.extend(document) + + output_filename = f"{fname}_document_{doc_idx}.pdf" + + logger.debug(f"pdf no:{doc_idx} has {len(dst.pages)} pages") savepath = os.path.join(tempdir, output_filename) with open(savepath, "wb") as out: dst.save(out) - document_paths = [savepath] - - # iterate through the rest of the document - for count, page_number in enumerate(pages_to_split_on): - logger.debug(f"Count: {str(count)} 
page_number: {str(page_number)}") - dst = Pdf.new() - try: - next_page = pages_to_split_on[count + 1] - except IndexError: - next_page = len(pdf.pages) - # skip the first page_number. This contains the barcode page - for page in range(page_number + 1, next_page): - logger.debug( - f"page_number: {str(page_number)} next_page: {str(next_page)}", - ) - dst.pages.append(pdf.pages[page]) - output_filename = f"{fname}_document_{str(count + 1)}.pdf" - logger.debug(f"pdf no:{str(count)} has {str(len(dst.pages))} pages") - savepath = os.path.join(tempdir, output_filename) - with open(savepath, "wb") as out: - dst.save(out) - document_paths.append(savepath) - logger.debug(f"Temp files are {str(document_paths)}") + document_paths.append(savepath) + return document_paths diff --git a/src/documents/tests/samples/barcodes/patch-code-t-double.pdf b/src/documents/tests/samples/barcodes/patch-code-t-double.pdf new file mode 100644 index 0000000000000000000000000000000000000000..b68d3cc7f70f5a6fee319c419589eee00a9ed294 GIT binary patch literal 36146 zc-ri|2S8KV+BQ1F1P!1gL1`8eiJ;VkCPkz}sEUA61Y&^Dn-CBY$PiKK0R$;F1gszd z#D+=)5s;!3k*15U_=G-~|{l4%1_ujLWz4z*Cz2$k|wbss(urx4| zQ;=8Um0*6&WAiFt@R$G>Z(dDJ49=3|K@P?!f*30d&gO6+8H3X&QAoZ49vCexUb4R% zoKbOE0sVjwe+ot!gERAT3&!lnC~=A^bM7jf`*zOV>3elcE&NfL(hrsSQ^@|5V2tWg z2M9p{A%Pf9iZupjKo0eCC0h~nFgOzoB`5?`24Zc3NdCcrq#&}t>tRqW24_Kb^CIa5 zgn`cB!N2VaiWpUOMYsc&a4yRC(OHp$144paL1)yKBCJ83u9T%r!60f`hnMwN`JeSy zeW?chpn$+XT2@h4kylp6s3|MTpk5RyL$^%ls)UXPmkSJt$G(Qec9n) zD#P`|WBk_PZ=+RPK3c#ZjKUJ2{DXklki#f|$bZmD{cqAqO>r5SjN{tY7|TlUDJgCJ z8ZtxKgSRZX5gc@IXi$1U@uqTzl7a#rk5~Mecf&TjERheQBKsX2)^9jE_vEhQ{JFa; zc;)9DqaE)%7FX&x{q*ySpR%<0`TlW89D)7bGd(X#uq8Q2Kfo_Az#od%^u#BD(+b$Wsm`7GHa}%za7d)Yxq-RTJ8#xQzVVNY!{f>!F}GS|3pmTH3aDwL#5%Dfm7?05_4mRBi>=lhLb^)(O1{Ri<sctycDM&k%Tpf#07+zFa(Wam@|8<4#c^{mc^$NzU4TZ%{7$ks{-Go9}e?h zS@FnH-W%L-(BHUb|F5%T9H8((|E<@5WhegMGN6BPEoP#jMUlV5zEgXEghHpRe?H~L zyYe^{V()*x-otC+60cjiaxCVUIZ(rTKi@BiH6ZWGx4(eOu*BJ(Lk9ujL}wusG0T_T_y;kTP&7^ 
zAULqF+XQ}R)8ICzrcT3ke!zn$RvxO)f(k)jS<SmYx?D*HSb z5OjSS&O(K6uQOrshqIcfUIcfJmV8j{{3Dj}?UkKo0T-)=W2e#rU^bY+5rY_y%OAmT zAIrh@U51Bz3fx$5^)V;mHihB5CaT~*uz?4TPVq6bSd0Z(7voCNoZfz?_y;%_dnfZ- z@H@`2wBb7_%G&uoMhoU-jLaACXbbqBzbzo(+uPLX-tRdj7vRUOLq6c%{t52j3l2{{ z%(QdJa+&kq0@ohk2aCnRYAW2P8JIMzawhyfisuAM;Je9#(#=ds-<~OdHi* zPe;8Q4?T(#PPj2j7p)T?4-KQ=7(C-1uuUoT(3~d0qy8nq-FJY%gjn<#5&i04)lQ&= z$4P76wIh|sjv`4bg@^m!tv|R5(+@yZ%)@*xPsG}Y*YUvJz%Iu-!Gys_Mz@t($0xNs zn?(cL{Nx??j5j7eqrrL8EDGi=I#ivs+Y6xG@fNX8l5LdJm!V5Fa@$Xa0R8%~1j}P# zv~j?ei?g^o5C|~iM9!=+1m;bdX$XtMN%Gnc799CuuO7zboQGngXl!pP#40}Jzf?uU zE4Dv01wqX~3tij%Bs=}&+aSo#FXF+iQ}?{KU%xmR!SeF)+78$|l8QMvJ`O!%Z`XFo zckNHsW^Y#mLz^Xqkyh9(;_sf|V?l0pWv;B;YVy5DNX5G2!UcW!w~DWt)O3LeO!${> z8gR2mCW`|(mb3zmzXM%-%YeDB{lCXsMG?gUy11UQL2UMoRgbiiP^;W1jtQwn_17f2 zk00DX_S>{)U>7}aINqbR<{+wd^;*PXw9%@nwV=&yqWY8vT=>*|mV@^C8DxqsZSb&8 zL)o!yh1-IPuH+Tvm6fND++2qWDInMl_&Ma2T$bS#u?_@1o_Zj;uNlB#!&>^VxCS7& z4TYn5IYtAGd0IqO5d*6608;jGmVE&UH8B@MXs$(}@rk%aetQa`2?r@`NCFYKfrytR zRA>L(ZV>~Dv#O*ELg=7~5kJOcAS(6)f?(JhTaC>dzw`1Klxf(CMhLmcCt{KJGf-_v zVYGq>h=O7FB`PKwVO@w6|GedAb^9Yx@&a|;>$?f$BMe^G!q{eVY^a7ts1V>T%}Z;)%LM*k|HM5R=0rs%TU#cc zE(7y}y$VKL=uN{Q+bLWcuI+gRG|i$xo!yqqh3T0zXaQo$^27gd5Q5JPs^S{cxjClu z;1Y9&iQti>H2~gTw%%GDs?Zm2^ zdAn}q^pLoEce9{{{T3>H>`zCCpL{VyfX%1oFQ8C020=|N8#tp{lz^b|e-GrcSBX0b zV6W|H-E@>-{xQCMHwrnSlJB4yhiqM&s-c;Xx0ye-^^8%|J$zFWEmb}a)xqDYWdM{m ze)^rNl!A3 zut+*vgE&{OAlxPuSenSl)#v~x}cDllFh}nYWVU&I38Arv#O;$}P9P$z4zE9{dDT0>B6^DkqL!mJ$4gAf`*vzsA^J_`X9PHtq3*$wUn zy2L(~`f5(RdHH$yKs_4X-J7@v$uQqEe1RIi zG<*Sp#r$uo1!spE>Y=&8;VV$X=cTeh8K-PG4$~~gx#1X;MaZOaM^!T4cg|w$FwS={ zM3C!dCJ~y&5Z^>o!GCB-sEP`P|0dc8`k=@d5+axgn?xanm>>s(#yxt4ICl)5kASO7 z=T>KVdwIK!0Euf9v){+3$Gc1Cy2^s>!v3$H{5uzRvljOATn1^y_tE9m+z%Ktw{pBL zW$7?K9YG59%;bbJdw_nv24tGJYqdip&Rcfmr)~&(I_qq#a26IzK+%Ruo=axEKyCMnuxg+Bh?`x>+2WHrfZd)9W_q zFrkYabG`>K2w5>@6G%P;-1xzb3$QV!Vh?fWqhOX5EfR~ye@hb~HO3hq&7DODwiBO-6*!Dm6|F_ZxTtQ;G!o_*Kj4dU14VYq z)#YZP1QtRPf z8sI!^Rr2zy(_f&J$JU}n9xXEH=|lG7Ai24 
z8ZmjYmk#^`lZyMXdD~)R4vQJxifGIuZqBcN5f@`UV82$h=l*~h1Ga|j__8|%(e4Q? z4##Sz>wKVb=zqVhX^x8o+Ev<`OVhd%=v2hxLlw z2EaS#6JYN*j|Li}E8~X5d!nfI1s4b94@Hd$AY-klCOOypVM2QSQ8a zn6v;R0T1|bcsiiyV)bMKG?N20;aJovFrBI50-Crm-HB&vGKtV6A3Tp_zi4-E|FWW8 zCt78J2>Iy(j=y5`umHT3cj`)lVG9;|6_U7SIu~qLRC5DhV%y8;VPj(l)6IbkF_@w< zpaLAvTTEy+_TO?NnohyKbLrKXX>P%;uq}u2`#_V^>ZguCw(8)&)xFNs-Ih=XSqdG658dfxSqPB66u#y zr$DCj^>fFaK`R58_PZcob|XN8hXG=iJ(ONMRYjBSc*6+ z6;#8+h08UZve7QnO6!s&w2043uSf7KZb4Rdq%JiQ=71m?>bF{s(hF{Z^hMcCT(~aD zHfgc686G%`TO=0(eo#k2y;I1__!DlC65epr@FYEx-QyIQ_=0; zQJO~(*xJDLp>CuCTUN~nr;Ly8P!zFh6L1QktJFp2n`Bl5 zlTCCfgB=NI1h#4;Q05{EOi>5N2vjz65HwM`Uy$85x_3)$ui(_l4Q`R~Gux%pT(TGi z%wCt%*r|o6zEYc0kFCu2JytNMeoBRAq>|!&>7&+BkA3&OeylJW)s3E!dDF%4qx;gK zQs>9Mwos|LHTo8z7hbi<_>4krOmM-TO+B0N4tqA|vpt{g4G40k5C`SFsL)tt=O;iQ zxjlH_Z!6Gl!YQc&^H3QYPo)3|~7A9rt82Z7gwCgsK(pA<(QUe57Kofu7ShcyEh;kB@| zMsr1`PGwv%MnzRAYQZvB87uE0dAp-fKS!2fkha>{`%vd;$0e=5>qA%;`=XL>7blq;5&Y`_cX4A(ztyS#ZmSH%hZ&-(gQ3&;hg^ zF!2ayXaH3Gq;eC2Y77Y9$<32#bisH|runcIQT*=8%!a2XiW}6k+)Pm2GmZ%`I=-l) zRjq>{X)}a1P*9*{n1o?|$zU)YE1ajtzN_eGRG}6r!24W8p_$mB?$KJOtabD(cQK?8 z5tsT-3x=xWM0}GQ6_*K!PsP?m)Dq=kKDalLCs+0E#tvyA>2ncu)TF2wc7*MUOI-lU z!ukOL+MEG{qJ@E$>zsohoYVu^=qV02VAW>=(BU_yvr`}tU1CljwK55X2g!=+A+p7V z`>wI?p-1M~g$rWLnQeOgp9+{AAsMUk%H$I zfX+^ST@4iUJbME>_&lf+aB$uPcH$visXdj$v_`-sVCtZG+Kz<@Hb6fJxaL-%n!zb% zBkZ6W~7#B3h@^2V^tFcbKBBVi^YsC2Gmt-0@qoaaPZ1?ak~=`blm|o#5)Z$ zjrz?YG)$~e;UzRgB-Y>BompS3J2-J84b19ubE;TCZy??SGwqBf8fnA<~& znuHf?K*QQpz$hkzg=6qnvTUcjAY$hnClt?b7~1jl@BZ`Iu*0L*o{B?Iog+^~9GlIiNY1nOU; zgSiJztePybt zk`d3`@1HbMS~V`{^PPuzx}M^8aTIUkEr-qu*}T|WR0tr7uftM&?lM5YLyNZ4d&iwZ zI@`q3Dd&OHI}p)};KOBlWx6W@b@NK?4s``u_BVj=O#L`oA+xa@_zma0hD2|XE{}P@ zYzN^i9|U2smJmCaNJjyEe2atm*XG0p%&yH2!1PEi+)WMr|KYDg>~U;~E(1`+!^=on1sxVTlQaYeEp~Ok_ZI zAT*#+CgbbqK|W_~XfJyKnvQjX7B&KK?V65thV_`A35@sn{5WGYGQ7BuqsJU$TnCKv zI0WQl79w$An8eX;60{!JZ)Kz)P^$3~_R=c9I(tJKbOu;i=2PlC@hSB<0O7H|4^*J# zhZ*k&j{{9P%rt@h`?*viFtULJDo|=5A&v6@nVHj#91#K#{S{i0B946pXa++$x-3KpfGN{OS^l#}R%14ZDmt`=C< 
zQVU?37X$&mPjFVA`;837Q8fg)#yvJfh-Dtl(2xhyf%g5oiO}LrgQ^?dO8Vg;89X%jX4Djr%u(g0o3)B zaZFL(h4SBp*2GdAW;3&NzD9mCb<_u1SPwBqpo4s_%&z`vHjC;Sd#sq*+Xkd=oX@$3 z*uT(ukwvvTHqJo9yLQlqU$vL~#u;=Sx&&tS6p+*L5-^jR?FvT_p#E81c!~v^r8IfJIB}7ctPM~QIDPO&`e6 z1Pc~!r0_^)BdDl%Mk?Ors^re3TQV*W;$2Pyq&&UkJC6EV!%_}h$t)o7QNVw+MlT4q zL!;J51V<^Eqt>tk108iaJtLjw>K6~{@uimnYbi4nntAF?37Dn%NVczd20^WYAa+p( z+)zti)gQaBxAz+%k3Qq|qe7F?kFkSVhLe{lO1|Bvu}zPV>TlC!MKDJp=uH(h|B+6@ znBm+o?BPA)>wE<7XU6NDJQBthK=1)*yib1%Z=QU&ildJZeE4UBeGuE%<@@xe29V9| zyP$C-20$HfJfJ7xby`|)O?!eNVlHLMp9v{_)s_Hm9OOxj> zLW}Q@!<$ArFgOe@z*E~GzzJV|$+os;P5KMqi^Hxw?!eo#kulcAjRgWT@G9&_*7p@$V}6W2@G38au#QakclJX7quDm_7DFp=a@;*R zi!m_W!Q}UnCx>Svb0I)%sPNhg#D++y&QH#toIA@&fOA{Q9rPZ8aVgpZ7W*t)F++qR z;$seruFrFei?P98(~NVFn5^%`HxB^|s~qYbpCgIlCPM6e4;w6Pc2x-(h;J{GFFPB~ zvj^Ud+J&NIrr%6y-B)(O&1=bZnnG>*=0i30Ba@TGwMeMeuxieV4c!QZR)x2a%qtW` z3?en@Sw>j3mw&9Fi@}>sOBrfFsS!{p$RqXlrAvAqT-PRHgGeQQ%#GM;d^8tuY!9CC zS9Qm#D?lvq*lKKkY_&Wf>P!D79ojQcn-)xAPU9!*=k6-i!z=Px@D{+<)X9TW1Q3Yk z22iaA=-q|cx3os{eJP%c+mqi=qXnZ=RN-C41uw0CTeYv^tP1hA{HrDW5*mW91rr|v z>Lb}9NPanZ5ee=|N2c+&y|4ZI zIFeC~toXiuZw!ixaMu4hNt_)qAZ*lYVBS5?J^?WpbsGuA%%X3r`16eMk@*n@3tm4o z8$oXbUi+dIjujd9PJ4kBLML|GvG!>vQNauJWl5nRI-tvpR}O|bKH8$qT}AEA2b1U< zgnodK@P=3{4!DLic_zepo5j_!`Ph8W2358;dufBT;mcxiUOv33r6wnco^x7Zo$*R= zlOTHRK&7oSoP+s0Dp?LZ>&OE$|*1Bi#5Hw{IWlJ?Aus|__409Va=LBmi z)JNlH&0>JF$9D3_q;WIYNTJ9ujP1w>JCaJ{rg1}qooo^j_Af1{9}kCN95~Yf5DW5x z)UP7|Pua|QfUsGHASV@`utPHyJbsoY*RS!Yltc&ke3{Adis56wSiZ6kg%+>A#zXDh z&00Vv!^!2H=Y8Y%F zuI?ne;g4z>seSXP3CAT3q5Z0r{99h0pRH2h756dYK_FN;OC57VbET}Bg z1bA_kU*+wsZDF2rHzM6-k}$pVAr?(C?&waSDO9o~*AUd+$vW7{r)U&dQ%77@xK&W~g{BZc2Y9{{L)40dZ-KzgYopM2n5 zC?Xuu2V|N!gbFF|gjbv3`4j8VsHk~; ztCmULg>_g1CouzMr}a2^)zz>C>SymNTd&2OLRP5A7C1;ZY%G+jmQT|av&+3t7=MQQ z^EY7ib9d&kL#X9uF@NTdm3+>Uy_UaLLY{W&VvT$gQ%N)pdim@(S+F*b`fdCpH!Ggb5&;b!s0zg0D>9-TG`u#-c z0hQUw3hD2^Nbe`wff+kJIm7jZ3cUDFx0}BBE|tn=jtqyh>KLIsI58#^4lV5f!Y;xr ztQFk-?qHh49B1>@y}75_KMN)0G(+4|OkH6jgoY!T0MnN^(*O{P^MO-371{=|1jSI4 
zRl@Mb(FA?UCd{xiFaM}a*}aA@4eE}zk%DM=@id(}FRfV&RDivTv*QY`EL#Zrco=kH zNErS#Czg~UWr-!|U#C5am!44uo*?Tj&Kr!~qM3+Q|-gLEdA195qh2mZfjN&?EahsCBC~!KUj>Vjj+UT(98LH zAaTp@GRu!4=AMF`7Iqa^565wl;Rx^2MkZ&!YiaxQ8@Ssb`EeY4ht&ZjBdo74`BTWt z_F^Y*=6+Ks_@f=qrR`D)PId$nC}JqAg3h%4u*o1fAW*x?IeTodY03QdWyF#)R#tQT zBY2)z6wYR%`_@COkr^J0P8tIm`N}AYppJuQ-8koMd{q2|bi)7QgHQ>YMV9C~v< ziVs>)1}21cdVGd&N<+b1Ahnhd>9M%Awc0q*iKPPW+@DIf`$aaBo+Sf!kcD}ao|z8 zXR!m`^6P~BR2GoJpF!ll>(?*cx^?!l&llk3RC7PL#4oe2-9))Fs*_#K-4n`>W8SR# zbOb5<1@@SFs4RvzjX2W@FQ^_wH6>Wa4s?@5HFhY%GtKOE49SKB(AdqJfEFEV;kaej z!~-xB5KgN7!tpC$7g@v@1su6L1{!#re6_qyZB!VYYz6tr$|oRbdQ9gga6{p4khtKf z&OcW9pX>jG3)2Wh{?4UhD)d1y=Qad^nfpX46;=`WyI?T4jrY#~mUSTK0cVCPj@r1i zv?~sT6Y%;ic(6s`#D0!0ri}x^024Hz871Hk{3m<-sp8D~uMnh*F%tgXjUL3eDp5HA z--?M4ivh8Jch1>*Kwk}2_Hf=!3j3hNOV_la8EY`_p5M~(1vaj8NFaafIyk#KbCL`o za5+~XsN2FCf`(7pGU0Xy?oy#C+jSjZzSTOz{KNZ+ra}u+*9Z{10Uj<8X|r0;Hvo0; zHyrS^9b#pihuByMYK%}ks7oxWuNps@`tU^J+jGc3DfNxWu+7z5 zm1+Z}9;q3;-6dMB=D~G_cb7-vnPh|}|6A%`v!dQSnYk=DkCc%LV*EWWl9mU?mj?#c zFAt4gtXlq|?3Bq54Jwtv-?iB*636N3qq@DH_Yi5*#<{NXQY z!*2VbFNEc3DJPbPTJ>pI#*ZUzVE$FL3%bicUtwea`_=w*^)>Uu8u0P}2O1CNO0W2*QT0{u~Lk=WoVl#(N_18g*gDUzlx zKi7Y*|6KpM{vW>BNc10G-s0(W(M=l(1_VQK6>$ymE7$T524h-7gXHN9XqMy}+kXv# z+LuPtp&{*!fOpR(H0F(A$~3TfI~0W7#fR@Z4NwT~*jV^S`1iBR!7D1Z0uTSjpb7w33qxxJU0iGk+T*F_H;wKQu3snfAXoTgPL>vvi%V2Y9Q#@SQ zLQV#hJ{(Eo`^1Gm(N?B`l)>hW9AzWHucB_7`sSj{V$2H}Y<_3tTHsDL;ieHh$DkOY+-UsGNj@`Z zk@Fj7b~_Tc;QXQ!y19Uqv0X67T*uQsd7{DO3egGZRW2oNL4~?ah5zI}{S~o|QoZ>Y zzdN{WYs>FmRSorjh~I;Do4Eda+xi>s&EM|Ce`Q_&Ck%)!F0MWP`?oF_<5skRxD>72 zlb(mNLXjnTEfb{l{^6JkynLeg4vzNZ?EtSpe=K*^4`c3NXrswAzR~0a22cojB<3E_ zm<6C0)6g`0uEjL8+-2u{7YEPGSTV5MJ;M*5X0aAwCYWxMZD=ZvZScHqyzcO6L#6Q{ zKgY8eqjmhb;I9?UfKLZ$Q!+So+%^WaDFTfk!GG(I-gw71sRCs)2DeIAo1b_F++JRq zROe|T+mHITdOy&PPl&a|X7Q?I*Axh|KHCB(M7e))ey+7n`$CqASlz2|6T1bc6I{&W^G|YCOCLi zz`rAbZ)|IW3rq>FLCPhQ1kMir_X%A-`I7DeQrQ~2LDnRlQoTAdJ%nr#neik;WyV%U zy0d#%_P)x75TDP_f6_txlOtyo{(p67kb>eroEoI^H%|>xSbqGNkpdvdx6`G5&#?Tw 
zuip>Iv9mD$emu_a$Lah|QziWJBZf4!!vaaJKA;mWWDhU@o#J2aF~u=nZac+oRV?rp zf%;@mFXQkavUT__8`p3jS9LdWZ7p8SFpV(3KtE77Cd|**KUgDdr#K08h^ztL;b@#V zhLeTjvr}9bPQch%SYq@8g2)(UdAyvf5*UfHvb>^-lDeXTEXF#-AG6z)g25|b6!G}& z7zI@g1!WB-b#X0T3~P6ei~%j9seadp!o^VAmk8K#Es@KL!qyTmnKUJH^GnAl9IL@OoQQK4krsRwktXiXvH!Hd5_7cRBaoJt z_!j-=BJlZ4LFC|IFa|A^Z~2$U{W-s-X8%&*7ywXYKNo;kig<ca3b0yzS(psX5)(^tP5kqV8!5?G4C-{27B>ndGV>#SE7W~$Qf7;qo{$=w1zpDps zuD@$RU`UWJN88+7ab#bzAAB|(Py~e^6~WC_!xf+lB_QbE9JxV2fUg&Y;!FN_DDaQ1 z{qvD8#R41w_=Wo&z_gY@AOO{!;z2OL{UeC0{0WIw|BS@T&+gMB1(P{QkJHnIFBx+$ zAT%6>vL*+4xx;2JD43$}NeY5b)`PQt?`=R3M&a9ukLILrSwQ1|fV)jl2zd$HZ32M& zatx+|`tl<&ms;ka4SeRMs;Vkxy8<4gg2!VN!Ml>0Iz~|i5Aaus6NaB~8vOVD9WKMk z5AywBkW~N6MyZJZcSotH@IN)m<>&nA2D`#1;i;*^XW@|oza3VmuBHGVfwz<}OBsOQLsrBnemiUtj#)bB?ypE`LLvEj zx$63R_e(+sx9iL|Gz+<1WTIa!c{av+p z9;koi{-q0Jy=SkLK6*hj^1tnekZv!+)tNcYU;`VBAK>i55} zJiqBZ-@s0#!8J`P6Ii8b&9cm?txwOVJZth8$a%K2J^%CVy7zAvskM?F%?oOZ^ILKR zcRjJ2Wf4?0D6R7=J`*th$en!Z#r1GIKrt+9#qe4_j|PFX`Vc1PhgZoPU2Pjp-B11VDQ?#D(k zyHVAw_jVBx6X(u5PHf(MjevYYb&GI;x^7dSD;dOYH#Z)ad25N8?LEGKQ;ckCOKk;S zAXIMj8DWvP28Y~zsW@ikv68)ua(#F5a3Y5M3wSm4>Z~XHSqjS0&``@m?t{yw#?0}fu_HA2*Y?#VL=)D)Z4dp!0LnJymZ`CcWUA)?U*6K191=dJV< z{eitfSE~cpu8B*LDj0CyohF=Oid!Qpu8mP59+=S$+BoN>u6_5I`^4SM6E6*uQ>of@ z2`9|c3h}4VCv7i8FLDtijI@4QefN~+(S6!FJ;~3M9%sC9O^kL`f8P1z!Asq;vqv_h z-jg~J927pGtj=|>HHddoGrc*i-6Qd}L2<2zHqXfi$Qx!4=Gr@rtKOYgC@g#uIuTNq zVLsrg`Rmv5V_nRjgU~OgOpJovv*yk&l3y}|US{{A?q($RZ~Cd}?2eiM7LAw1z(u93 zd*of2vo1m`(?`7G82xVT<27@9#+yYQ&pc}px+erRtrW_;M(VJdcJE+RHW6N5zh?4W zGJ$G$$9waY*HIyUq|oLqjIufpQisripKdqYB%Q+MxO@~+3gM=eUG5z5R_*oG zB7E9@x90UzyMmLqq-HitiY$D?U{{lyJ^ollytqmz?ao zCflFa7k>IQ@lmF4J85Y1-50Mvr}+{|8-}Igo zzJQKdS`PKN4NvwAlS5svd$_+0t=idto@XQRj94jO_(D_Bz8z~1k?zLTB}gXYv_e~C z?*+^}38#u|pG|-MD$*&){>x+M?7~pKw?A2p5ffhBdhM&^M%E{<^QrJ)f)= z<|vMht#5zuT3J2?HTlRO=*6tt)7|R9qPrer2F*Rx>_eEFj`VhXIh(q-qn8#cyl?S< z&N;EF4;6{qYl}0@v=nO9mt^rB!T>7k6-C#3Pu7hQ|H!?$`P7Q0G}clcZy zEw394I^uKdp0ccML0_ZII+eTkEhO1{49!9-x+(-5mDn>BHM^5!P1hBs7m_nLC9LI!A0-Yuft{4 
z^4qHXUTq1hGFNO9Ieyp@l13|B?& zr#oMH!yVgvb)Pkv3OF^rUHv>Za(`g$>o2cr3Jz-D?TL_>-fLQs>yz8DCt?5ok)ln% zPCYdZ%%#>ZToHSAY~_mkYl0u%oZBCHLm2aH;mU95)kcHcG6bZ=En^%$>Ki;7Xr`OS zm@5^>qeQgAHzM=B6qPagb$695&WD!M)YFhBHtx;Ak_+}4w-zPRkUXw_& z!Fb(ud8)~mK83dudlc;ZbBe3OwiT1I-qQ*b2eZz}r8IfPd8Mbcp~OTqN5TW^kC-PA zJ(P~*9%XhKpP7G#KI%BR{DGKf6TN8V|ii$ZJ$#?5XCHF5++8d6|SQa|E z1zemGG7T_kpWL<+O<0%s+sF29<65Jz}hl)JQS=69bb_ z$CG|Dh;IGbwBfBfb|d-z9aMPY0dk_U)As7fL_t^YG}OoJpA@Q3C+=t-o=z+~m^x=t z_^MiDXQJw};h-;=9eEzManAcSW{??jv zD_RFOzK*==x$Rhk`_V4Tfc-}fM~x`<-nDq-;!_%9`Y);uO^qDmSf(} zdpmg3y~Ak`_ptVuhjTq{-i>}(Zl~qXQgtVL(PM{bT3$-(igbxxrrh}K zsnj3?=|W8{KeC}{-PMlcTIU_8&v`Ew;XSw?2*wKLpwxYj-4R^eH)f|>ruRS)A!8@H z!M;FsxU10t-isMw>j&`{N)Q`4U@Nj>+&pmSmULV zGuk*Q7&z##;m+g^YFp4?rvTrP;!5h*kWN{|*H^2O771@FI!;#}Y|q|NxG`DU|Lf7l zr|El)o3l6G3G^O|aJcqa(r~u7P21Gfps-m%#mq{PXeLu&0HSc+UVT;#ljiYs! z3%4;4dnJ^qc04C!q7!UsOaaXd%-(88iTaGd^b2|t!!pq})tdRap2Y)qmGsQ;P1jQV zwC!EmpcSW|@gCsM7?>Pwv@P;;IKlnqnPgxjrQsH(NsOZMu!tekoy39h!O7YWD*VJ>*eY-A zf2m#{=DB(mBl?_5N?hiFBgm0GQdO@DHpKkwj&;1duShvQ$>;PL%evA)8JSTT+pf&_ zd($__$Z39kH%fom$2g=+aT>oqjUKtGvx#dh)iwNfl+kXd%1aa@zIAn=GL#Rdfj zp^WfDHn(!Ravq;kx9JqXen{CTbSixyb3Edr$wj%g@CfcXLk-9F*B)Hm2sz2X z9M97N*$M)N67xfCL$yX4`raD>Hu&+y3c4J7WtkjVQIdx^aG5%Ltx#W%x$vrXel=@m zq;YNk%TLJ}IHqRcP|o%1^VcP13h%D;`CKb{4P*9Xv)2_~o!bW17Ygm%uZr6ZOZx~4 zRa`UO@8CT(YdY-^!r1L|=EiTn(fLAZzR_NMHF9rm|5nFU!?k}*!anyk7ma&LD9a;g zTYC4VvK{>wO@loN=1-fdjn|0n-yk?BluNF23q-6|3!z=$3LNmYkkPq0mopx6=y@kK zpmT&d-^>#$`%*Bu$}jU_;YFoGM@$o5>>qi}JxxH%CQetSuB<#V>Yps&&da;IpdwFL#b$BYy0>wFEk(V-h22UY|e?V^Zrbmf$W#w zmU9lXy|j?|`Nk6wamRMQ(jQx3a@q9w&$bNA&bCUg)Lf`xULSa^P!jdIW+S>bO6Xn8 zPDv*pOJS7CRb?+zO|7JJGG7nq(RgBcc4a?wrG>8@w!lAYd2fNQyY%@A&#$Ks^dJtd zowdMZW3HEtYZ*=8t#;;r6vf&?+uF?w-%hsS78TuZH;yVc3n5Q4Zj)yoFG`ZnSJ}To zI}b;E$gq7q`^ksC_$aB<;?upEd9Sd@s**AGc`eq+dE()ZrPcwgsSi`PQ(us_zNj)@ zY?Caa?eVLT9v<_#(z>B>Zo(NIP*Z<su0 zUt-a!qiVSM!HT)FtJnN0=Q!KPcukqA&S5{VJ^?L0>XppF*{%KpjVFe@?O!;{_ck-i zH@`GT;l;_tqaq`!7fDtQMOK38LN+dWxesWiicoydN=-u;Pc@cRTwoeH;QNIk^NGnyO71v5@l(s}n`K;~x>0n>= 
zdF4&?n@z-@g<7;c2VY+eiU{AGk$;4&lpgFldofS_ovJ#6ccJyCHD(Db{9dm!(+}JH7E^aIsRVh<_Ry;BGCoxoy;T4adLGddF$Y&l1G-|U1ZHwMoSL_#k zl(R5t7b3g+j&n@zl`P}NVI`Mm31j6*?R9VQYkRTPMyQxX`n1+okO<@cD-eLD%;Jc1iGK1-jp zABx$Rz4Jop&j+@ao6Z-}Mis8dG-@C5-hc3Q>GhGHpQzmQtqcA%xgfkEK&zr=(EBQW zql4#8HMQXfpIfSnBNuWLzjpe1dKY^+&E^NCOln<`lHGT`H%E6Nvhk@6d5xxT@)@;N z%sO=k)1iVw6Z`C|#)ZwQgSZ>Ak57KKauO9cI$3AyS+wnFfCU&}ydpv;CNbCyi~vG8&7A&PF`j`SqM_ z)yyWSY;)x2Pn$MYAN@kkB>w8~n%H}0=S=v*iHu-Ikl1~%ak;MD&b7e|6KV0ohkINq z-@HzkQ*6sSqkfRlpdMHqWov!1Q7-NWULk=Cg>hEoL`va~fhrJ)7 zmTN<&Z&05&MaVErEg-JqsxKDvdJiIcX$g+$g0xBYV&OBN+CGs!@eXOdhP&=ok8s#u zY|d&MXrOR)w^ttS9e*7+wkzWFp35uxb|r74noxN|tF2$IUsSJt(7a}xx$m&e*FuZK zrzfJazZSpSyL)}c9krOV0|JPsTIkGJqEEEv_FGQfYClUHlEM=vo@8-Nh6$auIUv}u z(ei+blw_aP8_hgypY@xFAc?~U(JHg|4xZ%_P7(0j`$jL8Mu<+RxB4763I%l>WJm=n z*f;vDyAvb(R<(V2?-XNl&4jQNo?rK|44zpiu>3Bj3B>}>o_Z}K1@xbXuXr1@(b1He49$UW(|xz6C~c18ejzD3`cl- zp9=e|n{P4j!0eUrO-OzbLNLW;Hn{;-=i4OqS6u29yj>@8YZ%v6OJxZ} zo-PSf*rl{xNOZNrJI`&Yyt#q0g0;rD)20S#RrTAZPZc#vrdTJjZ9D^vyHLg#^`6+3 ztE3nZMY?sitT%hXyHK6lT2g{bSx39z=B63nl*+2UtHBlYOO~1P`rZV|ugAR=K9?G7 z6HPX#JGRBbzE#NTtxmk;PEk_{@%-qxI96BL6E*Lo1N0d~D-uOh)RcFJ{z6P;mm6mO zXa!}P-D!oQtNhuCTV0OWCngVgim!{$yP<=}3uMO@v^c1xAQN34m-wc(ZYe|x>BX+h z`?b?XTxbocKsbbQUa;n;_73w}$(cM4=}oJT(Vk>5n_oIJhuGO_;GWtV{8a6?hOAUT9fq6ov{Ke^M*&t zUJ_PK`_oNUM0JgrL95=DShY8nyyg+oP5D3$GRu4aEcDH! zNB()k+nIAOtA^L@R&&N+15{FGc5cFIjlVUh_-$xl~p$ zgi`Xl#avI;ikG{q0j-+2&HF3zblscB zxmD1vH%+9RU1jqy?1CNV&RrMzxT`d(pwl`$$NKzL$KN_s|JRniIC^v4l%S-UDX*2! 
zs2TW~Jvex&^Ww$%R%Y!xSEq$U8kc&VIk!K$|CngH=$Tmg&K#Y|63N>){aSo8)O_mZ zI^(%7j@z!7{Z_m%weF7B)0WRV7eb5w&6{~o|NL1c+g`yV!8-F)R;S9IPL%m+`(okj zc?&P^H@(xKSbNSi`oT>rCPgNJw!$E{H4oIS6KC)o3fm;9QsaFh`)IlJqc5@Svrn3w z%60oCcEnta7UBDTtmO)E-BRZt3g7Tusq3-y{o9W6 zN4uRXcTaqGXuJ2p?TUT*kJ@W&TDEU^&Kcl&{J|B;ig!F5=?`Z4ANb7aQ(y3d`Q6{2 zSFg1{^FLE;`<{3I_Y2iEf2$W%^P4=`@^8!YE&ogYe7ShJvUOLvqxV0Kw(Wjvx9{8e z^T~ty+`8lmbB>(xyEgyJoH=%y^F!vd)P3N+mvQwxXYtqKk2_!PeE!_;?Ah~~%jR4B zVflXf_}s9&kFOoKe7$jA@^zWVfB75UF}`M=^E~eV-a8MK7g#6RKC(G#2XeQnlk}&5 zS+nifBGYRb$EC8TW=Ne8|9Q}+r1{>`T;;2)@BB+VY1QK-&VMz!-6|~l^pQ)BrfO@P z9A)#LM*onrI|gj8J@{-|E7!eM{C)7eN`Cc9t}S`7{acUUJS-n?quRPFCnLAI=J%Jf zy4lrRw{A|}8+twQZ9$pM^^a-Ct7R(>%NEH0E4F`P_wbqO_pIg7MbSC$md<_VxH!Tv zgW>LixGTIx4ZUI~7X)u#@wjAH;$2g#IQJ+H^IR5FnfAk*ng5!a#vcDH{(DzS@AD){ zz7tpfe$o8Oce0W_#mJ&$W4YJ^>3!0CvPx@D?|O2qZvC71XRjwUb9ji%aC6w7bIl{F z_GHScw%CB+<_{uvo@YaNh04`0hfI1pd6tSwVYio&>WYO-nKB?m#ve#ESJrxGXVDU>KRT;6=Jz<%*c3jo?~>LV|1Qlbe>~$o?~>LV|1Qlbe>~$ zo?~>LV|1Qlbe>~$o?~>LV|1Qlbe>~$o?~>LV|1Qlbe>~$o?~>LV|1Qlbe>~q&T~{0 zrKWKiC>WV=8Nh*pnW?F~{1OG| zCdJ^)s#IY7E9iT=C`8+sn42V~rY2gX85tR-Sehl9r6nbrrI=Y5CYu==rdTA|5mpfk Yng2>GDgh@hV^dRL>QYs8^>^a}0MV;|+W-In literal 0 Hc-jL100001 diff --git a/src/documents/tests/test_barcodes.py b/src/documents/tests/test_barcodes.py index e4e7566ad2..3ffd5d7538 100644 --- a/src/documents/tests/test_barcodes.py +++ b/src/documents/tests/test_barcodes.py @@ -287,6 +287,26 @@ class TestBarcode(DirectoriesMixin, TestCase): "patch-code-t-middle.pdf", ) pages = barcodes.separate_pages(test_file, [1]) + + self.assertEqual(len(pages), 2) + + def test_separate_pages_double_code(self): + """ + GIVEN: + - Input PDF with two patch code pages in a row + WHEN: + - The input file is split + THEN: + - Only two files are output + """ + test_file = os.path.join( + os.path.dirname(__file__), + "samples", + "barcodes", + "patch-code-t-double.pdf", + ) + pages = barcodes.separate_pages(test_file, [1, 2]) + self.assertEqual(len(pages), 2) def 
test_separate_pages_no_list(self): -- 2.47.2