3
齠h              *   @   s`  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZdd Zdddddd	d
dddddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-)Z	d.d/ Z
G d0d1 d1eZd2d3 ZG d4d5 d5ejd6d7d8gZG d9d: d:eZG d;d< d<ZG d=d> d>ZG d?d@ d@eZG dAdB dBejZG dCdD dDZG dEdF dFZdGdH ZG dIdJ dJZdS )K    Nc             C   s   t j| jd S )N	utf_16_be)codecsBOM_UTF16_BEencode)s r   //tmp/pip-build-7iwl8md4/pillow/PIL/PdfParser.pyencode_text   s    r	   u   ˘u   ˇu   ˆu   ˙u   ˝u   ˛u   ˚u   ˜u   •u   †u   ‡u   …u   —u   –u   ƒu   ⁄u   ‹u   ›u   −u   ‰u   „u   “u   ”u   ‘u   ’u   ‚u   ™u   ﬁu   ﬂu   Łu   Œu   Šu   Ÿu   Žu   ıu   łu   œu   šu   žu   €))                                                                                                                           c             C   sH   | d t tj tjkr0| t tjd  jdS djdd | D S d S )Nr    c             s   s   | ]}t j|t|V  qd S )N)PDFDocEncodinggetchr).0Zbyter   r   r   	<genexpr>B   s    zdecode_text.<locals>.<genexpr>)lenr   r   decodejoin)br   r   r   decode_text>   s    r>   c               @   s   e Zd ZdZdS )PdfFormatErrorz\An error that probably indicates a syntactic or semantic error in the
    PDF file structureN)__name__
__module____qualname____doc__r   r   r   r   r?   E   s   r?   c             C   s   | st |d S )N)r?   )	conditionerror_messager   r   r   check_format_conditionL   s    rF   c               @   s4   e Zd Zdd Zdd Zdd Zdd Zd	d
 ZdS )IndirectReferencec             C   s   d|  S )Nz%s %s Rr   )selfr   r   r   __str__T   s    zIndirectReference.__str__c             C   s   | j  jdS )Nzus-ascii)rI   r   )rH   r   r   r   	__bytes__W   s    zIndirectReference.__bytes__c             C   s$   |j | j ko"|j| jko"|j| jkS )N)	__class__	object_id
generation)rH   otherr   r   r   __eq__Z   s    zIndirectReference.__eq__c             C   s
   | |k S )Nr   )rH   rN   r   r   r   __ne__a   s    zIndirectReference.__ne__c             C   s   t | j| jfS )N)hashrL   rM   )rH   r   r   r   __hash__d   s    zIndirectReference.__hash__N)r@   rA   rB   rI   rJ   rO   rP   rR   r   r   r   r   rG   Q   s
   rG   ZIndirectReferenceTuplerL   rM   c               @   s   e Zd Zdd ZdS )IndirectObjectDefc             C   s   d|  S )Nz	%s %s objr   )rH   r   r   r   rI   i   s    zIndirectObjectDef.__str__N)r@   rA   rB   rI   r   r   r   r   rS   h   s   rS   c               @   sL   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dS )	XrefTablec             C   s    i | _ i | _ddi| _d| _d S )Nr   i   F)existing_entriesnew_entriesdeleted_entriesreading_finished)rH   r   r   r   __init__n   s    
zXrefTable.__init__c             C   s2   | j r|| j|< n
|| j|< || jkr.| j|= d S )N)rX   rV   rU   rW   )rH   keyvaluer   r   r   __setitem__t   s
    

zXrefTable.__setitem__c             C   s*   y
| j | S  tk
r$   | j| S X d S )N)rV   KeyErrorrU   )rH   rZ   r   r   r   __getitem__|   s    
zXrefTable.__getitem__c             C   s   || j kr0| j | d d }| j |= || j|< nR|| jkrX| j| d d }|| j|< n*|| jkrn| j| }ntdt| d d S )N   z
object ID z+ cannot be deleted because it doesn't exist)rV   rW   rU   
IndexErrorstr)rH   rZ   rM   r   r   r   __delitem__   s    


zXrefTable.__delitem__c             C   s   || j kp|| jkS )N)rU   rV   )rH   rZ   r   r   r   __contains__   s    zXrefTable.__contains__c             C   s.   t t| jj t| jj B t| jj B S )N)r:   setrU   keysrV   rW   )rH   r   r   r   __len__   s    zXrefTable.__len__c             C   s*   t | jj t | jj  t | jj B S )N)rd   rU   re   rW   rV   )rH   r   r   r   re      s    zXrefTable.keysc             C   sX  t t| jj t| jj B }t t| jj }|j }|jd x|rRd }xPt|D ]<\}}|d ksx|d |kr~|}q\|d | }||d  }P q\W |}d }|jd|d t|f  x|D ]}	|	| jkr|jd| j|	   q|j	d}
t
|	|
kd|	 d|
  y|d }W n tk
r2   d}Y nX |jd|| j|	 f  qW qHW |S )	Ns   xref
r_   s   %d %d
r   s   %010d %05d n 
z*expected the next deleted object ID to be z, instead found s   %010d %05d f 
)sortedrd   rV   re   rW   tellwrite	enumerater:   poprF   r`   )rH   fre   Zdeleted_keysZ	startxrefprevindexrZ   Zcontiguous_keysrL   Zthis_deleted_object_idZnext_in_linked_listr   r   r   ri      s<     





zXrefTable.writeN)r@   rA   rB   rY   r\   r^   rb   rc   rf   re   ri   r   r   r   r   rT   m   s   rT   c               @   sd   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zedd Z	e
edddd dD  Zdd ZdS )PdfNamec             C   s6   t |tr|j| _nt |tr&|| _n|jd| _d S )Nzus-ascii)
isinstancero   namebytesr   )rH   rq   r   r   r   rY      s
    


zPdfName.__init__c             C   s   | j jdS )Nzus-ascii)rq   r;   )rH   r   r   r   name_as_str   s    zPdfName.name_as_strc             C   s    t |tr|j| jkp|| jkS )N)rp   ro   rq   )rH   rN   r   r   r   rO      s    zPdfName.__eq__c             C   s
   t | jS )N)rQ   rq   )rH   r   r   r   rR      s    zPdfName.__hash__c             C   s   dt | j dS )NzPdfName())reprrq   )rH   r   r   r   __repr__   s    zPdfName.__repr__c             C   s   | t j|S )N)	PdfParserinterpret_name)clsdatar   r   r   from_pdf_stream   s    zPdfName.from_pdf_stream!      c             C   s   h | ]}t |qS r   )ord)r8   cr   r   r   	<setcomp>   s    zPdfName.<setcomp>z#%/()<>[]{}c             C   sD   t d}x2| jD ](}|| jkr*|j| q|jd|  qW t|S )N   /s   #%02X)	bytearrayrq   allowed_charsappendextendrr   )rH   resultr=   r   r   r   rJ      s    
zPdfName.__bytes__N)r@   rA   rB   rY   rs   rO   rR   rv   classmethodr{   rd   ranger   rJ   r   r   r   r   ro      s   ro   c               @   s   e Zd Zdd ZdS )PdfArrayc             C   s   ddj dd | D  d S )Ns   [     c             s   s   | ]}t |V  qd S )N)pdf_repr)r8   xr   r   r   r9      s    z%PdfArray.__bytes__.<locals>.<genexpr>s    ])r<   )rH   r   r   r   rJ      s    zPdfArray.__bytes__N)r@   rA   rB   rJ   r   r   r   r   r      s   r   c               @   s$   e Zd Zdd Zdd Zdd ZdS )PdfDictc             C   s,   |dkrt jj| || n|| |jd< d S )Nrz   zus-ascii)collectionsUserDict__setattr__r   )rH   rZ   r[   r   r   r   r      s    zPdfDict.__setattr__c             C   s,  y| |j d }W n, tk
r> } zt||W Y d d }~X nX t|trRt|}|jdr(|jdrt|dd  }d}t|dkr|d }t	|dd d	 }t|d
kr|t	|dd
 7 }dd t|d  }t
j|d t|d  |}|dkr(|d	9 }|dkr|d9 }t
jtj|| }|S )Nzus-asciiDatezD:   Z         <         z%Y%m%d%H%M%S+-r_   )r   r   )r   r]   AttributeErrorrp   rr   r>   endswith
startswithr:   inttimestrptimegmtimecalendartimegm)rH   rZ   r[   eZrelationshipoffsetformatr   r   r   __getattr__   s.    



zPdfDict.__getattr__c             C   sr   t d}xV| j D ]J\}}|d kr$qt|}|jd |jtt| |jd |j| qW |jd t|S )Ns   <<   
r   s   
>>)r   itemsr   r   rr   ro   )rH   outrZ   r[   r   r   r   rJ     s    


zPdfDict.__bytes__N)r@   rA   rB   r   r   rJ   r   r   r   r   r      s   r   c               @   s   e Zd Zdd Zdd ZdS )	PdfBinaryc             C   s
   || _ d S )N)rz   )rH   rz   r   r   r   rY   %  s    zPdfBinary.__init__c             C   s   ddj dd | jD  S )Ns   <%s>    c             s   s   | ]}d | V  qdS )s   %02XNr   )r8   r=   r   r   r   r9   )  s    z&PdfBinary.__bytes__.<locals>.<genexpr>)r<   rz   )rH   r   r   r   rJ   (  s    zPdfBinary.__bytes__N)r@   rA   rB   rY   rJ   r   r   r   r   r   $  s   r   c               @   s   e Zd Zdd Zdd ZdS )	PdfStreamc             C   s   || _ || _d S )N)
dictionarybuf)rH   r   r   r   r   r   rY   -  s    zPdfStream.__init__c             C   s   y| j j}W n tk
r"   | jS X |dkrjy| j j}W n tk
rT   | j j}Y nX tj| jt|dS t	dt
| j j dd S )Ns   FlateDecode)bufsizezstream filter z unknown/unsupported)r   Filterr   r   ZDLLengthzlib
decompressr   NotImplementedErrorru   )rH   filterZexpected_lengthr   r   r   r;   1  s    zPdfStream.decodeN)r@   rA   rB   rY   r;   r   r   r   r   r   ,  s   r   c             C   s  | dkrdS | dkrdS | d kr$dS t | ttttfr>t| S t | trVt| jdS t | t	rnt| jdS t | t
jrdt
jd| jd d	 S t | trtt| S t | trtt| S t | trtt| S t | tr| jd
d} | jdd} | jd	d} d|  d	 S t| S d S )NTs   trueFs   falses   nullzus-asciis   (D:z%Y%m%d%H%M%SZ   )   \s   \\   (s   \(s   \))rp   ro   r   r   r   rr   r   ra   r   floatr   struct_timestrftimedictlistr   r	   replace)r   r   r   r   r   B  s4    




r   c                @   s  e Zd ZdZdrddZdd Zd	d
 Zdd Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd ZdsddZdd Zdd  Zd!d" Zed#d$ Zd%d& Zdtd'd(Zd)Zd*Zd+Zd,Zed- Zed. Zd/Zd0Zee e Ze j!ed1 e d2 e d3 e d4 e d5 e d6 e j"Z#e j!ed1 e d7 e d3 e d4 e d5 e e j"Z$d8d9 Z%d:d; Z&e j!eZ'e j!ed< e d= Z(e j!ed> Z)e j!ed? e Z*e+d@dA Z,e j!dBZ-e+dudDdEZ.e j!edF e d= Z/e j!edG e d= Z0e j!edH e d= Z1e j!edI e d= Z2e j!edJ e d= Z3e j!edK Z4e j!edL Z5e j!edM e dN Z6e j!edO Z7e j!edP e dP e dQ e d= Z8e j!edP e dP e dR e d= Z9e j!edS e d= Z:e j!dTe dU e dV Z;e j!edW Z<e j!edX e d= Z=e+dwdZd[Z>e j!d\Z?d]d^d_d`dadbdcdddedfdTdTd=d=dgdge@d]d^e@d_d`e@dadbe@dcdde@dedfe@dTdTe@d=d=e@dgdgiZAe+dhdi ZBe j!edj e ZCe j!ed4 e d4 e e ZDe j!dkZEdldm ZFdydndoZGdzdpdqZHdS ){rw   z|Based on
    https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf
    Supports PDF up to 1.4
    Nr   rbc             C   s  |r|rt d|| _|| _|| _|| _d| _d| _|d k	rZ|d krZt|| | _}d| _|d k	r| j| | _}d| _| rt	|dr|j
| _i | _|r| j  nTd | _| _t | _d | _t | _d | _i | _g | _g | _d | _d | _i | _t | _d| j_|r| j  d S )Nz4specify buf or f or filename, but not both buf and fFTrq   r   )RuntimeErrorfilenamer   rl   start_offsetshould_close_bufshould_close_fileopenget_buf_from_filehasattrrq   cached_objectsread_pdf_infofile_size_totalfile_size_thisr   rootroot_refinfoinfo_refpage_tree_rootpages
orig_pages	pages_reflast_xref_section_offsettrailer_dictrT   
xref_tablerX   seek_end)rH   r   rl   r   r   moder   r   r   rY   g  sD    
zPdfParser.__init__c             C   s   | S )Nr   )rH   r   r   r   	__enter__  s    zPdfParser.__enter__c             C   s   | j   dS )NF)close)rH   exc_type	exc_value	tracebackr   r   r   __exit__  s    zPdfParser.__exit__c             C   s   | j   | j  d S )N)	close_bufr   )rH   r   r   r   start_writing  s    zPdfParser.start_writingc             C   s.   y| j j  W n tk
r"   Y nX d | _ d S )N)r   r   r   )rH   r   r   r   r     s
    zPdfParser.close_bufc             C   s2   | j r| j  | jd k	r.| jr.| jj  d | _d S )N)r   r   rl   r   r   )rH   r   r   r   r     s
    
zPdfParser.closec             C   s   | j jdtj d S )Nr   )rl   seekosSEEK_END)rH   r   r   r   r     s    zPdfParser.seek_endc             C   s   | j jd d S )Ns	   %PDF-1.4
)rl   ri   )rH   r   r   r   write_header  s    zPdfParser.write_headerc             C   s   | j jd| djd d S )Nz% 
zutf-8)rl   ri   r   )rH   r   r   r   r   write_comment  s    zPdfParser.write_commentc             C   sl   | j   | j| jj | _| jd| _| j  | j| jtd| jd | j| jtdt	| j
| j
d | jS )Nr   s   Catalog)TypeZPagess   Pages)r   ZCountZKids)del_rootnext_object_idrl   rh   r   r   rewrite_pages	write_objro   r:   r   )rH   r   r   r   write_catalog  s    
zPdfParser.write_catalogc             C   s  g }xt | jD ]\}}| j| }| j|j= |j|td  || jkrJqi }x |j D ]\}}|||j	 < qXW | j
|d< | jd|}x(t | jD ]\}	}
|
|kr|| j|	< qW qW xB|D ]:}x4|r| j| }|j| jkr| j|j= |jdd }qW qW g | _d S )Ns   ParentParent)N)rj   r   r   r   rL   r   ro   r   r   rs   r   
write_pager6   )rH   Zpages_tree_nodes_to_deleteiZpage_refZ	page_infoZstringified_page_inforZ   r[   Znew_page_refjZcur_page_refZpages_tree_node_refZpages_tree_noder   r   r   r     s,    






zPdfParser.rewrite_pagesc             C   s   |r| j   || _| jr(| jd | j| _| jj| j}t| j}| j|d}| j	d k	r`| j	|d< | jrp| j|d< || _	| jjdt
t| d|   d S )N)s   Roots   Sizes   Prevs   Infos   trailer
s   
startxref
%d
%%%%EOF)r   r   r   r   r   r   ri   rl   r:   r   rr   r   )rH   Znew_root_refZ
start_xrefZnum_entriesr   r   r   r   write_xref_and_trailer  s    



z PdfParser.write_xref_and_trailerc             O   sL   t |tr| j| }d|kr(td|d< d|kr:| j|d< | j|f||S )Nr   s   Pager   )rp   r   r   ro   r   r   )rH   refobjsdict_objr   r   r   r     s    


zPdfParser.write_pagec             O   s   | j }|d kr| j|j }n|j |jf| j|j< |jtt|  |j	dd }|d k	rft
||d< |rx|jt| x|D ]}|jt| q~W |d k	r|jd |j| |jd |jd |S )Nstreamr   s   stream
s   
endstream
s   endobj
)rl   r   rh   rM   r   rL   ri   rr   rS   rk   r:   r   )rH   r   r   r   rl   r   objr   r   r   r     s$    




zPdfParser.write_objc             C   s.   | j d krd S | j| j j= | j| jd j= d S )Ns   Pages)r   r   rL   r   )rH   r   r   r   r     s    
zPdfParser.del_rootc             C   sT   t | dr| j S t | dr$| j S ytj| j dtjdS  tk
rN   dS X d S )N	getbuffergetvaluer   )accessr   )r   r   r   mmapfilenoZACCESS_READ
ValueError)rl   r   r   r   r     s    

zPdfParser.get_buf_from_filec             C   s   t | j| _| j| j | _| j  | jd | _| jjdd | _	t
| j| j| _| j	d krdt
 | _nt
| j| j	| _td| jkd t| jd dkd td| jkd tt| jd td	 | jd | _| j| j| _| j| j| _| jd d  | _d S )
Ns   Roots   Infos   Typez/Type missing in Roots   Catalogz/Type in Root is not /Catalogs   Pagesz/Pages missing in Rootz+/Pages in Root is not an indirect reference)r:   r   r   r   r   read_trailerr   r   r6   r   r   read_indirectr   r   rF   rp   rG   r   r   linearize_page_treer   r   )rH   r   r   r   r     s(    

zPdfParser.read_pdf_infoc             C   sX   yt t| jj d d}W n tk
r:   t dd}Y nX |d k	rT|df| j|j< |S )Nr_   r   )rG   maxr   re   r   rL   )rH   r   	referencer   r   r   r   9  s    zPdfParser.next_object_ids   [][()<>{}/%]s$   [][()<>{}/%\000\011\012\014\015\040]s   [\000\011\012\014\015\040]s#   [\000\011\012\014\015\0400-9a-fA-F]   *   +s   [\000\011\014\015\040]*s   [\r\n]+s   trailers   \<\<(.*\>\>)s	   startxrefs   ([0-9]+)s   %%EOF   $s   \<\<(.*?\>\>)c             C   s   t | jd }|| jk r| j}| jj| j|}t|d |}x$|r`|}| jj| j|j d }q>W |sj|}|jd}t|jd| _	| j
|| _t | _| j| j	d d| jkr| j| jd  d S )Ni @  ztrailer end not found   r_   r   )xref_section_offsets   Prev)r:   r   r   re_trailer_endsearchrF   startgroupr   r   interpret_trailerr   rT   r   read_xref_tableread_prev_trailer)rH   Zsearch_start_offsetmZ
last_matchtrailer_datar   r   r   r   j  s$    



zPdfParser.read_trailerc             C   sv   | j |d}| jj| j||d  }t|d |jd}tt|jd|kd | j|}d|krr| j|d  d S )N)r	  i @  zprevious trailer not foundr_   r   zGxref section offset in previous trailer doesn't match what was expecteds   Prev)	r  re_trailer_prevr  r   rF   r  r   r  r  )rH   r	  Ztrailer_offsetr  r  r   r   r   r   r    s    


zPdfParser.read_prev_trailers   /([!-$&'*-.0-;=?-Z\\^-z|~]+)(?=r   s   \<\<s   \>\>c             C   s   i }d}x|| j j||}|sV| jj||}t|o>|j t|kdt||d    P | j|jd}| j	||j \}}|||< q
W td|kot
|d td td|kot
|d td |S )Nr   z+name not found in trailer, remaining data: r_   s   Sizez&/Size not in trailer or not an integers   Rootz1/Root not in trailer or not an indirect reference)re_namematchre_dict_endrF   endr:   ru   rx   r  	get_valuerp   r   rG   )ry   r  trailerr   r  rZ   r[   r   r   r   r    s*    zPdfParser.interpret_trailers   ([^#]*)(#([0-9a-fA-F]{2}))?Fc             C   sr   d}xR| j j|D ]B}|jdrF||jdtj|jdjd 7 }q||jd7 }qW |rf|jdS t|S d S )Nr      r_   zus-asciizutf-8)re_hashes_in_namefinditerr  r   fromhexr;   rr   )ry   rawZas_textrq   r  r   r   r   rx     s    
&
zPdfParser.interpret_names   null(?=s   true(?=s   false(?=s   ([-+]?[0-9]+)(?=s)   ([-+]?([0-9]+\.[0-9]*|[0-9]*\.[0-9]+))(?=s   \[   ]s   \<(s   *)\>s   \(s   ([-+]?[0-9]+)s   R(?=s   obj(?=s	   endobj(?=r   s	   %[^\r\n]*s   )*s   stream\r?\ns   endstream(?=r_   c             C   s  |dkrdS | j j||}|r&|j }| jj||}|rtt|jddkd tt|jddkd t|d kp|tt|jdt|jdkd | j||j |d d\}}|d kr|d fS | j	j||}t|d ||j fS t| d	 | j
j||}|r^tt|jddkd
 tt|jddkd tt|jdt|jd|j fS | jj||}|r|j }i }| jj||}xv|s| j|||d d\}}|d kr|d fS | j|||d d\}	}|	||< |d kr|d fS | jj||}qW |j }| jj||}|ryt|d }
W n@ tttfk
rp } ztd|jdd  |W Y d d }~X nX ||j |j |
  }| jj||j |
 }t|d |j }tt||}nt|}||fS | jj||}|r\|j }g }| jj||}xN|sN| j|||d d\}	}|j|	 |d kr<|d fS | jj||}qW ||j fS | jj||}|r|d |j fS | jj||}|rd|j fS | jj||}|rd|j fS | jj||}|rt| j|jd|j fS | jj||}|rt|jd|j fS | j j||}|r@t!|jd|j fS | j"j||}|rt#dd |jdD }t$|d dkr|jt%d t#j&|j'd|j fS | j(j||}|r| j)||j S tdt*|||d   d S )Nr   r_   z<indirect object definition: object ID must be greater than 0r   z;indirect object definition: generation must be non-negativez2indirect object definition different than expected)max_nestingz(indirect object definition end not foundz$indirect object definition not foundz;indirect object reference: object ID must be greater than 0z:indirect object reference: generation must be non-negatives   Lengthz)bad or missing Length in stream dict (%r)zstream end not foundTFc             S   s   g | ]}|d kr|qS )s   0123456789abcdefABCDEFr   )r8   r=   r   r   r   
<listcomp>a  s    z'PdfParser.get_value.<locals>.<listcomp>   0zus-asciizunrecognized object:     )NN)+
re_commentr  r  re_indirect_def_startrF   r   r  rG   r  re_indirect_def_endre_indirect_referencere_dict_startr  re_stream_start	TypeErrorr]   r   r?   r6   re_stream_endr   r   re_array_startre_array_endr   re_nullre_truere_falser  ro   rx   re_intre_realr   re_string_hexr   r:   r~   r  r;   re_string_litget_literal_stringru   )ry   rz   r   expect_indirectr   r  objectr   rZ   r[   Z
stream_lenr   Zstream_dataZ
hex_stringr   r   r   r    s    

&




zPdfParser.get_valuesF   (\\[nrtbf()\\])|(\\[0-9]{1,3})|(\\(\r\n|\r|\n))|(\r\n|\r|\n)|(\()|(\))   nr      r      t   	   b      f   r   c             C   s  d}t  }x| jj||D ]}|j|||j   |jdrZ|j| j|jdd   n|jdr|jt|jddd  d nt|jdrnh|jdr|jd nR|jdr|jd	 |d7 }n4|jd
r|dkrt	||j
 fS |jd |d8 }|j
 }qW tdd S )Nr   r_   r      r     r      r      r   zunfinished literal string)r   re_lit_str_tokenr  r   r  r  escaped_charsr   r   rr   r  r?   )ry   rz   r   Znesting_depthr   r  r   r   r   r5    s,    

 






zPdfParser.get_literal_strings   xrefs+   ([0-9]{10}) ([0-9]{5}) ([fn])( \r| \n|\r\n)c             C   s  d}| j j| j|| j }t|d |j }x| jj| j|}|sNt|d P d}|j }t|jd}t|jd}xt	||| D ]}| j
j| j|}t|d |j }|jdd	k}t|jd}	|st|jd|	f}
t|| jkp| j| |
kd
 |
| j|< qW q.W |S )NFzxref section start not foundzxref subsection start not foundTr_   r   zxref entry not foundr  r?  z)xref entry duplicated (and not identical))re_xref_section_startr  r   r   rF   r  re_xref_subsection_startr   r  r   re_xref_entryr   )rH   r	  Zsubsection_foundr  r   Zfirst_objectnum_objectsr   Zis_freerM   Z	new_entryr   r   r   r    s8    

zPdfParser.read_xref_tablec          
   C   st   | j |d  \}}t||d kd|d  d|d  d| d|  | j| j|| j t| |dd }|| j|< |S )Nr   r_   zexpected to find generation z for object ID z) in xref table, instead found generation z at offset )r6  r   )r   rF   r  r   r   rG   r   )rH   r   r   r   rM   r[   r   r   r   r    s    
&
zPdfParser.read_indirectc             C   sn   |d kr| j }t|d dkd g }xD|d D ]8}| j|}|d dkrT|j| q.|j| j|d q.W |S )Ns   Types   Pagesz%/Type of page tree node is not /Pagess   Kidss   Page)node)r   rF   r  r   r   r  )rH   rK  r   ZkidZ
kid_objectr   r   r   r    s    
zPdfParser.linearize_page_tree)NNNr   r   )N)N)Fr   )Nr   r   )r   )N)Ir@   rA   rB   rC   rY   r   r   r   r   r   r   r   r   r   r   r   r   r   r   staticmethodr   r   r   	delimiterZdelimiter_or_ws
whitespaceZwhitespace_or_hexZwhitespace_optionalZwhitespace_mandatoryZwhitespace_optional_no_nlZnewline_onlynewlinerecompileDOTALLr
  r  r   r  Zre_whitespace_optionalr  r(  r  r   r  r  rx   r.  r/  r0  r1  r2  r,  r-  r3  r4  r'  r%  r&  r$  r)  r+  r  rE  r~   rF  r5  rG  rH  rI  r  r  r  r   r   r   r   rw   a  s   
%
	

.*

""y
!
rw   )r   r   r   r   r   rP  r   r   r	   r5   r>   r   r?   rF   
namedtuplerG   rS   rT   ro   r   r   r   r   r   r   r   rw   r   r   r   r   <module>   s~   
['0