B
    .(b)                 @   s  d dl mZ d dlZd dlZd dlmZmZ d dlmZ d dl	m
Z
mZmZmZmZ ddlmZ ejdd	d
dggdd Zdd Zejdddgdd Zdd Zdd Zdd Zdd Zdd Zdd Zejd d!dd"d"d ged ddd#gdd"dd#gdddd#gd#d#d#d#ggfd$dd"d"d d%dd"d"d g	ed ddd#d#d%d%d%d%g	dd"dd#d#d%d%d%d%g	dddd#d#d%d%d%d%g	d#d#d#d#d#d%d%d%d%g	d#d#d#d#d&d%d%d%d%g	d%d%d%d%d%d'd(d(d)g	d%d%d%d%d%d(d*d(d)g	d%d%d%d%d%d(d(d(d)g	d%d%d%d%d%d)d)d)d)g	g	fgd+d, Zd-d. Zd/d0 Zd1d2 Z dS )3    )unicode_literalsN)DocSpan)Vocab)ENT_TYPEENT_IOB
SENT_STARTHEADDEP   )get_doctextZoneZtwoZthreec             C   s|   t | |d}|d }|d }|d }|\}}}||  k rB|k sHn t||krTt||ks`t||kslt||ksxtd S )N)words)r   AssertionError)en_vocabr   docZtoken3Ztoken2Ztoken1 r   K/home/dcms/DCMS/lib/python3.7/site-packages/spacy/tests/doc/test_doc_api.py'test_doc_api_compare_by_string_position   s    
r   c          	   C   sV  d}| |}|d j dkst|d j dks0ttt |t|  W d Q R X dd }|dd }||rrt|dd	 }||d
kst|dd	d }||d
ksttt |dd	d  W d Q R X tt |dd	d  W d Q R X |dd }||dkst|d	d }||dks0t|dd }||dksNt|dd	 }|j|j  krvdkrn n
||rt|d	d }|j|j  krd	krn n
||rt|d d  }||dkst|d	d  }||dkst|d d	 }||dkst|d d }||dks:t|dd  }||dksXt|d	d }||dksvt|dd	 }||dkst|dd }|j|j  krdkrn n
||rt|dd }|j|j  krdkrn n
||rt|dd	 }|d j	dks(t|d d  }||d
ksFt|d d }||dksdt|dd  }||dkst|d d }||dkst|dd  }||dkst|dd }||dkst|dd }||dkst|dd }||d
kst|dd }|j|j  kr@d	krNn n
||rRtd S )NzGive it back! He pleaded.r   ZGiver   .c             S   s   d dd | D S )N/c             s   s   | ]}|j V  qd S )N)r   ).0tokenr   r   r   	<genexpr>&   s    z7test_doc_api_getitem.<locals>.to_str.<locals>.<genexpr>)join)spanr   r   r   to_str%   s    z$test_doc_api_getitem.<locals>.to_str      z	it/back/!r      z
He/pleadedzback/!   zGive/it/back/!/He/pleaded/.zHe/pleaded/.zGive/it/back/!2   ii(      itzit/backr   back)
r   r   pytestraises
IndexErrorlen
ValueErrorstartendZorth_)en_tokenizerr   tokensr   r   Zsubspanr   r   r   test_doc_api_getitem   s    ....r3   zGive it back! He pleaded.z Give it back! He pleaded. c             C   s  | |}d|d _ d|d _d|d _d|d _t|j| }|j|jksTt	dd |D dd |D kstt	dd |D dd |D kst	|d j dkst	|d jdkst	|d jdkst	|d jdkst	t|jj|jd	gd
d	gd
}|j|jkst	dd |D dd |D ks0t	dd |D dd |D ksRt	t|jj|jdgd
dgd
}|j|jkst	dd |D dd |D kst	dd |D dd |D kst	d S )Nlemmar   ZnormZ	ent_kb_idZent_idc             S   s   g | ]
}|j qS r   )r   )r   tr   r   r   
<listcomp>s   s    z*test_doc_api_serialize.<locals>.<listcomp>c             S   s   g | ]
}|j qS r   )orth)r   r5   r   r   r   r6   t   s    Ztensor)excludec             S   s   g | ]
}|j qS r   )r   )r   r5   r   r   r   r6   ~   s    c             S   s   g | ]
}|j qS r   )r7   )r   r5   r   r   r   r6      s    Z	sentimentc             S   s   g | ]
}|j qS r   )r   )r   r5   r   r   r   r6      s    c             S   s   g | ]
}|j qS r   )r7   )r   r5   r   r   r   r6      s    )
Zlemma_Znorm_Z
ent_kb_id_Zent_id_r   vocab
from_bytesto_bytesr   r   )r1   r   r2   Z
new_tokensr   r   r   test_doc_api_serializeh   s.    



  
""
"r<   c          	   C   s   d}| |}t |jdkst|jjd ddfg|_t t|jdksJtdd |D ddd	dddddgkspt|jd jdkst|jd jdkst|jd jdkstd S )
Nz#I use goggle chrone to surf the webr   ZPRODUCTr   r    r   c             S   s   g | ]
}|j qS r   )Zent_iob)r   r5   r   r   r   r6      s    z)test_doc_api_set_ents.<locals>.<listcomp>   )	r-   entsr   r9   stringslistZlabel_r/   r0   )r1   r   r2   r   r   r   test_doc_api_set_ents   s    &rA   c             C   s,   | d}d|_ t|j}t|dks(td S )N Tr   )	is_parsedr@   sentsr-   r   )r1   r   rD   r   r   r   test_doc_api_sents_empty_string   s    
rE   c       	      C   s  d}ddddddddd	dd
ddddddddd	dddddddddddg}| |}t |jdd |D |d}g }xP|jD ]F}x*t|dkr|d jdkr|dd  }qzW t|dkrt|| qtW | 8}x0|D ](}|jj|j	|jj
d}|j||d qW W d Q R X d S )Nu   67% of black households are single parent 

72% of all black babies born out of wedlock 

50% of all black kids don’t finish high schoolZnummodZnsubjprepamodZpobjROOTattrrB   ZapposZdetZaclZauxnegZccompZdobjc             S   s   g | ]
}|j qS r   )r   )r   r5   r   r   r   r6      s    z.test_doc_api_runtime_error.<locals>.<listcomp>)r   depsr   r   )ZadvmodrG   Zcompound)tagr4   Zent_type)attrs)r   r9   Znoun_chunksr-   dep_appendZ
retokenizerootZtag_r   Z	ent_type_merge)	r1   r   rK   r2   r   ZnpsnpZretokenizerrM   r   r   r   test_doc_api_runtime_error   s$    

rS   c             C   s   d}dddddddddddddddddddd	dd
dddddddg}| |}t |jdd |D |d}|d jdksvtdd |d jD }|ddddddddddddddgkst|d jjdkstdS )zHTest for bug occurring from Unshift action, causing incorrect right edgezI have proposed to myself, for the sake of such as live under the government of the Romans, to translate those books into the Greek tongue.r   r   r   r   r!      r   iiic             S   s   g | ]
}|j qS r   )r   )r   r5   r   r   r   r6      s    z+test_doc_api_right_edge.<locals>.<listcomp>)r   headsr"   forc             S   s   g | ]
}|j qS r   )r   )r   wr   r   r   r6      s    ZtheZsakeZofZsuchasliveZunderZ
governmentZRomans,N)r   r9   r   r   subtreeZ
right_edge)r1   r   rU   r2   r   r[   r   r   r   test_doc_api_right_edge   s.    $r\   c              C   sJ   t  } | jdd | jdtjddgddd t| dgd	}|jsFtd S )
Nr   )widthZkitteng        g       @f)dtype)Zvector)r   )r   Zreset_vectorsZ
set_vectornumpyZasarrayr   Z
has_vectorr   )r9   r   r   r   r   test_doc_api_has_vector   s
    ra   c           	   C   s   t t dgd} | | d dks&t| | jd dks>tt | jdddgd}tt2 | |d d dksxt| |dkstW d Q R X d S )	Na)r   r   g      ?bcr   g        )r   r   Z
similarityr   r9   r*   ZwarnsUserWarning)r   Zdoc2r   r   r   test_doc_api_similarity_match   s    rf   zsentence,heads,lca_matrixzthe lazy dog sleptr   r=   z(The lazy dog slept. The quick fox jumpedr   r    r$   r'      r"   c             C   sn   | |}t |jdd |D |d}| }||k s:t|d dksJt|d dksZt|d dksjtd S )	Nc             S   s   g | ]
}|j qS r   )r   )r   r5   r   r   r   r6     s    z/test_lowest_common_ancestor.<locals>.<listcomp>)rU   )r   r   r   )r   r   r   )r   r   )r   r9   Zget_lca_matrixallr   )r1   ZsentencerU   Z
lca_matrixr2   r   Zlcar   r   r   test_lowest_common_ancestor   s    ri   c             C   s   dddddg}t | |d}|jr$tt|ddd	d
g|_|jsBttjddgddgddgddgddggdd}t | |dtt	g|}|jstt | 
| }|jstd S )NIrY   inNewYork)r   r=   r$   ZGPE)labelr   i  r   Zuint64)r_   )r   Zis_neredr   r   r>   r`   array
from_arrayr   r   r:   r;   )r   r   r   arrnew_docr   r   r   test_doc_is_nered  s    

,
rs   c             C   s  ddddddddddg
}d	d	d	d	d	d	d
d
d
d
g
}dddddddddddg}t | |d}xHtt||D ]6\}\}}||| _|| || _||krfd|| _qfW |j ttg}|	|}	t | |d}
t
t |
||	 W d Q R X ttg}|	|}	t | |d}
|
||	 dd |D dd |
D ks2t|
jr>tttg}|	|}	t | |d}
|
||	 dd |D dd |
D kst|
jstd S )Nrj   rY   rk   rl   rm   r   likeZcatsr   r"   rH   dep)r   Tc             S   s   g | ]
}|j qS r   )is_sent_start)r   r5   r   r   r   r6   3  s    z3test_doc_from_array_sent_starts.<locals>.<listcomp>c             S   s   g | ]
}|j qS r   )rv   )r   r5   r   r   r   r6   :  s    )r   	enumerateziprN   headrv   rC   r   r	   Zto_arrayr*   r+   r.   rp   r
   r   )r   r   rU   rK   r   iru   ry   rM   rq   rr   r   r   r   test_doc_from_array_sent_starts  s6    


"
"r{   c             C   s6   t | ddgd}|jdkst|j| jd ks2td S )NZHelloZworld)r   en)r   Zlang_r   langr?   )r   r   r   r   r   test_doc_lang>  s    r~   )!
__future__r   r*   r`   Zspacy.tokensr   r   Zspacy.vocabr   Zspacy.attrsr   r   r   r	   r
   utilr   markZparametrizer   r3   r<   rA   rE   rS   r\   ra   rf   ro   ri   rs   r{   r~   r   r   r   r   <module>   sH   K 

2(#