B
    .(b*                 @   s  d dl mZ d dlmZmZ d dlmZmZmZ d dlmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZ dd	lmZ d d
lZd d
lZdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zej !d!d"d#d$gd%d$gd&d'd'dgd'd(gd d d)i ffd"d#d*d$gd+d$gd,d'd'd'dgd'd&gd d d d-i ffd"d.gd%d$gd,d'd'gd'd'gd d iddiffd%d$d/gd"d#d0gd1d'd'd'gd'd'd'gd(d(d2d d d)ffd"d#d0gd"d#d$d/gd&d dd'gd dd'd'gi d(d(d3ffd4d"gd"gdd'd gdgi i ffgd5d6 Z"d7d8 Z#d
S )9    )unicode_literals)biluo_tags_from_offsetsoffsets_from_biluo_tags)spans_from_biluo_tags	GoldParseiob_to_biluo)
GoldCorpusdocs_to_jsonalign)English)Doc)get_words_and_spaces   )make_tempdirNc             C   sb   dddddg}dddddg}t | ||d}td	td
dfg}t||}|dddddgks^td S )NIflewtoLondon.TF)wordsspacesz
I flew to zI flew to LondonLOCOzU-LOC)r   lenr   AssertionError)en_vocabr   r   docentitiestags r   D/home/dcms/DCMS/lib/python3.7/site-packages/spacy/tests/test_gold.pytest_gold_biluo_U   s    
r!   c             C   sh   ddddddg}ddddddg}t | ||d	}td
tddfg}t||}|ddddddgksdtd S )Nr   r   r   San	Franciscor   TF)r   r   z
I flew to zI flew to San Franciscor   r   zB-LOCzL-LOC)r   r   r   r   )r   r   r   r   r   r   r   r   r    test_gold_biluo_BL   s    
r$   c             C   sn   dddddddg}dddddd	dg}t | ||d
}tdtddfg}t||}|dddddddgksjtd S )Nr   r   r   r"   r#   Valleyr   TF)r   r   z
I flew to zI flew to San Francisco Valleyr   r   zB-LOCzI-LOCzL-LOC)r   r   r   r   )r   r   r   r   r   r   r   r   r    test_gold_biluo_BIL!   s    
r&   c          	   C   sz   dddddddg}dddddd	dg}t | ||d
}tdtddftdtddfg}tt t|| W d Q R X d S )Nr   r   r   r"   r#   r%   r   TF)r   r   z
I flew to zI flew to San Francisco Valleyr   zI flew to San Francisco)r   r   pytestraises
ValueErrorr   )r   r   r   r   r   r   r   r    test_gold_biluo_overlap*   s    r*   c          	   C   s~   ddddddg}ddddddg}t | ||d	}td
tddfg}tt t||}W d Q R X |ddddddgksztd S )Nr   r   r   r"   r#   zValley.TF)r   r   z
I flew to zI flew to San Francisco Valleyr   r   -)r   r   r'   ZwarnsUserWarningr   r   )r   r   r   r   r   r   r   r   r    test_gold_biluo_misalign6   s    r-   c          	   C   sr  ddddg}ddddg}t | ||d}tdtd	d
fg}t|dddddddg|d}|jddddgksltdddddddg}dddddddg}t | ||d}tdtd	d
fg}t|ddddg|d}|jdddddddgkstdddddg}dddddg}t | ||d}tdtd	d
fg}t|dddddg|d}|jdddddgksTttddddddgd\}}t | ||d}tdtdd
fg}t|ddddddg|d}|jdddddddgkstddddd d!d"d#gd$gdf}t||d% f|d& }|jddddd'dgkstd(dddd d)d"d#gd$gdf}t||d% f|d& }|jddddd*d+dgksntd S ),Nr   zflew tozSan Francisco Valleyr   TF)r   r   z
I flew to zI flew to San Francisco Valleyr   r   r   r"   r#   r%   )r   r   r   zU-LOCzB-LOCzI-LOCzL-LOCzI flewzSan FranciscozFrancisco Valleyz I flew  to San Francisco Valley.zI flew  to zI flew  to San Francisco Valley u   I'll return the ₹54 amountz'llreturnZtheu   ₹Z54amount)      ZMONEYr   r   zU-MONEYzI'll return the $54 amount$zB-MONEYzL-MONEY)r   r   r   nerr   r   )r   en_tokenizerr   r   r   r   Zgpdatar   r   r    &test_gold_biluo_different_tokenization@   sV    
r7   c             C   sX   d}ddddddddg}ddg}| |}t ||}||ks>tt||}||ksTtd S )Nz$I flew to Silicon Valley via London.r   zB-LOCzL-LOCzU-GPE)
      r   )   #   GPE)r   r   r   )r5   text
biluo_tagsoffsetsr   Zbiluo_tags_convertedZoffsets_convertedr   r   r    'test_roundtrip_offsets_biluo_conversion   s    

r@   c             C   s   | d}ddddddddg}t ||}t|dks6t|d jdksHt|d jd	ksZt|d
 jdkslt|d
 jdks~td S )Nz$I flew to Silicon Valley via London.r   zB-LOCzL-LOCzU-GPE   r   zSilicon Valleyr   r   r   r<   )r   r   r   r=   Zlabel_)r5   r   r>   Zspansr   r   r    test_biluo_spans   s    
rB   c             C   s,   | d}d dddddddg}t ||d}d S )Nz$I flew to Silicon Valley via London.r   zB-LOCzL-LOCzU-GPE)r   )r   )r5   r   r>   Zgoldr   r   r    test_gold_ner_missing_tags   s    rC   c           	   C   sd   ddddddg} ddddddg}dddddg}t | }||ksBttt t | W d Q R X d S )Nr   zB-LOCzI-LOCzB-PERSONzL-LOCzU-PERSON")r   r   r'   r(   r)   )Zgood_iobZ
good_biluoZbad_iobZconverted_biluor   r   r    test_iob_to_biluo   s    rE   c           	   C   s  d} ddddddddg}ddddd	dd
dg}ddddddddg}ddddddddg}ddd}t  }|| }xBtt|D ]2}|| || _|| || _|||  || _qzW t|||_||_d|_	d|_
t 2}	|	d }
t|
t|g tt|
t|
}W d Q R X t||\}}t|| ks4t| |jksDt||jksTt||jksdt||jkstt||jkstd|jkstd|jkst|d |jd kst|d |jd kstt 2}	|	d }t|t|g tt|t|}W d Q R X t||\}}t|| ks:t| |jksJt||jksZt||jksjt||jkszt||jkstd|jkstd|jkst|d |jd kst|d |jd kstt R}	|	d }t|t|g tt|t|}t||j tt|t|}W d Q R X t||\}}t|| ks`t| |jkspt||jkst||jkst||jkst||jkstd|jkstd|jkst|d |jd kst|d |jd ks td S )Nz$I flew to Silicon Valley via London.ZPRPZVBDINZNNPr   r      rA      ZnsubjROOTprepZcompoundZpobjpunctr   zB-LOCzL-LOCzU-GPEg      ?g        )TRAVELBAKINGTzroundtrip.jsonrL   rM   zroundtrip.jsonl)r   ranger   Ztag_Zdep_headr   ZentscatsZ	is_taggedZ	is_parsedr   srsly
write_jsonr	   r   strnextZ
train_docsZcount_trainr   r=   r   labelsheadsr4   Zwrite_jsonlZtrain_tuples)r=   r   rV   depsr>   rP   Znlpr   iZtmpdirZ	json_fileZ
goldcorpusZreloaded_docZ	goldparseZ
jsonl_filer   r   r    test_roundtrip_docs_to_json   s~    
rY   ztokens_a,tokens_b,expectedabcab   rA   )r   r   rD   zab"rG   )r   r   rA   ZbcdZcd   )r   rA   )rA   r^   r.   c             C   sh   t | |\}}}}}|t|t|||f|ks2tt || \}}}}}|t|t|||f|ksdtd S )N)r
   listr   )Ztokens_aZtokens_bexpectedZcostZa2bZb2aZ	a2b_multiZ	b2a_multir   r   r    
test_align   s    rd   c             C   s`   d}| |}t |dgdgdgdgd}|jddgks8t|jd dgksJt|jd dgks\td S )Nz arZ   zU-DATErI   r   )r   r   rW   rV   r.   )r   r   r   r4   rU   )r5   r=   r   gr   r   r    test_goldparse_startswith_space  s    rf   )$
__future__r   Z
spacy.goldr   r   r   r   r   r   r	   r
   Zspacy.lang.enr   Zspacy.tokensr   Z
spacy.utilr   utilr   r'   rQ   r!   r$   r&   r*   r-   r7   r@   rB   rC   rE   rY   markZparametrizerd   rf   r   r   r   r    <module>   sF   			
E
P,
"*&
",