B
    .(b"                 @   s`  d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d d	lmZmZ d d
lmZ d dlmZ d dlmZ d dlZd dlZddlmZ dd Zdd Zejdddgdd Z dd Z!dd Z"dd Z#dd Z$d d! Z%ejdd"d#d$gejd%ee
gd&d' Z&d(d) Z'd*d+ Z(d,d- Z)d.d/ Z*d0d1 Z+d2d3 Z,dS )4    )unicode_literalsN)displacy)English)Japanese)MultiLanguage)Language)Matcher)DocSpan)Vocab)pickle)link_vectors_to_models   )get_docc           	   C   sn   t  } | d}tt |  W dQ R X | | | d}|jsJt| 	ddg}t
|}|jsjtdS )zETest the tagger sets is_tagged correctly when used via Language.pipe.taggerNzhello worldZhelloworld)r   create_pipepytestZwarnsUserWarningbegin_trainingadd_pipeZ	is_taggedAssertionErrorpipenext)nlpr   docZdocsZ	piped_doc r   Y/home/dcms/DCMS/lib/python3.7/site-packages/spacy/tests/regression/test_issue2501-3000.pytest_issue2564   s    


r   c                s   | d t  dd jjd dg _t j}|ddddgg  fd	d
| D }t|tdd}t|dksxtt|d dkst|d j	dkstdS )zTest that operator + is greedy.zIt is May 15, 1993.r      ZDATE)labelZRULE+)ZENT_TYPEOPc                s   g | ]\}}} || qS r   r   ).0_startend)r   r   r   
<listcomp>)   s    z"test_issue2569.<locals>.<listcomp>T)keyreverse
   r      zMay 15, 1993N)
r
   vocabstringsentsr   addsortedlenr   text)en_tokenizermatchermatchedr   )r   r   test_issue2569#   s    
r6   r2   u  ABLEItemColumn IAcceptance Limits of ErrorIn-Service Limits of ErrorColumn IIColumn IIIColumn IVColumn VComputed VolumeUnder Registration of VolumeOver Registration of VolumeUnder Registration of VolumeOver Registration of VolumeCubic FeetCubic FeetCubic FeetCubic FeetCubic Feet1Up to 10.0100.0050.0100.005220.0200.0100.0200.010350.0360.0180.0360.0184100.0500.0250.0500.0255Over 100.5% of computed volume0.25% of computed volume0.5% of computed volume0.25% of computed volume TABLE ItemColumn IAcceptance Limits of ErrorIn-Service Limits of ErrorColumn IIColumn IIIColumn IVColumn VComputed VolumeUnder Registration of VolumeOver Registration of VolumeUnder Registration of VolumeOver Registration of VolumeCubic FeetCubic FeetCubic FeetCubic FeetCubic Feet1Up to 10.0100.0050.0100.005220.0200.0100.0200.010350.0360.0180.0360.0184100.0500.0250.0500.0255Over 100.5% of computed volume0.25% of computed volume0.5% of computed volume0.25% of computed volume ItemColumn IAcceptance Limits of ErrorIn-Service Limits of ErrorColumn IIColumn IIIColumn IVColumn VComputed VolumeUnder Registration of VolumeOver Registration of VolumeUnder Registration of VolumeOver Registration of VolumeCubic FeetCubic FeetCubic FeetCubic FeetCubic Feet1Up to 10.0100.0050.0100.005220.0200.0100.0200.010350.0360.0180.0360.0184100.0500.0250.0500.0255Over 100.5% of computed volume0.25% of computed volume0.5% of computed volume0.25% of computed volumezoow.jspsearch.eventoracleopenworldsearch.technologyoraclesolarissearch.technologystoragesearch.technologylinuxsearch.technologyserverssearch.technologyvirtualizationsearch.technologyengineeredsystemspcodewwmkmppscem:c             C   s   | |}|st dS )zDCheck that sentence doesn't cause an infinite loop in the tokenizer.N)r   )r3   r2   r   r   r   r   test_issue2626_28350   s    	r7   c             C   s   | d}t |dkst|d jdks*t|d jdks<t|d jdksNt|d	 jd
ks`t|d jdksrt|d jdkst|d jdkst|d jdkst|d jdkst|d jdkst|d jdkstdS )z`Test that tokenizer correctly splits off punctuation after numbers with
    decimal points.
    z&I went for 40.3, and got home by 10.0.   r   I   Zwentr   for   z40.3r+   ,   andr   got   home   Zby	   z10.0r*   .N)r1   r   r2   )r3   r   r   r   r   test_issue2656=   s    rF   c              C   s   t  } t| j}d}ddidddddig}|||g | d}| d	}||}x&|D ]\}}}	| jj| |ksXtqXW ||}
x&|
D ]\}}}	| jj| |kstqW d
S )z^Ensure the correct entity ID is returned for matches with quantifiers.
    See also #2675
    Ztest_patternZLOWERhighT?)ZIS_PUNCTr"   Z
adrenalinez$This is a high-adrenaline situation.z$This is a high adrenaline situation.N)r   r   r,   r/   r-   r   )r   r4   Z
pattern_idpatternZdoc1Zdoc2Zmatches1Zmatch_idr%   r&   Zmatches2r   r   r   test_issue2671P   s    

rJ   c             C   sr   t | dddgd}t|ddddg|_tj|dd	}d
|ks@tt|ddddg|_tj|dd	}d
|ksntdS )z9Test that displaCy ENT visualizer escapes HTML correctly.testz	<RELEASE>)wordsr   r:   ZTEST)r    ent)stylez&lt;RELEASE&gt;r   N)r	   r
   r.   r   renderr   )en_vocabr   htmlr   r   r   test_issue2728g   s    rR   c             C   s8   | d}|d j dkst| d}|d j dks4tdS )zFTest that words like 'a' and 'a.m.' don't get exceptional norm values.ar   amN)Znorm_r   )r3   rS   rT   r   r   r   test_issue2754r   s    rU   c             C   s\   d  }dddddddddd	ddd
dg}dgt| }t| |||d}|d jdksXtdS )zATest that deprojectivization doesn't mess up sentence boundaries.zHWhen we write or communicate virtually , we can hide our true feelings .r+   r:   rA   r<   r   r   dep)rL   headsdepsN)splitr1   r   Zis_sent_startr   )rP   rL   r[   r\   r   r   r   r   test_issue2772z   s
     r^   z-0.23z+123,456u   ±1lang_clsc             C   s0   | }|| }t |dkst|d js,tdS )z2Check that like_num handles + and - before number.r:   r   N)r1   r   Zlike_num)r2   r_   r   r   r   r   r   test_issue2782   s    r`   c        
   	   C   s   g } |  ddg ifg dd tdD }t }|d}|| xt|D ]}|| qNW | }xHtdD ]<}i }t	|  x(| D ] \}}	|j
|g|	g||dd	 qW qrW d
S )zdTest issue that arises when too many labels are added to NER model.
    Used to cause segfault.
    zOne sentenceentitiesc             S   s   g | ]}t |qS r   )str)r#   ir   r   r   r'      s    z"test_issue2800.<locals>.<listcomp>i  ner   g      ?)ZsgdlossesZdropN)extendranger   r   r   listZ	add_labelr   randomshuffleupdate)
Z
train_dataZentity_typesr   rd   Zentity_typeZ	optimizerrc   rf   Z	statementra   r   r   r   test_issue2800   s    


rm   c             C   s   | d}t |dkst|d jdks*t|d jdks<t|d jdksNt|d jd	ks`t|d
 jdksrt|d jdkst|d jdkstdS )z7Test that the abbreviation of poco is kept as one word.zVuoi un po' di zucchero?r   r   ZVuoir:   Zunr   zpo'Zpocor<   Zdir+   Zzuccheror>   rH   N)r1   r   r2   Zlemma_)Zit_tokenizerr   r   r   r   test_issue2822   s    rn   c          	   C   s`   t | ddgd}tt t|d  W dQ R X tt t|dd  W dQ R X dS )zATest that a custom error is raised if a token or span is pickled.ZHellor   )rL   r   Nr   )r	   r   ZraisesNotImplementedErrorr   dumps)rP   r   r   r   r   test_issue2833   s
    rq   c              C   s   dddg} t dd}|jjdd tjddd	}x$| D ]}|| }|||d
  q6W d|j_t| |d jd
ksxt	|d jdkst	|d jdkst	|jj
ddd
kst	|jj
dddkst	|jj
dddkst	dS )zCTest that vectors recover the correct key for spaCy reserved words.ZdogcatZSUFFIXtest_issue2871)Zvectors_name)r<   r*   )shapef)Zdtyper   Zdummy_vectorsr:   r   )r(   N)r   ZvectorsresizenumpyzerosZ
set_vectornamer   Zrankr   find)rL   r,   Zvector_datawordr$   r   r   r   rs      s    


rs   c              C   s<   y
t  } W n tk
r&   t  Y nX | d}|s8tdS )zTest that `nlp` doesn't fail.u   pythonが大好きですN)r   ImportErrorr   skipr   )r   r   r   r   r   test_issue2901   s    
r~   c             C   s   | d}t |dkst|d jdks*t|d jdks<t|d jdksNt|d	 jd
ks`t|d jdksrt|d jdkst|d jdkst|d jdkstdS )zdTest that the tokenizer correctly splits tokens separated by a slash (/)
    ending in a digit.
    z"Learn html5/css3/javascript/jqueryrC   r   ZLearnr:   html5r   /r<   Zcss3r+   r>   Z
javascriptr   rA   ZjqueryN)r1   r   r2   )Zfr_tokenizerr   r   r   r   test_issue2926   s    r   )-
__future__r   r   Zspacyr   Zspacy.lang.enr   Zspacy.lang.jar   Zspacy.lang.xxr   Zspacy.languager   Zspacy.matcherr   Zspacy.tokensr	   r
   Zspacy.vocabr   Zspacy.compatr   Z	spacy._mlr   rw   rj   utilr   r   r6   markZparametrizer7   rF   rJ   rR   rU   r^   r`   rm   rn   rq   rs   r~   r   r   r   r   r   <module>   sB   			