B
    .(bo?                 @   s  d dl mZ d dlZd dlZd dlmZ d dlmZmZ d dl	m
Z
mZ ddlmZ ddlmZ ejd	d
 Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Z ej!"d)d*d+ Z#d,d- Z$d.d/ Z%d0d1 Z&d2d3 Z'ej!(d4d5d6d7gfd8d9gfd:d6gfd;d7gfd<d6d9gfd=d9d7gfgd>d? Z)d@dA Z*dBdC Z+dDdE Z,dFdG Z-ej!(dHdIdJigd6fdKdJigd6fdLdJigdMfdNdJigd6fdOdJigdPfdQdJigdRfdSdJigdTfdUdJigdVfdWdJigdXfdYdJigdZfd[dJigd\fd]dJigd^fd_dJigd`fdadJigd`fdbdJigdMfdcdJigddfdedJigdffgdgdh Z.didj Z/dkdl Z0dmdn Z1dodp Z2dqdr Z3dS )s    )unicode_literalsN)Mock)MatcherDependencyMatcher)DocToken   )clean_underscore)get_docc             C   sX   ddiggddiddiggddiggd}t | }x | D ]\}}||| q<W |S )NORTH
JavaScriptGoogleNowZLOWERjava)JS	GoogleNowJava)r   itemsadd)en_vocabrulesmatcherkeypatterns r   S/home/dcms/DCMS/lib/python3.7/site-packages/spacy/tests/matcher/test_matcher_api.pyr      s    
r   c             C   s   t | }ddig}t|dks"t|d|g t|dks@t|d d|ksVt|d|g d|kspt|d\}}t|d std S )Nr   testr   Rule   )r   lenAssertionErrorr   removeget)r   r   patternon_matchr   r   r   r   test_matcher_from_api_docs   s    

r%   c             C   s   d}t | |dd}dddddd	g}d
d |D }dd }t| }|jd||d || |jdksjt|d jdks|td S )Nu'   Wow 😀 This is really cool! 😂 😂 )wordsu   😀u   😃u   😂u   🤣u   😊u   😍c             S   s   g | ]}d |igqS )r   r   ).0emojir   r   r   
<listcomp>,   s    z0test_matcher_from_usage_docs.<locals>.<listcomp>c       
   	   S   sp   || \}}}|j j| dkr,| jd7  _||| }| }|| W d Q R X || }	d|	j |	j _d S )NHAPPYg?zhappy emoji)vocabstrings	sentimentZ
retokenizemergetextnorm_)
r   docimatchesZmatch_idstartendspanZretokenizertokenr   r   r   label_sentiment.   s    
z5test_matcher_from_usage_docs.<locals>.label_sentimentr+   )r$   r   r   zhappy emoji)r   splitr   r   r.   r    r1   )r   r0   r2   Z	pos_emojiZpos_patternsr9   r   r   r   r   test_matcher_from_usage_docs(   s    
r;   c             C   s@   t | dkst| dddigg d| ks0td| ks<td S )N   TESTr   r   ZTEST2)r   r    r   )r   r   r   r   test_matcher_len_contains?   s    r>   c             C   s  t | ddgd}ddigddiddigg}t| }|jd|  t||dksTtt| }t }|jd|f|  t||dkst|jdkstt| }|d| t||dkstt| }t }|jd	||d
 t||dkst|jdks td S )Nab)r'   TEXTOLD_APIr   ZOLD_API_CALLBACKZNEW_APIZNEW_API_CALLBACK)r$   )rB   N)r   r   r   r   r    r   
call_count)r   r2   r   r   r$   r   r   r   test_matcher_add_new_old_apiF   s$    rD   c             C   s*   t | jddddgd}| |g ks&td S )NIlikeZcheese.)r'   )r   r,   r    )r   r2   r   r   r   test_matcher_no_match\   s    rH   c             C   s8   t | jdddgd}| || jjd ddfgks4td S )Nr   isZgood)r'   r   r   r   )r   r,   r-   r    )r   r2   r   r   r   test_matcher_match_starta   s    rJ   c             C   s<   dddg}t | j|d}| ||jjd ddfgks8td S )NrE   rF   r   )r'   r   r   r<   )r   r,   r-   r    )r   r'   r2   r   r   r   test_matcher_match_endf   s    
rK   c             C   s@   dddddg}t | j|d}| ||jjd dd	fgks<td S )
NrE   rF   r   r   best)r'   r   r      )r   r,   r-   r    )r   r'   r2   r   r   r   test_matcher_match_middlel   s    rN   c             C   sT   dddddddg}t | j|d}| ||jjd	 d
df|jjd ddfgksPtd S )NrE   rF   r   r   andr   rL   )r'   r   r   rM   r         )r   r,   r-   r    )r   r'   r2   r   r   r   test_matcher_match_multir   s
    rR   c             C   s   t | }t|jdddgd}|dddii ddigg ||}t|dksPt|d dd	 d
kshtt | }|dddii gg ||}|d dd	 dkstd	S )zBTest matcher allows empty token specs, meaning match on any token.r?   r@   c)r'   zA.Cr   r   r   N)r   r<   zA.)r   r   )r   r   r,   r   r   r    )r   r   r2   r4   r   r   r   test_matcher_empty_dict{   s    rT   c             C   sv   t | }t|jdddgd}ddidddddig}|d	|g ||}t|d
ksZt|d d
d  dksrtd S )Nr?   r@   rS   )r'   r   T+)IS_ALPHAOPzA.Cr   r   )r   r<   )r   r   r,   r   r   r    )r   r   r2   r#   r4   r   r   r   test_matcher_operator_shadow   s    rX   c             C   s   d  }d  }ddiddddddddig}ddiddiddiddiddig}| d	|g t| j|d
}t| |dkstt| j|d
}t| |dkst| d	|g t| |dkstd S )NzHe said , " some words " ...z"He said , " some three words " ...r   "!T)rW   IS_PUNCTr[   Quote)r'   r   r   )r:   r   r   r,   r   r    )r   Zwords1Zwords2pattern1pattern2r2   r   r   r   test_matcher_match_zero   s$    

r_   c             C   s^   d  }ddidddddig}t| j} | d|g t| j|d}t| |d	ksZtd S )
NzHe said , " some words " ...r   rY   *F)rW   r[   r\   )r'   r   )r:   r   r,   r   r   r   r    )r   r'   r#   r2   r   r   r   test_matcher_match_zero_plus   s    
ra   c             C   s   t | j}|dd ddig t|jddgd}||}t|dksHtddddddg}| d	|g | |}t|d
kstd S )NZBasicPhilipper   ZPhilippe)r'   r   1)r   rW   rU   ZKleenePhilipper   )r   r,   r   r   r   r    )r   controlr2   mr#   r   r   r   test_matcher_match_one_plus   s    
re   c                s   t | }|dddiddigg t| dddgd  fd	d
| D }t|dksZt|d dksjt|d dkszt|d dkstdS )z;Test that patterns with "any token" {} work with operators.r=   r   r   rW   r`   helloworld)r'   c                s    g | ]\}}} || j qS r   )r0   )r(   _r5   r6   )r2   r   r   r*      s    z3test_matcher_any_token_operator.<locals>.<listcomp>r<   r   r   z
test hellor   ztest hello worldN)r   r   r   r   r    )r   r   r4   r   )r2   r   test_matcher_any_token_operator   s    ri   r	   c             C   s   t | }dd }tjd|dd ddidddiig}|d	|g t| dd
gd}||}t|dksjtt| ddgd}||}t|dkstd S )Nc             S   s
   | j dkS )N)appleZbanana)r0   )r8   r   r   r   <lambda>       z2test_matcher_extension_attribute.<locals>.<lambda>Zis_fruitT)getterforcer   anrh   ZHAVING_FRUITrj   )r'   r   aardvarkr   )r   r   set_extensionr   r   r   r    )r   r   Zget_is_fruitr#   r2   r4   r   r   r    test_matcher_extension_attribute   s    rr   c             C   s|   t | }ddddgiig}|d|g t| dddgd}||}t|dksRtt| d	gd}||}t|d
ksxtd S )Nr   INro   r?   A_OR_ANrj   )r'   r   rp   r   )r   r   r   r   r    )r   r   r#   r2   r4   r   r   r   test_matcher_set_value   s    ru   c             C   s   t | }dddgiddddig}|d|g t| d	ddgd
}||}t|dksZtt| ddgd
}||}t|dkstd S )Nrs   r?   the?)r   rW   r   ZhouseZ	DET_HOUSEZIn)r'   r   Zmyr   )r   r   r   r   r    )r   r   r#   r2   r4   r   r   r   test_matcher_set_value_operator   s    rx   c             C   sx   t | }dddiig}|d|g t| dddgd}||}t|d	ksNtt| d
gd}||}t|dksttd S )Nr   REGEXz(?:a|an)rt   ro   r?   hi)r'   r   byer   )r   r   r   r   r    )r   r   r#   r2   r4   r   r   r   test_matcher_regex   s    r|   c             C   sx   t | }dddiig}|d|g t| dddgd}||}t|d	ksNtt| d
gd}||}t|dksttd S )NZSHAPEry   z^[^x]+$Z	NON_ALPHAZ99problemsrZ   )r'   r   r{   r   )r   r   r   r   r    )r   r   r#   r2   r4   r   r   r   test_matcher_regex_shape   s    r~   zcmp, badz==r?   aaaz!=aaz>=z<=><c             C   s   t | }d|diig}|d|g t| dddgd}||}t|t|t| ksZtt| |d}||}t|dks~td S )	NZLENGTHr   ZLENGTH_COMPAREr?   r   r   )r'   r   )r   r   r   r   r    )r   cmpbadr   r#   r2   r4   r   r   r   test_matcher_compare_length  s    r   c             C   s   t | }dd }tjd|dd ddddd	giiig}|d
|g t| dddgd}||}t|dksntt| dgd}||}t|dkstd S )Nc             S   s   d t| jS )N )joinreversedr0   )r8   r   r   r   rk   %  rl   z7test_matcher_extension_set_membership.<locals>.<lambda>r   T)rm   rn   rh   rs   ZeybZihZREVERSEDrz   r{   rf   )r'   r   rp   r   )r   r   rq   r   r   r   r    )r   r   Zget_reversedr#   r2   r4   r   r   r   %test_matcher_extension_set_membership#  s    r   c          	   C   s:  dd }|  |}ddiddiddddd	d
ddddddd	|didg}ddiddiddddd	ddidd
ddd	ddidg}ddiddiddddd	ddiddddd	ddidg}ddiddiddddd	ddiddddd	ddidg}t| }t }|jd|g|d |jd|g|d |jd|g|d |jd|g|d ttdksltd}	ddddddddd g	}
d!ddd"d#d$d%d!dg	}ttj|		 |
|d&}t|}t|dkst|d d dddggkst|d d dddggkst|d d dddggks&t|j
dks6td S )'Nc             S   s   t td| S )Nzbrown|yellow|over)boolrecompilematch)r0   r   r   r   is_brown_yellow2  s    z+dependency_matcher.<locals>.is_brown_yellow	NODE_NAMEZfoxr   )ZSPECZPATTERNqr   )r   Z
NBOR_RELOPZ	NBOR_NAMEZquickZamod)r   DEPrTZjumpedrG   z>>ZbrownZNOMATCHr]   )r$   r^   pattern3pattern4rM   z,The quick brown fox jumped over the lazy foxr<   r   r   r   ZdetZnsubjROOTprepZpobj)headsdeps)Zadd_flagr   r   r   r   dependency_matcherr    r
   r,   r:   rC   )r   r   ZIS_BROWN_YELLOWr]   r^   r   r   r   r$   r0   r   r   r2   r4   r   r   r   r   1  sN    











r   c          	   C   s>   t | }ddiddig}tt |d| W d Q R X d S )NrA   rf   rg   r=   )r   pytestraises
ValueErrorr   )r   r   r#   r   r   r   test_matcher_basic_check  s    r   c          
   C   sp  t | dgd}d|_t | dgd}d|_t | dgd}t| }|dddigg || tt || W d Q R X tt || W d Q R X xldD ]d}t| }|d|digg || tt || W d Q R X tt || W d Q R X qW t| }|dddigg || || || t| }|dd	digg || || || d S )
NZTest)r'   Tr=   r   r?   )TAGPOSZLEMMAr   rA   )r   Z	is_parsed	is_taggedr   r   r   r   r   )r   Zdoc1Zdoc2Zdoc3r   attrr   r   r   test_attr_pipeline_checks  s<    
r   zpattern,textrV   TZIS_ASCIIZIS_DIGITrb   ZIS_LOWERZIS_UPPERAZIS_TITLEZAaaar[   rG   ZIS_SPACE
Z
IS_BRACKET[ZIS_QUOTErY   ZIS_LEFT_PUNCTz``ZIS_RIGHT_PUNCTz''ZIS_STOPrv   ZSPACYZLIKE_NUMZLIKE_URLzhttp://example.comZ
LIKE_EMAILzmail@example.comc             C   sT   t | }t| |dd}|d|g t|dks8t||}t|dksPtd S )Nr&   )r'   r   r   )r   r   r:   r   r   r    )r   r#   r0   r   r2   r4   r   r   r   $test_matcher_schema_token_attributes  s    r   c          	   C   sL   t | }tt |jdddiggg d W dQ R X |t| dgd dS )z0Test that on_match can only be None or callable.r=   rA   r   )r$   N)r'   )r   r   r   r   r   r   )r   r   r   r   r   test_matcher_valid_callback  s    "r   c             C   s\   t  }t| }ddig}|jd|g|d t| dddddgd	}||}|||d
| d S )Nr   r   r   )r$   ThisrI   r?   rG   )r'   r   )r   r   r   r   Zassert_called_once_with)r   mockr   r#   r2   r4   r   r   r   test_matcher_callback  s    
r   c             C   sn   d}t | j| d}|d d }|dd  }t| |dksBtt| |dksVtt| |dksjtd S )Nz%JavaScript is good but Java is better)r'   r<   rM   r   r   )r   r,   r:   r   r    )r   r0   r2   Zspan_jsZ	span_javar   r   r   test_matcher_span  s    r   c             C   st   t | }ddig}|d|g t| dddddgd	}||}t|d
ksNtd|ksZt|d d|ksptd S )NrW   rZ   r   r   rI   r?   r   rG   )r'   r   )r   r   r   r   r    r!   )r   r   r#   r2   r4   r   r   r   !test_matcher_remove_zero_operator  s    

r   c             C   s`   t | ddgd}d|d _d|d _d|_t| }|d	d
ddgg t||dks\td S )Nr?   r@   )r'   r   r   Br   Tr=   Crw   )r   rW   )r   Ztag_r   r   r   r   r    )r   r2   r   r   r   r   test_matcher_no_zero_length  s    

r   )4
__future__r   r   r   r   r   Zspacy.matcherr   r   Zspacy.tokensr   r   Zdoc.test_underscorer	   utilr
   Zfixturer   r%   r;   r>   rD   rH   rJ   rK   rN   rR   rT   rX   r_   ra   re   ri   markZusefixturesrr   ru   rx   r|   r~   Zparametrizer   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   s   	
	

N$

