B
    .(b)                 @   s   d dl mZ d dlZd dlZd dlmZ d dlmZ d dlm	Z	 ddl
mZ dd	 Zd
d Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zejd*d+d,gd-d. ZdS )/    )unicode_literalsN)Mock)PhraseMatcher)Doc   )get_docc             C   s:  t | dddddgd}t | ddgd}t| }|d|g t||dksPtt | dgd}t| }|d|g t||dkstt | ddgd}t| }|d	|g t||dkstt | dgd}t| }|d
|g t||dkstt | ddgd}t| }|d|g t||dks6td S )NIlikeGoogleNowbest)wordsCOMPANY   ZILIKEZBESTZNOWBEST)r   r   addlenAssertionError)en_vocabdocpatternmatcher r   V/home/dcms/DCMS/lib/python3.7/site-packages/spacy/tests/matcher/test_phrase_matcher.pytest_matcher_phrase_matcher   s*    r   c             C   sl   t | }t|dkst|dt| dgdg t|dks@t|dt| dgdg t|dkshtd S )	Nr   TESTtest)r   r   TEST2test2r   )r   r   r   r   r   )r   r   r   r   r   test_phrase_matcher_length)   s    r   c             C   s<   t | }|dt| dgdg d|ks,td|ks8td S )Nr   r   )r   r   )r   r   r   r   )r   r   r   r   r   test_phrase_matcher_contains2   s    r   c             C   s  t | ddgd}t | dgdt | ddgdg}t| }|jd
|  t||dksXtt| }t }|jd|f|  t||dkst|jdkstt| }|d| t||dkstt| }t }|jd||d	 t||dkst|jdkstd S )Nab)r   OLD_APIr   ZOLD_API_CALLBACKZNEW_APIZNEW_API_CALLBACK)on_match)r"   N)r   r   r   r   r   r   Z
call_count)r   r   patternsr   r#   r   r   r   test_phrase_matcher_add_new_api9   s$    r%   c             C   s   t | }|dt| dgdg |dt| dgdg |dt| dgdg |dt| dgdg t| dddddgd}d|kstd|kstt||d	kstd S )
Nr   r	   )r   r   r
   r   r   r   r   )r   r   r   r   r   )r   r   r   r   r   r    test_phrase_matcher_repeated_addO   s    r&   c          	   C   sp  t | }|dt| dgdg |dt| dgdg t| dddddgd}d|ksZtd|ksftd	|ksrtt||d
kst|d d|kstd|kstd	|kstt||dkst|d d|kstd|kstd	|kstt||dksttt |d	 W d Q R X d|ks:td|ksHtd	|ksVtt||dksltd S )NTEST1r	   )r   r   r   r   r
   r   TEST3r   r   r   )	r   r   r   r   r   removepytestraisesKeyError)r   r   r   r   r   r   test_phrase_matcher_remove\   s0    

r-   c             C   s  t | }|dt| dgdg |dt| dgdg t| dddddgd}d|ksZtt|d	ksjtt||d	ks~t|d d|kstt|d
kstt||d
kst||d d | jd kst|d d|kstt|dkstt||dkstd S )Nr   r	   )r   r   r   r
   r   r   r   r   r   )r   r   r   r   r   r)   strings)r   r   r   r   r   r   +test_phrase_matcher_overlapping_with_removew   s     

r/   c             C   s   dddg}dddg}ddd	d
dddg}dddddddg}t | ||d}t| dd}|d|g t | ||d}||}t|dkst|d \}	}
}|	| jd kst|
dkst|dkstd S )Nr   r	   catsPRONVERBNOUNZYes,ZyouZhateZdogsZveryZmuchZINTJZPUNCTZADV)r   posPOS)attrr   r   r   r      )r   r   r   r   r   r.   )r   words1pos1words2pos2r   r   r   matchesZmatch_idstartendr   r   r    test_phrase_matcher_string_attrs   s    

r@   c       	      C   sz   dddg}dddg}ddd	g}d
d
d
g}t | ||d}t| dd}|d|g t | ||d}||}t|dksvtdS )zATest that token with the control codes as ORTH are *not* matched.r   r	   r0   r1   r2   r3   zmatcher:POS-PRONzmatcher:POS-VERBzmatcher:POS-NOUNX)r   r5   r6   )r7   r   r   N)r   r   r   r   r   )	r   r9   r:   r;   r<   r   r   r   r=   r   r   r   )test_phrase_matcher_string_attrs_negative   s    



rB   c             C   s   dddg}dddddd	g}t | |d
}t| dd}|d|g t | |d
}||}t|dksdt|d \}}}	|d \}
}}|| jd kst|
| jd kst|dkst|	dkst|dkst|dkstd S )NZHelloworld!ZNoproblemr4   heZsaid.)r   ZIS_PUNCT)r7   r   r   r   r         )r   r   r   r   r   r.   )r   r9   r;   r   r   r   r=   Z	match_id1Zstart1Zend1Z	match_id2Zstart2Zend2r   r   r   test_phrase_matcher_bool_attrs   s     
rJ   c          	   C   s   t | dgd}d|_t | dgd}d|_t | dgd}t| dd}tt |d|g W d Q R X tt |d|g W d Q R X td }|d|g |jrt	W d Q R X t| ddd	}td }|d
|g |jrt	W d Q R X d S )NTest)r   T)validater'   r   r(   r6   )r7   rL   ZTEST4)
r   	is_parsed	is_taggedr   r*   ZwarnsUserWarningr   listr   )r   doc1doc2doc3r   recordr   r   r   test_phrase_matcher_validation   s"    rU   c          	   C   s&   t t t| dd W d Q R X d S )NZUNSUPPORTED)r7   )r*   r+   
ValueErrorr   )r   r   r   r   test_attr_validation   s    rW   c          
   C   s@  t | dgd}d|_t | dgd}d|_t | dgd}t| dd}|d|g tt |d|g W d Q R X tt |d|g W d Q R X xnd	D ]f}t| |d}|d|g tt |d|g W d Q R X tt |d|g W d Q R X qW t| d
d}|d|g t| dd}|d|g d S )NrK   )r   TZDEP)r7   r'   r   r(   )ZTAGr6   ZLEMMAZORTHZTEXT)r   rM   rN   r   r   r*   r+   rV   )r   rQ   rR   rS   r   r7   r   r   r   test_attr_pipeline_checks   s,    
rX   c             C   sb   t  }t| dddddgd}t| ddgd}t| }|jd|g|d ||}|||d	| d S )
Nr   r	   r
   r   r   )r   r   )r#   r   )r   r   r   r   Zassert_called_once_with)r   mockr   r   r   r=   r   r   r   test_phrase_matcher_callback   s    rZ   c             C   sn   t | }t| dgd}t| ddgd}t| dddgd}t| ddddgd}|d||||g |d d S )Nthis)r   isr    wordZTHIS)r   r   r   r)   )r   r   Zpattern1Zpattern2Zpattern3Zpattern4r   r   r   /test_phrase_matcher_remove_overlapping_patterns   s    r^   c          	   C   s>   t | }t| ddgd}tt |d| W d Q R X d S )NZhellorC   )r   r   )r   r   r*   r+   rV   r   )r   r   r   r   r   r   test_phrase_matcher_basic_check  s    r_   c             C   s   t | }t }|dt| dgdg |jdt| dgdg|d t| ddd	d
ddgd}t|dksjtt|}t|}||}||}t|t|kst||kst|	 d \}}	}
}t
|
dtstd S )Nr   r   )r   r   r   )r#   ZtheseZaretests:r   r   )r   r   r   r   r   r   srslyZpickle_dumpsZpickle_loads
__reduce__
isinstanceget)r   r   rY   r   r!   Zmatcher_unpickledr=   Zmatches_unpickledZvocabZdocs	callbacksr7   r   r   r   test_phrase_matcher_pickle  s    

rg   r7   Z
SENT_STARTZIS_SENT_STARTc             C   s   t | |d}d S )N)r7   )r   )r   r7   r   r   r   r   test_phrase_matcher_sent_start'  s    rh   ) 
__future__r   r*   rb   rY   r   Zspacy.matcherr   Zspacy.tokensr   utilr   r   r   r   r%   r&   r-   r/   r@   rB   rJ   rU   rW   rX   rZ   r^   r_   rg   markZparametrizerh   r   r   r   r   <module>   s0   	

