B
    .(b              	   @   s  d dl mZ d dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZmZmZmZ dd Zejjdddd Zdd Zdd Zdd Z dd Z!dd Z"dd Z#ej$ddd d!d"d#d$gd%d& Z%d'd( Z&d)d* Z'dS )+    )unicode_literalsN)Doc)Vocab)English)	LEX_ATTRS)Matcher)	Tokenizer)
Lemmatizer)Lookups)ORTHLEMMAPOSVERBVerbForm_partc              C   s   d} t j }|| }ddd |D ks,tddd |D ksBt|dtdig || }ddd |D ksrtddd |D kstt j }|dtdig || }dd	d |D kstdd
d |D kstdS )z>Test special-case works after tokenizing. Was caching problem.zTI like _MATH_ even _MATH_ when _MATH_, except when _MATH_ is _MATH_! but not _MATH_.ZMATHc             S   s   g | ]
}|j qS  )text).0wr   r   Y/home/dcms/DCMS/lib/python3.7/site-packages/spacy/tests/regression/test_issue1001-1500.py
<listcomp>   s    z"test_issue1061.<locals>.<listcomp>Z_MATH_c             S   s   g | ]
}|j qS r   )r   )r   r   r   r   r   r      s    c             S   s   g | ]
}|j qS r   )r   )r   r   r   r   r   r      s    c             S   s   g | ]
}|j qS r   )r   )r   r   r   r   r   r      s    c             S   s   g | ]
}|j qS r   )r   )r   r   r   r   r   r   "   s    c             S   s   g | ]
}|j qS r   )r   )r   r   r   r   r   r   #   s    N)r   ZDefaultsZcreate_tokenizerAssertionErroradd_special_caser   )r   	tokenizerdocr   r   r   test_issue1061   s    

r   zdg is split of as a unit, as the suffix regular expression can not look back further (variable-width))reasonc              C   s   t  } d}| |}t|dks"t|d jdks4t|d jdksFt|d jdksXt|d	 jd
ksjt|d jdks|tdS )z@Test that g is not split of if preceded by a number and a letterz
e2g 2g 52g   r   Ze2g   2   g   Z52   N)r   lenr   r   )nlpZ	testwordsr   r   r   r   test_issue1235&   s    r%   c              C   s\   t  } | d}t|dkstt| ddg}t|d dksDtt|d dksXtd S )N r   hellor   )r   r#   r   listpipe)r$   r   Zdocsr   r   r   test_issue12426   s    r*   c              C   sv   t dtdtdig} t }|jd|  dd |dD }|ddddgksLtd	d |dD }|ddddgksrtd
S )zTest cached special cases.ZreimburZ	reimburser   c             S   s   g | ]
}|j qS r   )lemma_)r   r   r   r   r   r   D   s    z"test_issue1250.<locals>.<listcomp>zreimbur, reimbur...,z...c             S   s   g | ]
}|j qS r   )r+   )r   r   r   r   r   r   F   s    N)r   r   r   r   r   r   r   )Zspecial_caser$   Zlemmasr   r   r   test_issue1250?   s    r-   c              C   sT   t t dddgd} t t dddgd}| d |d ks<t| d |d krPtdS )z#Test that tokens compare correctly.abc)wordser   N)r   r   r   )Zdoc1Zdoc2r   r   r   test_issue1257J   s    r3   c           	   C   s   t t dddgd} tt | d ds2tW dQ R X | d djdksTttt | d	 dsrtW dQ R X | d djdkstdS )
zBTest that token.nbor() raises IndexError for out-of-bounds access.01r   )r1   r   Nr   r   )r   r   pytestZraises
IndexErrorZnborr   r   )r   r   r   r   test_issue1375R   s    r9   c              C   s   dt ttdii} t }|dddi |ddddii |d	dd
dggi t|}t|| d}t|dgd}d|d _|d j	dkst
|d jdkst
d S )NZVBGTZlemma_indexZverb)copeZcopZ	lemma_excZcoping)r:   Zlemma_rulesZingr&   )
lemmatizertag_map)r1   r   r:   )r   r   r   r
   Z	add_tabler	   r   r   Ztag_r   r   r+   )r<   Zlookupsr;   vocabr   r   r   r   test_issue1387]   s    
r>   c              C   st   ddidddg} t td}t|ddgd}t|dgd}t|}|d	| g ||}|s`t||}|sptd
S )z=Test matches occur when optional element at end of short doc.r   ZHelloT?)ZIS_ALPHAOP)Zlex_attr_gettersZWorld)r1   Z	MyMatcherN)r   r   r   r   addr   )patternr=   Zhello_worldr'   matchermatchesr   r   r   test_issue1434k   s    
rE   zstring,start,end)r.   r   r   )za br   r   )za cr   r   )za b cr   r   )za b b cr   r!   )za b br   r!   c             C   s   ddidddg}t t }|d|g tt |  d}||}|dksT|dkr`|g ks`t|d	 d
 |kstt|d	 d |kstdS )z5Test matcher works when patterns end with * operator.r   r.   r/   *)r   r@   ZTSTEND)r1   Nr6   r   r   )r   r   rA   r   splitr   )stringstartendrB   rC   r   rD   r   r   r   test_issue1450y   s    
rK   c                 sn   t dt dt d t d fdd} t }| ||_|d}x|D ]}|jsXtqXW d S )Nz[\[\("']z[\]\)"']z[-~\.]z
^https?://c                s   t | ji jj jjdS )N)Zprefix_searchZsuffix_searchinfix_finditerZtoken_match)r   r=   searchfinditermatch)r$   )infix_re	prefix_resimple_url_re	suffix_rer   r   my_tokenizer   s    z$test_issue1488.<locals>.my_tokenizerzThis is a test.)recompiler   r   r   r   )rT   r$   r   tokenr   )rP   rQ   rR   rS   r   test_issue1488   s    






rX   c                 s   t d ddddddgfddd	gfd
dddddgfg}  fdd}t }|||_x*| D ]"\}}dd ||D |ksZtqZW d S )Nz[^a-z]ztoken 123testrW   r5   r   3testztoken 1testZ1testzhello...testr'   .c                s   t | ji  jdS )N)rL   )r   r=   rN   )r$   )rP   r   r   new_tokenizer   s    z%test_issue1494.<locals>.new_tokenizerc             S   s   g | ]
}|j qS r   )r   )r   rW   r   r   r   r      s    z"test_issue1494.<locals>.<listcomp>)rU   rV   r   r   r   )Z
test_casesr\   r$   r   expectedr   )rP   r   test_issue1494   s    


r^   )(
__future__r   r7   rU   Zspacy.tokensr   Zspacy.vocabr   Zspacy.lang.enr   Zspacy.lang.lex_attrsr   Zspacy.matcherr   Zspacy.tokenizerr   Zspacy.lemmatizerr	   Zspacy.lookupsr
   Zspacy.symbolsr   r   r   r   r   r   markZxfailr%   r*   r-   r3   r9   r>   rE   ZparametrizerK   rX   r^   r   r   r   r   <module>   s<   	