B
    .(ไb  ใ               @   s(  d dl mZ d dlZd dlmZ d dlmZ ddddgZd	d
ddgZddddgZ	ej
 ddddgกdd Zej
 deกej
 ddgกdd Zej
 deกej
 ddgกdd Zej
 deกej
 ddgกej
 ddgกdd Zej
 deกej
 ddgกej
 ddgกd d! Zej
 deกej
 ddgกd"d# Zej
 deกej
 ddgกd$d% Zej
 dd&gกd'd( Zej
jej
 dd)gกd*d+ Zej
 d,e	กej
 ddgกd-d. Zej
 d,e	กej
 d/d0gกej
 ddgกd1d2 Zej
 d3d4gกd5d6 Zd7d8 ZdS )9้    )ฺunicode_literalsN)ฺcompile_prefix_regex)ฺTOKENIZER_PREFIXES๚(๚[ฺ{ฺ*๚)๚]ฺ})r   r	   )r   r
   )r   r   )r   r   ฺtextz((๚<c             C   s    | |}t |t |kstd S )N)ฺlenฺAssertionError)ฺen_tokenizerr   ฺtokensฉ r   ๚M/home/dcms/DCMS/lib/python3.7/site-packages/spacy/tests/lang/en/test_punct.pyฺ$test_en_tokenizer_handles_only_punct   s    r   ฺpunctZHelloc             C   sD   | || }t |dkst|d j|ks.t|d j|ks@td S )N้   r   ้   )r   r   r   )r   r   r   r   r   r   r   ฺ#test_en_tokenizer_splits_open_punct   s    r   c             C   sD   | || }t |dkst|d j|ks.t|d j|ks@td S )Nr   r   r   )r   r   r   )r   r   r   r   r   r   r   ฺ$test_en_tokenizer_splits_close_punct   s    r   ฺ	punct_add๚`c             C   sZ   | || | }t |dks t|d j|ks2t|d j|ksDt|d j|ksVtd S )N้   r   r   r   )r   r   r   )r   r   r   r   r   r   r   r   ฺ,test_en_tokenizer_splits_two_diff_open_punct&   s
    r   ๚'c             C   sZ   | || | }t |dks t|d j|ks2t|d j|ksDt|d j|ksVtd S )Nr   r   r   r   )r   r   r   )r   r   r   r   r   r   r   r   ฺ-test_en_tokenizer_splits_two_diff_close_punct1   s
    r   c             C   sL   | || | | }t |dks$t|d j|ks6t|d j|ksHtd S )N้   r   r   )r   r   r   )r   r   r   r   r   r   r   ฺ(test_en_tokenizer_splits_same_open_punct<   s    r!   c             C   sL   | || | | }t |dks$t|d j|ks6t|d j|ksHtd S )Nr    r   r   )r   r   r   )r   r   r   r   r   r   r   ฺ)test_en_tokenizer_splits_same_close_punctE   s    r"   z'Thec             C   s.   | |}t |dkst|d jdks*td S )Nr   r   r   )r   r   r   )r   r   r   r   r   r   ฺ)test_en_tokenizer_splits_open_appostropheN   s    r#   zHello''c             C   s4   | |}t |dkst| d}t |dks0td S )Nr   z''r   )r   r   )r   r   r   Ztokens_punctr   r   r   ฺ)test_en_tokenizer_splits_double_end_quoteU   s    r$   zpunct_open,punct_closec             C   sZ   | || | }t |dks t|d j|ks2t|d j|ksDt|d j|ksVtd S )Nr   r   r   r   )r   r   r   )r   ฺ
punct_openฺpunct_closer   r   r   r   r   ฺ)test_en_tokenizer_splits_open_close_punct^   s
    r'   zpunct_open2,punct_close2)r   r   c             C   s   | || | | | }t |dks(t|d j|ks:t|d j|ksLt|d j|ks^t|d j|kspt|d j|kstd S )N้   r   r   r   r   r    )r   r   r   )r   r%   r&   Zpunct_open2Zpunct_close2r   r   r   r   r   ฺ test_en_tokenizer_two_diff_punctj   s    r)   z
text,punct)z(can'tr   c             C   s&   t tj}|| }| ก |ks"td S )N)r   r   ฺsearchฺgroupr   )r   r   Zen_search_prefixesฺmatchr   r   r   ฺ(test_en_tokenizer_splits_pre_punct_regexy   s    
r-   c             C   s*   d}| |}|t |d  jdks&td S )Nz*(And a 6a.m. run through Washington Park).r   ฺ.)r   r   r   )r   r   r   r   r   r   ฺ'test_en_tokenizer_splits_bracket_period   s    r/   )ฺ
__future__r   ZpytestZ
spacy.utilr   Zspacy.lang.punctuationr   Z
PUNCT_OPENZPUNCT_CLOSEZPUNCT_PAIREDฺmarkZparametrizer   r   r   r   r   r!   r"   r#   Zxfailr$   r'   r)   r-   r/   r   r   r   r   ฺ<module>   s>   		