B
    .(ไbึ  ใ               @   sB  d dl mZ d dlZd dlmZ d dlmZ d dlmZ dd Z	ej
 dd	gกd
d Zdd Zdd Zdd Zej
 ddddddgกdd Zej
 ddgกdd Zej
 ddddgกdd  Zd!d" Zej
 d#d$gกd%d& Zd'd( Zej
 d)d	d*d+id*d,igfgกd-d. Zej
 d)d	d+d/d0d*d,igfgกd1d2 ZdS )3้    )ฺunicode_literalsN)ฺVocab)ฺ	Tokenizer)ฺensure_pathc             C   s   | d}t |dkstd S )Nฺ r   )ฺlenฺAssertionError)ฺ	tokenizerฺtokensฉ r   ๚S/home/dcms/DCMS/lib/python3.7/site-packages/spacy/tests/tokenizer/test_tokenizer.pyฺtest_tokenizer_handles_no_word
   s    r   ฺtextZloremc             C   s   | |}|d j |kstd S )Nr   )r   r   )r	   r   r
   r   r   r   ฺ"test_tokenizer_handles_single_word   s    r   c             C   sh   d}| |}t |dkst|d jdks.t|d jdks@t|d jdksRt|d jdksdtd S )	NzLorem, ipsum.้   r   ฺLorem้   ๚,้   Zipsum)r   r   r   )r	   r   r
   r   r   r   ฺtest_tokenizer_handles_punct   s    r   c             C   s    d}| |}t |dkstd S )NzLorem, (ipsum).้   )r   r   )r	   r   r
   r   r   r   ฺ#test_tokenizer_handles_punct_braces   s    r   c             C   sZ   ddg}d}| |}|d j |krVt|dks2t|d jdksDt|d jdksVtd S )	NฺhuZbnzLorem ipsum: 1984.r   ้   r   ้   Z1984)Zlang_r   r   r   )r	   ฺ
exceptionsr   r
   r   r   r   ฺtest_tokenizer_handles_digits%   s    r   z
google.comz
python.orgzspacy.iozexplosion.aizhttp://www.google.comc             C   s   | |}t |dkstd S )Nr   )r   r   )r	   r   r
   r   r   r   ฺtest_tokenizer_keep_urls0   s    r   zNASDAQ:GOOGc             C   s   | |}t |dkstd S )Nr   )r   r   )r	   r   r
   r   r   r   ฺtest_tokenizer_colons9   s    r   zhello123@example.comzhi+there@gmail.itzmatt@explosion.aic             C   s   | |}t |dkstd S )Nr   )r   r   )r	   r   r
   r   r   r   ฺtest_tokenizer_keeps_email?   s    r   c             C   s    d}| |}t |dkstd S )Naำ  Lorem ipsum dolor sit amet, consectetur adipiscing elit

Cras egestas orci non porttitor maximus.
Maecenas quis odio id dolor rhoncus dignissim. Curabitur sed velit at orci ultrices sagittis. Nulla commodo euismod arcu eget vulputate.

Phasellus tincidunt, augue quis porta finibus, massa sapien consectetur augue, non lacinia enim nibh eget ipsum. Vestibulum in bibendum mauris.

"Nullam porta fringilla enim, a dictum orci consequat in." Mauris nec malesuada justo.r   )r   r   )r	   r   r
   r   r   r   ฺ test_tokenizer_handles_long_textG   s    r    ฺ	file_namezsun.txtc             C   sL   t tj| }|jddd ก }t|dks0t| |}t|dksHtd S )Nฺrฺutf8)ฺencodingr   ้d   )r   ฺ__file__ฺparentฺopenฺreadr   r   )r	   r!   ฺlocr   r
   r   r   r   ฺ$test_tokenizer_handle_text_from_fileU   s
    r+   c             C   s@   d}d}| |}| |}|d j dks*t|d j dks<td S )Nz2Lorem dolor sit amet, consectetur adipiscing elit.z8Lorem ipsum dolor sit amet, consectetur adipiscing elit.r   r   )r   r   )r	   Ztext1Ztext2Ztokens1Ztokens2r   r   r   ฺ(test_tokenizer_suspected_freeing_strings^   s    r,   ztext,tokensฺorthฺloฺremc             C   sL   |   ||ก | |}|d j|d d ks.t|d j|d d ksHtd S )Nr   r-   r   )ฺadd_special_caser   r   )r	   r   r
   ฺdocr   r   r   ฺtest_tokenizer_add_special_caseg   s    r2   ฺNN)r-   ฺtagc             C   s   t dddiid}t|i d d d }| | |ก || }|d j|d d ksPt|d j|d d ksjt|d jdks|t|d j|d d kstd S )	Nr3   ฺposZNOUN)Ztag_mapr   r-   r4   r   )r   r   r0   r   r   Ztag_Zpos_)r   r
   Zvocabr	   r1   r   r   r   ฺ#test_tokenizer_add_special_case_tago   s    r6   )ฺ
__future__r   ZpytestZspacy.vocabr   Zspacy.tokenizerr   Z
spacy.utilr   r   ฺmarkZparametrizer   r   r   r   r   r   r   r    r+   r,   r2   r6   r   r   r   r   ฺ<module>   s*   
		(