B
    .(b                 @   s   d dl mZ d dlZd dlmZ dZed  ddddd	d
dddddddd
ddddd	ddgfgZed  ddddd	d
dddddddd
ddddddgfgZej	dedd Z
ej	dedd Zej	dedd  Zd!d" Zd#d$ ZdS )%    )unicode_literalsN)_get_pkuseg_trie_data)ul   作为语言而言，为世界使用人数最多的语言，目前世界有五分之一人口做为母语。u   作为u   语言u   而言u   ，u   为u   世界u   使用u   人u	   数最多u   的u   目前u   有u   五分之一u   人口u   做u   母语u   。u   人数u   最多u   做为textc             C   s&   dd | |D }|t |ks"td S )Nc             S   s   g | ]
}|j qS  )r   ).0tokenr   r   Q/home/dcms/DCMS/lib/python3.7/site-packages/spacy/tests/lang/zh/test_tokenizer.py
<listcomp>   s    z*test_zh_tokenizer_char.<locals>.<listcomp>)listAssertionError)zh_tokenizer_charr   tokensr   r   r   test_zh_tokenizer_char   s    r   ztext,expected_tokensc             C   s"   dd | |D }||kst d S )Nc             S   s   g | ]
}|j qS r   )r   )r   r   r   r   r   r	   !   s    z+test_zh_tokenizer_jieba.<locals>.<listcomp>)r   )Zzh_tokenizer_jiebar   expected_tokensr   r   r   r   test_zh_tokenizer_jieba   s    r   c             C   s"   dd | |D }||kst d S )Nc             S   s   g | ]
}|j qS r   )r   )r   r   r   r   r   r	   '   s    z,test_zh_tokenizer_pkuseg.<locals>.<listcomp>)r   )zh_tokenizer_pkusegr   r   r   r   r   r   test_zh_tokenizer_pkuseg%   s    r   c             C   sp   t | jjj}| dg t | jjj}t|t|d ks@t| jg dd t | jjj}t|dksltd S )NZnonsense_asdf   T)resetr   )r   Z
pkuseg_segZpreprocesserZtrieZpkuseg_update_user_dictlenr   )r   Z	user_dictZupdated_user_dictZreset_user_dictr   r   r   "test_zh_tokenizer_pkuseg_user_dict+   s    r   c             C   s   | d}|d j dkstd S )NzI   like cheese.r   z  )Zorth_r   )r   r   r   r   r   test_extra_spaces;   s    r   )
__future__r   ZpytestZspacy.lang.zhr   ZTEXTSZJIEBA_TOKENIZER_TESTSZPKUSEG_TOKENIZER_TESTSmarkZparametrizer   r   r   r   r   r   r   r   r   <module>   s    