B
    .(äbl	  ã               @   s˜   d dl mZ d dlZd dlZd dlmZ d dlmZ ddlm	Z	m
Z
 dd„ Zd	d
„ Zejjddej ddddg¡dd„ ƒƒZejjdddd„ ƒZdS )é    )Úunicode_literalsN)Úget_lang_class)Ú	Tokenizeré   )Úmake_tempdirÚassert_packed_msg_equalc             C   s   t dƒj ¡ }| | ¡ |S )NÚen)r   ÚDefaultsÚcreate_tokenizerÚ
from_bytes)ÚbÚtok© r   ú]/home/dcms/DCMS/lib/python3.7/site-packages/spacy/tests/serialize/test_serialize_tokenizer.pyÚload_tokenizer   s    
r   c             C   sè   t | |jd}| ¡ }t | ƒ |¡ tdƒj ¡ }t d¡j	|_
|ji ksNt‚|j
dk	s\t‚|jdk	sjt‚| |¡ |ji ks‚t‚|j
dkst‚|jdksžt‚t | dddiddigid	}i |_| ¡ }t | ƒ |¡}|ji ksät‚dS )
z–Test that custom tokenizer with not all functions defined or empty
    properties can be serialized and deserialized correctly (see #2494,
    #4991).)Úsuffix_searchr   ÚtestNzABC.ZORTHÚABCÚ.)Úrules)r   r   Úto_bytesr   r   r	   r
   ÚreÚcompileÚmatchZtoken_matchr   ÚAssertionErrorZ	url_match)Zen_vocabÚen_tokenizerÚ	tokenizerZtokenizer_bytesZtokenizer_reloadedr   r   r   Útest_serialize_custom_tokenizer   s"    
r   z%Currently unreliable across platforms)ÚreasonÚtextu   IðŸ’œyouu	   theyâ€™reu   â€œhelloâ€c             C   sj   | }t | ¡ ƒ}t| ¡ | ¡ ƒ | ¡ | ¡ ks6t‚||ƒ}||ƒ}dd„ |D ƒdd„ |D ƒksft‚d S )Nc             S   s   g | ]
}|j ‘qS r   )r   )Ú.0Útokenr   r   r   ú
<listcomp>5   s    z<test_serialize_tokenizer_roundtrip_bytes.<locals>.<listcomp>)r   r   r   r   )r   r   r   Znew_tokenizerZdoc1Zdoc2r   r   r   Ú(test_serialize_tokenizer_roundtrip_bytes,   s    r#   c          	   C   sJ   | }t ƒ 6}|d }| |¡ |  |¡}| ¡ | ¡ ks<t‚W d Q R X d S )Nr   )r   Zto_diskZ	from_diskr   r   )r   r   ÚdÚ	file_pathZtokenizer_dr   r   r   Ú'test_serialize_tokenizer_roundtrip_disk8   s    

r&   )Ú
__future__r   Zpytestr   Z
spacy.utilr   Zspacy.tokenizerr   Úutilr   r   r   r   ÚmarkÚskipZparametrizer#   r&   r   r   r   r   Ú<module>   s   