B
    .(bz                 @   sj   d dl mZ d dlZd dlZd dlmZ d dlmZ ddlm	Z	 ej
dd Zd	d
 Zdd Zdd ZdS )    )unicode_literalsN)Language)	Tokenizer   )make_tempdirc               C   s    ddddddddddd d	d
S )Nzname-in-fixturezversion-in-fixturezdescription-in-fixturezauthor-in-fixturezemail-in-fixturezurl-in-fixturezlicense-in-fixturer   )widthvectorskeysname)r
   versiondescriptionauthoremailurllicenser    r   r   r   \/home/dcms/DCMS/lib/python3.7/site-packages/spacy/tests/serialize/test_serialize_language.py	meta_data   s    r   c          	   C   sF   t | d}t }|| t  |}W d Q R X |j|jksBtd S )N)meta)r   r   to_diskZ	from_diskr   AssertionError)r   languagedZnew_languager   r   r   !test_serialize_language_meta_disk   s
    

r   c           	      s^   t dt dt d  fdd} t }| ||_t }|| W dQ R X dS )zTest that serialization with custom tokenizer works without token_match.
    See: https://support.prodi.gy/t/how-to-save-a-custom-tokenizer/661/2
    z$1/|2/|:[0-9][0-9][A-K]:|:[0-9][0-9]: z[~]c                s   t | ji jj jdS )N)Zprefix_searchZsuffix_searchZinfix_finditer)r   Zvocabsearchfinditer)nlp)infix_re	prefix_re	suffix_rer   r   custom_tokenizer*   s    z>test_serialize_with_custom_tokenizer.<locals>.custom_tokenizerN)recompiler   	tokenizerr   r   )r!   r   r   r   )r   r   r    r   $test_serialize_with_custom_tokenizer"   s    


	
r%   c          	   C   s   d}t | d}|jd |ks tt  | }|jd |ksBtt  j| dgd}|jd |krjtt  |jdgd}|jd |krttt |jdd W d Q R X tt t  j| dd W d Q R X d S )Nzname-in-fixture)r   r
   r   )excludeF)r   r   r   
from_bytesto_bytespytestZraises
ValueError)r   r
   r   Znew_nlpr   r   r   test_serialize_language_exclude9   s    
r+   )
__future__r   r)   r"   Zspacy.languager   Zspacy.tokenizerr   utilr   Zfixturer   r   r%   r+   r   r   r   r   <module>   s   