B
    .(b                 @   sd   d dl mZ d dlZd dlmZ d dlmZmZmZ d dl	m
Z
 dd Zdd	 Zd
d Zdd ZdS )    )unicode_literalsN)English)conllu2jsoniob2jsonconll_ner2json)	make_docsc              C   sR  ddddg} d | }t|dd}t|dks2t|d d	 dksFtt|d d
 dks^tt|d d
 d d dks~t|d d
 d d d }t|d dkst|d }dd |D ddddgkstdd |D ddddgkstdd |D ddddgkstdd |D ddddgks.td d |D d!d"d#d!gksNtd S )$NzG1	Dommer	dommer	NOUN	_	Definite=Ind|Gender=Masc|Number=Sing	2	appos	_	Oz/2	Finn	Finn	PROPN	_	Gender=Masc	4	nsubj	_	B-PERz.3	Eilertsen	Eilertsen	PROPN	_	_	2	name	_	I-PERuC   4	avstår	avstå	VERB	_	Mood=Ind|Tense=Pres|VerbForm=Fin	0	root	_	O
   )n_sentsr   id
paragraphs	sentencestokens   c             S   s   g | ]}|d  qS )orth ).0tr   r   C/home/dcms/DCMS/lib/python3.7/site-packages/spacy/tests/test_cli.py
<listcomp>   s    z3test_cli_converters_conllu2json.<locals>.<listcomp>ZDommerZFinnZ	Eilertsenu   avstårc             S   s   g | ]}|d  qS )tagr   )r   r   r   r   r   r      s    ZNOUNZPROPNZVERBc             S   s   g | ]}|d  qS )headr   )r   r   r   r   r   r      s       c             S   s   g | ]}|d  qS )depr   )r   r   r   r   r   r      s    ZapposZnsubjnameROOTc             S   s   g | ]}|d  qS )nerr   )r   r   r   r   r   r       s    OzB-PERzL-PER)joinr   lenAssertionError)lines
input_data	convertedsentr   r   r   r   test_cli_converters_conllu2json   s$    
   r&   c           
   C   s  ddddg} d | }t|dd}t|dks2t|d	 d
 d	ksFtt|d	 d dks^tt|d	 d d	 d dks~txtd	dD ]}|d	 d d	 d | }t|d dkst|d }dd |D ddddddddgkstdd |D ddddddddgkstqW d S ) NzAI|O like|O London|I-GPE and|O New|B-GPE York|I-GPE City|I-GPE .|OzAI|O like|O London|B-GPE and|O New|B-GPE York|I-GPE City|I-GPE .|Oz^I|PRP|O like|VBP|O London|NNP|I-GPE and|CC|O New|NNP|B-GPE York|NNP|I-GPE City|NNP|I-GPE .|.|Oz^I|PRP|O like|VBP|O London|NNP|B-GPE and|CC|O New|NNP|B-GPE York|NNP|I-GPE City|NNP|I-GPE .|.|Or   
   )r
   r	   r   r   r   r   r   r      c             S   s   g | ]}|d  qS )r   r   )r   r   r   r   r   r   5   s    z0test_cli_converters_iob2json.<locals>.<listcomp>IlikeLondonandNewYorkCity.c             S   s   g | ]}|d  qS )r   r   )r   r   r   r   r   r   6   s    r   zU-GPEzB-GPEzI-GPEzL-GPE)r   r   r    r!   range)r"   r#   r$   ir%   r   r   r   r   test_cli_converters_iob2json#   s     
 &r3   c           .   C   sv  ddddddddd	d
ddddddddddddddddddddddddd d!d"dd#d$d%d&d'd(d)d*g.} d+ | }t|d,d-}t| t|d.kst|d/ d0 d/kstt|d/ d1 d.kstt|d/ d1 d/ d2 d3kstxtd/d3D ]}|d/ d1 d/ d2 | }t|d4 d5kst|d4 }d6d7 |D d8d9d:d;d<d=d>d?gksHtd@d7 |D dAdAdBdAdCdDdEdAgkstqW d S )FNz-DOCSTART- -X- O O zI	Ozlike	OzLondon	B-GPEzand	Oz	New	B-GPEz
York	I-GPEz
City	I-GPEz.	OzI Ozlike OzLondon B-GPEzand Oz	New B-GPEz
York I-GPEz
City I-GPEz. OzI PRP Oz
like VBP OzLondon NNP B-GPEzand CC OzNew NNP B-GPEzYork NNP I-GPEzCity NNP I-GPEz. . Oz	I PRP _ Ozlike VBP _ OzLondon NNP _ B-GPEz
and CC _ OzNew NNP _ B-GPEzYork NNP _ I-GPEzCity NNP _ I-GPEz. . _ Oz	I	PRP	_	Ozlike	VBP	_	OzLondon	NNP	_	B-GPEz
and	CC	_	OzNew	NNP	_	B-GPEzYork	NNP	_	I-GPEzCity	NNP	_	I-GPEz.	.	_	Or   r'   )r
   r	   r   r   r   r      r   r(   c             S   s   g | ]}|d  qS )r   r   )r   r   r   r   r   r   w   s    z6test_cli_converters_conll_ner2json.<locals>.<listcomp>r)   r*   r+   r,   r-   r.   r/   r0   c             S   s   g | ]}|d  qS )r   r   )r   r   r   r   r   r   x   s    r   zU-GPEzB-GPEzI-GPEzL-GPE)r   r   printr    r!   r1   )r"   r#   r$   r2   r%   r   r   r   r   "test_cli_converters_conll_ner2json:   sv    
 (r7   c           	   C   s  t  } ddi}t| |gdd\}}t|dks2t|dks>tdddgi}t| |gdd\}}t|dksnt|dksztd}tt t| |gdd W d Q R X d	d
i}tt t| |gdd W d Q R X ddi}t| |gdd\}}t|dkst|dkstdg i}t| |gdd\}}t|dks:t|dksHtddi}	t| |	gdd\}}t|dksvt|dkstddi}
t| |
gdd\}}t|dkst|dkstd S )Ntextz	Some textr	   r'   r   r   ZSomed   invalidzDoes not matterr4   zThis text is not long enough   z4This text contains way too much tokens for this testr5   )r   r   r    r!   pytestZraises	TypeError
ValueError)ZnlpZvalid_jsonl_textZdocsZ
skip_countZvalid_jsonl_tokensZinvalid_jsonl_typeZinvalid_jsonl_keyZempty_jsonl_textZempty_jsonl_tokensZtoo_short_jsonlZtoo_long_jsonlr   r   r   test_pretrain_make_docs|   s>    r?   )
__future__r   r<   Zspacy.lang.enr   Zspacy.cli.convertersr   r   r   Zspacy.cli.pretrainr   r&   r3   r7   r?   r   r   r   r   <module>   s   B