B
    .(bt                 @   s   d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 ejdd Zejd	d
 Zejdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zejd#d$d%gd&d' ZdS )(    )unicode_literalsN)Span)Language)EntityRuler)MatchPatternErrorc               C   s   t  S )N)r    r   r   U/home/dcms/DCMS/lib/python3.7/site-packages/spacy/tests/pipeline/test_entity_ruler.pynlp   s    r	   c            	   C   sR   ddddddiddigddddigddd	d
dgdddddddddgS )NHELLOzhello world)labelpatternBYELOWERZbyeORTHCOMPLEXZfoo*)r   OPTECH_ORGZApplea1)r   r   id	Microsofta2r   r   r   r   r   patterns   s    
r   c              C   s   dd } | S )Nc             S   s    t | dd| jjd dg| _| S )Nr      ORG)r   )r   Zvocabstringsents)docr   r   r   add_ent_component   s    z"add_ent.<locals>.add_ent_componentr   )r   r   r   r   add_ent   s    r   c             C   s   t | |d}t|t|ks tt|jdks2td|ks>td|ksJt| | | d}t|jdksnt|jd jdkst|jd jdkstd S )	N)r      r
   r   zhello world bye bye   r      )r   lenAssertionErrorlabelsadd_piper   label_)r	   r   rulerr   r   r   r   test_entity_ruler_init%   s    
r)   c             C   sf   t | |d}| | | | | d}t|jdks:t|jd jdksNt|jd jdksbtd S )N)r   zOH HELLO WORLD bye byer!   r   r   r"   r   )r   r&   r#   r   r$   r'   )r	   r   r   r(   r   r   r   r   test_entity_ruler_existing2   s    

r*   c             C   s|   t | |dd}| | | | | d}t|jdks<t|jd jdksPt|jd jdksdt|jd jdksxtd S )	NT)r   overwrite_entszOH HELLO WORLD bye byer!   r   r
   r"   r   )r   r&   r#   r   r$   r'   text)r	   r   r   r(   r   r   r   r   $test_entity_ruler_existing_overwrite<   s    

r-   c             C   s   t | |dd}| | | | | d}t|jdks<t|jd jdksPt|jd jdksdtt|jd dksztt|jd dkstd S )	NT)r   r+   zfoo foo bye byer!   r   r   r"   r   )r   r&   r#   r   r$   r'   )r	   r   r   r(   r   r   r   r   "test_entity_ruler_existing_complexG   s    

r.   c             C   s^   t | |dd}| | | d}t|jdks2t|jd jdksFt|jd jdksZtd S )NT)r   r+   zApple is a technology companyr"   r   r   r   )r   r&   r#   r   r$   r'   ent_id_)r	   r   r(   r   r   r   r   test_entity_ruler_entity_idS   s    
r0   c             C   sn   t | |ddd}d|jkst| | | d}t|jdksBt|jd jdksVt|jd jd	ksjtd S )
NTz**)r   r+   Z
ent_id_sepzTECH_ORG**a1zApple is a technology companyr"   r   r   r   )r   Zphrase_patternsr$   r&   r#   r   r'   r/   )r	   r   r(   r   r   r   r    test_entity_ruler_cfg_ent_id_sep\   s    
r1   c             C   s   t | |d}t|t|ks tt|jdks2t| }t | }t|dksRtt|jdksdt||}t|t|kstt|jdkstt|jt|jkstx|jD ]}||jkstqW t|jt|jkstd S )N)r   r    r   )r   r#   r$   r%   to_bytes
from_bytesr   sorted)r	   r   r(   ruler_bytes	new_rulerr   r   r   r   !test_entity_ruler_serialize_bytesf   s    
r7   c             C   s   t | d|d}t|t|ks"tt|jdks4t| }t | }t|dksTtt|jdksft|jd kstt||}t|t|kstt|jdkst|jdkstd S )Nr   )phrase_matcher_attrr   r    r   )r   r#   r$   r%   r2   r8   r3   )r	   r   r(   r5   r6   r   r   r   5test_entity_ruler_serialize_phrase_matcher_attr_bytesw   s    
r9   c          	   C   s   t | }t | dd}dddigd}dddigd}tt ||g W d Q R X ||g tt ||g W d Q R X d S )NT)validater
   r   )r   r   ZASDF)r   pytestZraises
ValueErroradd_patternsr   )r	   r(   Zvalidated_rulerZvalid_patternZinvalid_patternr   r   r   test_entity_ruler_validate   s    r>   c             C   sF   t | |dd}t|jtddddgks,tt|jddgksBtd S )	NT)r   r+   r
   r   r   r   r   r   )r   r4   r%   r$   Zent_ids)r	   r   r(   r   r   r   test_entity_ruler_properties   s    r?   c             C   s^   t | }ddddddg}|| || d}t|jdksFt|jd jdksZtd S )	NZFOOBARzfoo bar)r   r   ZBARBAZzbar bazzfoo bar bazr"   r   )r   r=   Zmake_docr#   r   r$   r'   )r	   r(   r   r   r   r   r   #test_entity_ruler_overlapping_spans   s    
r@   	n_processr"   r!   c             C   sj   t | }dg}ddddg}|| | | x4| j|ddD ]"}x|jD ]}|jdksLtqLW q@W d S )NzI enjoy eating Pizza Hut pizza.ZFASTFOODz	Pizza HutZ1234)r   r   r   r!   )rA   )r   r=   r&   piper   r/   r$   )r	   rA   r(   Ztextsr   r   entr   r   r   !test_entity_ruler_multiprocessing   s    

rD   )
__future__r   r;   Zspacy.tokensr   Zspacy.languager   Zspacy.pipeliner   Zspacy.errorsr   Zfixturer	   r   r   r)   r*   r-   r.   r0   r1   r7   r9   r>   r?   r@   markZparametrizerD   r   r   r   r   <module>   s(   	
	
