B
    .(b*                 @   sF  d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
mZ d dlmZ d dlmZ d d	lmZmZ d d
lmZ ejdd Zejdd Zejdd Zejdd Zejdd Zdd Zdd Zdd Zdd Zdd Zdd  Z d!d" Z!d#d$ Z"d%d& Z#d'd( Z$d)d* Z%d+d, Z&d-d. Z'd/d0 Z(G d1d2 d2e)Z*dS )3    )unicode_literalsN)English)Language)Lookups)EntityRecognizerEntityRuler)Vocab)BiluoPushDown)	GoldParse	minibatch)Docc               C   s   t  S )N)r    r   r   J/home/dcms/DCMS/lib/python3.7/site-packages/spacy/tests/parser/test_ner.pyvocab   s    r   c             C   s   t | ddddddgdS )NZCaseyZwenttoZNewZYork.)words)r   )r   r   r   r   doc   s    r   c             C   s4   | dd }| dd }|j |jdf|j |jdfgS )Nr            PERSONGPE)Z
start_charZend_char)r   Zcaseynyr   r   r   entity_annots   s    r   c             C   s   t tdd | D S )Nc             S   s   g | ]\}}}|qS r   r   ).0selabelr   r   r   
<listcomp>&   s    z entity_types.<locals>.<listcomp>)sortedset)r   r   r   r   entity_types$   s    r"   c             C   s   t j|d}t | j|S )N)r"   )r	   Zget_actionsstrings)r   r"   actionsr   r   r   tsys)   s    r%   c                sP   t ||d} |  ||} fdd|D }|ddddddgksLtd S )N)entitiesc                s   g | ]}  |qS r   )get_class_name)r   act)r%   r   r   r   3   s    z)test_get_oracle_moves.<locals>.<listcomp>zU-PERSONOzB-GPEzL-GPE)r
   preprocess_goldget_oracle_sequenceAssertionError)r%   r   r   goldact_classesnamesr   )r%   r   test_get_oracle_moves/   s
    
r0   c                sx   dd |D }t ||d}x(t|jD ]\}}|dkr&d|j|< q&W  |  ||} fdd|D }|sttd S )Nc             S   s    g | ]\}}}||d | fqS )!r   )r   r   r   r   r   r   r   r   8   s    z;test_get_oracle_moves_negative_entities.<locals>.<listcomp>)r&   zL-!GPE-c                s   g | ]}  |qS r   )r'   )r   r(   )r%   r   r   r   ?   s    )r
   	enumeratenerr*   r+   r,   )r%   r   r   r-   itagr.   r/   r   )r%   r   'test_get_oracle_moves_negative_entities7   s    
r7   c                sb   t |ddddgd}t|g d}ddddg|_ |  ||} fd	d
|D }|s^td S )NABCD)r   )r&   z	B-!PERSONz	L-!PERSONc                s   g | ]}  |qS r   )r'   )r   r(   )r%   r   r   r   I   s    z<test_get_oracle_moves_negative_entities2.<locals>.<listcomp>)r   r
   r4   r*   r+   r,   )r%   r   r   r-   r.   r/   r   )r%   r   (test_get_oracle_moves_negative_entities2C   s    
r<   c                sb   t |ddddgd}t|g d}ddddg|_ |  ||} fd	d
|D }|s^td S )Nr8   r9   r:   r;   )r   )r&   r)   z!Oc                s   g | ]}  |qS r   )r'   )r   r(   )r%   r   r   r   S   s    z4test_get_oracle_moves_negative_O.<locals>.<listcomp>)r   r
   r4   r*   r+   r,   )r%   r   r   r-   r.   r/   r   )r%   r    test_get_oracle_moves_negative_OM   s    
r=   c       
      C   s   dddg}d d dg}t | |d}t|||d}t| j}d}x|D ]}|d krRqBqB|dkrn||dd	 qB|d
\}}	||d|	 ||d|	 ||d|	 ||d|	 qBW || ||| d S )Nr9   Z52ZBomberz	L-PRODUCT)r   )r   r&   )Mr9   ILUr)   r)    r2   r?   r@   rA   )	r   r
   r	   r#   
add_actionindexsplitr*   r+   )
en_vocabr   
biluo_tagsr   r-   moves
move_typesr6   actionr   r   r   r   test_oracle_moves_missing_BW   s$    




rK   c       
   	   C   s   dddddddddg	}dddd	d d
dddg	}t | |d}t|||d}t| j}d}xT|D ]L}|d krjqZqZ|dkr||dd qZ|d\}}	||||	 qZW || ||| d S )N
production
ZofZNorthropzCorp.z'sZradarr)   zB-ORGzI-ORGzL-ORG)r   )r   r&   )r>   r9   r?   r@   rA   r)   rB   r2   )	r   r
   r	   r#   rC   rD   rE   r*   r+   )
rF   r   rG   r   r-   rH   rI   r6   rJ   r   r   r   r   test_oracle_moves_whitespaceo   s    


rN   c              C   s  t  } | d}t|j}dd |D dddddgks8tdd |D dddddgksXt|jdd |d |j|gd }|j|d	 |j|d	 |j|d	 |j	|d
stt  }|d}t|j}dg|_
dd |D dddddgkstdd |D dddddgks t|jdd |jdd |d |j|gd }|j|d	 |j|d	 |j|d	 |j	|d
rt|j	|dst|j|d |j	|d
rt|j	|dstdS )z5Test succesful blocking of tokens to be in an entity.zI live in New Yorkc             S   s   g | ]
}|j qS r   )ent_iob_)r   tokenr   r   r   r      s    z-test_accept_blocked_token.<locals>.<listcomp>rB   c             S   s   g | ]
}|j qS r   )	ent_type_)r   rP   r   r   r   r      s    r   r   r   r)   zB-GPE)r   r   r   c             S   s   g | ]
}|j qS r   )rO   )r   rP   r   r   r   r      s    r9   c             S   s   g | ]
}|j qS r   )rQ   )r   rP   r   r   r   r      s       zU-N)r   r   r   r,   rH   rC   	add_label
init_batchapply_transitionis_validents)Znlp1Zdoc1ner1Zstate1Znlp2Zdoc2ner2Zstate2r   r   r   test_accept_blocked_token   s<    
  

 "
rZ   c        	      C   s   dddgifddg ifg} t  }|d}|d |j|dd |  xFtd	D ]:}i }t| }x(|D ] }t| \}}|j|||d
 qjW qTW dS )z7Test that training an empty text does not throw errors.zWho is Shaka Khan?r&   )      r   rB   r4   r   T)last   )lossesN)	r   create_piperS   add_pipebegin_trainingranger   zipupdate)	Z
train_datanlpr4   itnr_   ZbatchesbatchZtextsannotationsr   r   r   test_train_empty   s     


rj   c              C   s   t  } | d}| j|dd |   | d}dd |D dddddgksNtdd |D dddddgksntt|j}|jd	d |	d
 |j
|gd }|j|dst|j|dst|j|d |j|dst|j|dstd S )Nr4   )namezI live in New Yorkc             S   s   g | ]
}|j qS r   )rO   )r   rP   r   r   r   r      s    z(test_overwrite_token.<locals>.<listcomp>r)   c             S   s   g | ]
}|j qS r   )rQ   )r   rP   r   r   r   r      s    rB   r   r   r   zB-GPEzU-GPEzI-GPEzL-GPE)r   r`   ra   rb   r,   r   r   rH   rC   rS   rT   rV   rU   )rf   rX   r   rY   stater   r   r   test_overwrite_token   s     
  

rm   c              C   s   t  } t| }dddg}|| | | | d}|d | | |   | d}dddddddg}dd	d	d	d	d	d	g}d
d |D |kstdd |D |kstdS )zN Test that an NER works after an entity_ruler: the second can add annotations THINGThis)r   patternr4   MY_LABELz*This is Antti Korhonen speaking in Finlandr9   r)   rB   c             S   s   g | ]
}|j qS r   )rO   )r   rP   r   r   r   r      s    z)test_ruler_before_ner.<locals>.<listcomp>c             S   s   g | ]
}|j qS r   )rQ   )r   rP   r   r   r   r      s    N)r   r   add_patternsra   r`   rS   rb   r,   )rf   rulerpatternsuntrained_nerr   expected_iobsexpected_typesr   r   r   test_ruler_before_ner   s    




rx   c              C   s   t  } | d}|d | j|dd |   t| }dddg}|| | | | d}d	d
d
d
d
d
d
g}dddddddg}dd |D |kstdd |D |kstdS )zV Test that an entity_ruler works after an NER: the second can overwrite O annotations r4   rq   uner)rk   rn   ro   )r   rp   z*This is Antti Korhonen speaking in Finlandr9   r)   rB   c             S   s   g | ]
}|j qS r   )rO   )r   rP   r   r   r   r     s    z)test_ner_before_ruler.<locals>.<listcomp>c             S   s   g | ]
}|j qS r   )rQ   )r   rP   r   r   r   r     s    N)r   r`   rS   ra   rb   r   rr   r,   )rf   ru   rs   rt   r   rv   rw   r   r   r   test_ner_before_ruler   s    



rz   c              C   s   t  } | tdd | d}|d | j|dd |   | d}ddd	d	d	dddg}d
d
d
d
d
d
d
d
g}dd |D |kstdd |D |kstdS )zK Test functionality for blocking tokens so they can't be in a named entity r^   r   r4   rq   ry   )rk   z,This is Antti L Korhonen speaking in Finlandr)   r9   rB   c             S   s   g | ]
}|j qS r   )rO   )r   rP   r   r   r   r     s    z"test_block_ner.<locals>.<listcomp>c             S   s   g | ]
}|j qS r   )rQ   )r   rP   r   r   r   r     s    N)r   ra   BlockerComponent1r`   rS   rb   r,   )rf   ru   r   rv   rw   r   r   r   test_block_ner  s    

r|   c              C   s   t  } | d}| | |d |   |jjj|jks@t	t  } | d}| | |d | jddddid |jjjdkst	| d d S )Nr4   r   r      )Znr_feature_tokensZtoken_vector_width)Zcomponent_cfgzhello world)
r   r`   ra   rS   rb   modellowerZnFZ
nr_featurer,   )rf   r4   r   r   r   test_change_number_features!  s    





r   c           	   C   s   t  } t | j_t| jjr t| d}| | t	t
 |   W d Q R X | jjd d| jjdd< t	d }|   |jrtW d Q R X d S )Nr4   Zlexeme_normr8   a)r   r   r   Zlookupslenr,   r`   ra   pytestZwarnsUserWarningrb   Z	add_tableZ	get_tablelist)rf   r4   recordr   r   r   test_ner_warns_no_lookups6  s    


r   c               @   s    e Zd ZdZdd Zdd ZdS )r{   Z
my_blockerc             C   s   || _ || _d S )N)startend)selfr   r   r   r   r   __init__H  s    zBlockerComponent1.__init__c             C   s   d| j | jfg|_|S )Nr   )r   r   rW   )r   r   r   r   r   __call__L  s    zBlockerComponent1.__call__N)__name__
__module____qualname__rk   r   r   r   r   r   r   r{   E  s   r{   )+
__future__r   r   Zspacy.lang.enr   Zspacy.languager   Zspacy.lookupsr   Zspacy.pipeliner   r   Zspacy.vocabr   Zspacy.syntax.nerr	   Z
spacy.goldr
   r   Zspacy.tokensr   Zfixturer   r   r   r"   r%   r0   r7   r<   r=   rK   rN   rZ   rj   rm   rx   rz   r|   r   r   objectr{   r   r   r   r   <module>   s:   


/