B
    .(b,              
   @   s  d dl mZ d dlZd dlZd dlmZ d dlmZmZ d dl	m
Z
mZ d dlmZ d dlmZ dd	lmZ ejd
d Zejdd Zejdd d eddfdd eddfdededed dfdgdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Z d'd( Z!d)d* Z"d+d, Z#d-d. Z$d/d0 Z%d1d2 Z&d3d4 Z'd5d6 Z(d7d8 Z)d9d: Z*d;d< Z+d=d> Z,d?d@ Z-dAdB Z.dS )C    )unicode_literalsN)assert_array_equal)ORTHLENGTH)DocSpan)Vocab)filter_spans   )get_docc             C   sh   d}ddddddddddddddg}ddd	d
dddd	d
ddd	ddg}| |}t |jdd |D ||dS )Nz:This is a sentence. This is another sentence. And a third.   r   ZnsubjROOTZdetattrpunctZnpadvmodc             S   s   g | ]
}|j qS  )text).0tr   r   H/home/dcms/DCMS/lib/python3.7/site-packages/spacy/tests/doc/test_span.py
<listcomp>   s    zdoc.<locals>.<listcomp>)wordsheadsdeps)r   vocab)en_tokenizerr   r   r   tokensr   r   r   doc   s     r   c             C   s.   d}| |}t |jdd |D d}d|_|S )Nz:This is a sentence. This is another sentence. And a third.c             S   s   g | ]
}|j qS r   )r   )r   r   r   r   r   r      s    z"doc_not_parsed.<locals>.<listcomp>)r   F)r   r   Z	is_parsed)r   r   r   r   r   r   r   doc_not_parsed   s
    r    zi_sent,i,j,textz	This is ar   zThis is anotherzAnd za third)r   r   r
   Nc             C   s:   t | j}|| ||}|s(|r6tn|j|ks6td S )N)listsents	char_spanAssertionErrorr   )r   Zi_sentijr   r"   spanr   r   r   test_char_span$   s
    


r(   c             C   s`   t | j}|d jdkst|d jdks.tt|dks>ttdd |D t| ks\td S )Nr         c             s   s   | ]}t |V  qd S )N)len)r   sentr   r   r   	<genexpr>;   s    z(test_spans_sent_spans.<locals>.<genexpr>)r!   r"   startr$   endr+   sum)r   r"   r   r   r   test_spans_sent_spans6   s
    
r1   c             C   sP   | dd }t |dkst|jdks*t|jjdks:t|jjjdksLtd S )Nr
      z
a sentenceZsentenceis)r+   r$   r   roothead)r   r'   r   r   r   test_spans_root>   s
    r6   c             C   sJ   | dd }t |dkst|jdks*t|jdks8t|jdksFtd S )Nr   r2   zThis is a sentencezTHIS IS A SENTENCEzthis is a sentence)r+   r$   r   Zupper_Zlower_)r   r'   r   r   r   test_spans_string_fnF   s
    r7   c             C   sP   d}dddddg}| |}t |jdd |D |d	}|dd  jjd
ksLtd S )Nz through North and South Carolinar   r*   r   r   c             S   s   g | ]
}|j qS r   )r   )r   r   r   r   r   r   R   s    z$test_spans_root2.<locals>.<listcomp>)r   r   ZCarolina)r   r   r4   r   r$   )r   r   r   r   r   r   r   r   test_spans_root2N   s
    r9   c             C   s   t t| jst| dd jjjdks,t| dd jjdksDt| dd jjjjdks`td|d	 _d|d
 _|dd j|d	d
 kst|dd j|d
d kstdS )zTest span.sent propertyNr
   r3   zThis is a sentence .      ThisTr   r)   r   r*   
      )	r+   r!   r"   r$   r,   r4   r   Z	left_edgeZis_sent_start)r   r    r   r   r   test_spans_span_sentV   s    

r?   c          	   C   s  | d}t |jdd |D ddddgd}|dd  }|jd	ksHt|d
 dksXt|d dksht|d dksxt|d dkst|dd  }|jdkst|d
 dkst|d dkst|d dkst|dd  }|jd	kst|d
 dkst|d dkst|d dks*t|d dks<t| d}t |jdd |D ddddddgd}|dd  }t|tdddgdddgdddgg dS )z!Test span's lca matrix generationzthe lazy dog sleptc             S   s   g | ]
}|j qS r   )r   )r   r   r   r   r   r   f   s    z)test_spans_lca_matrix.<locals>.<listcomp>r
   r   r   )r   r   N)r
   r
   )r   r   )r   r   r   )r   r   )r   r   )r*   r*   )r   r
   zI like New York in Autumnc             S   s   g | ]
}|j qS r   )r   )r   r   r   r   r   r   ~   s    r   r2   )r   r   Zget_lca_matrixshaper$   r   numpyZasarray)r   r   r   Zlcar   r   r   test_spans_lca_matrixc   s0    "$rB   c           	   C   s   t t ddddgd} | d d }| dd  }ttJ ||dksLt|| dks^t|d d | jd dks~tW d Q R X d S )Nab)r   r
   g      ?g        r   )r   r   pytestZwarnsUserWarningZ
similarityr$   r   )r   span1span2r   r   r   test_span_similarity_match   s    rI   c             C   s   d}| |}d|j |d j _d|j |d j _t|j dd |D d}|d	d jd
ks^t|dd	 jdkstt|d	d jdkstd	S )z:Test span.sentiment property's default averaging behaviourzgood stuff bad stuffg      @r   g       r
   c             S   s   g | ]
}|j qS r   )r   )r   r   r   r   r   r      s    z0test_spans_default_sentiment.<locals>.<listcomp>)r   Ng      ?r   g      r   gUUUUUU?)r   r   	sentimentr   r$   )r   r   r   r   r   r   r   test_spans_default_sentiment   s    rK   c             C   s   d}| |}d|j |d j _d|j |d j _t|j dd |D d}d	d
 |jd< |dd jdkslt|dd jdkst|dd jdkstdS )z:Test span.sentiment property's default averaging behaviourzgood stuff bad stuffg      @r   g       r
   c             S   s   g | ]
}|j qS r   )r   )r   r   r   r   r   r      s    z1test_spans_override_sentiment.<locals>.<listcomp>)r   c             S   s   dS )Ng      $@r   )r'   r   r   r   <lambda>       z/test_spans_override_sentiment.<locals>.<lambda>rJ   Ng      $@r   r   )r   r   rJ   r   Zuser_span_hooksr$   )r   r   r   r   r   r   r   test_spans_override_sentiment   s    rN   c             C   s\   d}| |}|dd }|dd }t |t |ks8t|dd }t |t |ksXtdS )zTest spans can be hashed.zgood stuff bad stuffNr
   r2   r   )hashr$   )r   r   r   rG   rH   Zspan3r   r   r   test_spans_are_hashable   s    rP   c          	   C   s\  | dd }| j |j|jdd}|j|jks0t|j|jks@t|jdksNt| j |j|jddd}|j|jkstt|j|jkst|jdkst| j |jd |jddd}|j|jkst|j|jkst|jdkst| j |jd |jdd	d}|j|jkst|j|jkst|jdks(ttt  | j |jd |jdd
d}W d Q R X d S )Nr   r   ZGPE)labelstrict)rQ   Zalignment_moder*   ZcontractexpandZunk)r#   Z
start_charZend_charr$   label_rE   raises
ValueError)r   rG   rH   r   r   r   test_spans_by_character   s.    rW   c             C   sb   | dd }| ttg}|jt|dfks0t|d |d jksFt|d t|d ks^td S )Nr   r   r
   )r   r   r   )r   r   )Zto_arrayr   r   r@   r+   r$   Zorth)r   r'   Zarrr   r   r   test_span_to_array   s
    rX   c             C   sZ   | dd }|  }|j|j ks(tt|| js8t|| k	sDt|d jdksVtd S )Nr2   r=   r   )as_docr   stripr$   
isinstance	__class__idx)r   r'   Zspan_docr   r   r   test_span_as_doc   s    r^   c             C   sx   d}d}|| j |< | dd }|jdd}| }| j |d|ksHt|j |d|ks^t|j |ddksttdS )z?Test that the user_data can be preserved (but not by default). Zmy_infoiV  r2   r=   T)Zcopy_user_dataN)	user_datarY   getr$   )r   Zmy_keyZmy_valuer'   Zspan_doc_withZspan_doc_withoutr   r   r   test_span_as_doc_user_data   s    
ra   c             C   s^   t | ddddd}|jdks t|j| jjd ks6t|jdksDt|j| jjd ksZtd S )Nr   r   helloQ342)rQ   kb_id)r   rT   r$   rQ   r   stringskb_id_rd   )r   r'   r   r   r   test_span_string_label_kb_id   s
    rg   c          	   C   s,   t | dd}tt d|_W d Q R X d S )Nr   r   rb   )r   rE   rU   NotImplementedErrorrT   )r   r'   r   r   r   test_span_label_readonly  s    ri   c          	   C   s,   t | dd}tt d|_W d Q R X d S )Nr   r   rc   )r   rE   rU   rh   rf   )r   r'   r   r   r   test_span_kb_id_readonly  s    rj   c             C   s  | j jd ddf| j jd ddf| j jd ddfg| _tt| jdksLtt| j}t|dksftt|d jdks|t|d jd jd	kst|d jd jdkst|d jd j	dkst|d jd j
dkstt|d jdkst|d jd jd
kst|d jd jdks&t|d jd j	dks@t|d jd j
dksZt|d jd jdkstt|d jd jdkst|d jd j	dkst|d jd j
dkstdS )zTest span.ents for the ZPRODUCTr   r   r;         r>   r*   r<   Zanotherr
   z	a third .N)r   re   Zentsr+   r!   r$   r"   r   rT   r.   r/   )r   	sentencesr   r   r   test_span_ents_property  s(    
rn   c             C   s.  | dd | dd | dd | dd g}t |}t|dksDt|d jdkr`|d jdksdt|d jdkr|d jdkst|d	 jdkr|d	 jdkst| dd | dd | d
d | dd | dd g}t |}t|d	kstt|d dkstt|d d
kst|d jdkr>|d jdksBt|d jd
krb|d jdksft| dd | d	d
 | d
d | dd | dd g}t |}t|d	kstt|d dkstt|d d
kst|d jdkr|d jdkst|d jd
kr&|d jdks*td S )Nr   r2   r:   rk   r=   r>   r*   r   r
   r)   r;   	   )r	   r+   r$   r.   r/   )r   Zspansfilteredr   r   r   test_filter_spans*  s(    ,   6$$6$rq   c             C   s   | dd | dd kst | dd | dd ks8t | dd |dd ksTt t| dd t| dd ksxt t| dd t| dd kst t| dd t|dd kst d S )Nr   r
   r   r*   )r$   rO   )r   r    r   r   r   test_span_eq_hashD  s    $$rr   c          	   C   s   d}d}| || }x*t ||D ]}|||  | | ks tq W tt |d }W d Q R X tt |d }W d Q R X d S )Nr   r)   )ranger$   rE   rU   
IndexError)r   r.   r/   r'   r%   _r   r   r   test_span_boundariesM  s    rw   c          	   C   s@   | d}|dd }|j jr ttt |j W d Q R X d S )Nz7Check span.sent raises error if doc is not sentencized.r   r*   )r   Zis_sentencedr$   rE   rU   rV   r,   )r   r   r'   r   r   r   	test_sentY  s
    rx   )/
__future__r   rE   rA   Znumpy.testingr   Zspacy.attrsr   r   Zspacy.tokensr   r   Zspacy.vocabr   Z
spacy.utilr	   utilr   Zfixturer   r    markZparametrizer+   r(   r1   r6   r7   r9   r?   rB   rI   rK   rN   rP   rW   rX   r^   ra   rg   ri   rj   rn   rq   rr   rw   rx   r   r   r   r   <module>   sL   	!
'		