B
    .(b/                 @   s`  d dl mZ d dlZd dlZd dlmZmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ ddlmZmZ ejdd Zejdd Zejdd Ze dd Zejdd Zejdd Zejdd Zejdd Z e dd Z!e dd  Z"d!d" Z#d#d$ Z$d%d& Z%d'd( Z&d)d* Z'd+d, Z(d-d. Z)d/d0 Z*d1d2 Z+ej,-d3d4gd5d6 Z.ej,-d3d7gd8d9 Z/ej,-d3d:gd;d< Z0ej,-d3d7d=gd>d? Z1ej,-d3d7d@d=ggdAdB Z2ej,-d3d7d@d=ggdCdD Z3ej,-d3dEgdFdG Z4ej,-dHdIgdJdK Z5ej,-d3d7d=dLggdMdN Z6ej,-d3d7d=dLggdOdP Z7ej,-d3d7d=dLggdQdR Z8ej,-dHdIgdSdT Z9ej,-d3d7d=dLggdUdV Z:ej,-d3d7d=dLggdWdX Z;ej,-d3d7d=dLggdYdZ Z<ej,-dHd7d@d7d[gd=dLgfgd\d] Z=d^d_ Z>d`da Z?ej,j@edbdcddde ZAdfdg ZBdS )h    )unicode_literalsN)assert_allcloseassert_equal)cosine)Vocab)Vectors)	Tokenizer)hash_string)Doc)
is_python2   )add_vecs_to_vocabmake_tempdirc               C   s   ddgS )Nappleorange r   r   r   U/home/dcms/DCMS/lib/python3.7/site-packages/spacy/tests/vocab_vectors/test_vectors.pystrings   s    r   c               C   s@   ddddgfddddgfd	dddgfd
dddgfddddgfgS )Nr      r      r   andjuice   
   pie   g333333@g!@r   r   r   r   r   vectors   s
    r   c               C   s4   ddddgfddddgfd	ddd
gfddddgfgS )Nr   r   r   r   Zappggɿg333333ӿZpplgٿplgffffff?g?g?r   r   r   r   r   ngrams_vectors"   s    r!   c             C   s   t | | | S )N)r   )en_vocabr!   r   r   r   ngrams_vocab,   s    
r#   c               C   s   t jdddgdddggddS )	Ng        g      ?g       @g      @g       g      @f)dtype)numpyasarrayr   r   r   r   data2   s    r(   c               C   s.   t jdddgdddgdddgdddggdd	S )
Ng        g      ?g       @g       g      @g      g      @r$   )r%   )r&   r'   r   r   r   r   most_similar_vectors_data7   s    "r)   c               C   s   ddddgS )Nabcdr   r   r   r   r   most_similar_vectors_keys?   s    r.   c               C   s   t jddgddggddS )Ng        g      ?g       @g      @r$   )r%   )r&   r'   r   r   r   r   resize_dataD   s    r/   c             C   s   t | | | S )N)r   )r"   r   r   r   r   vocabI   s    
r0   c             C   s   t | i d d d S )N)r   )r0   r   r   r   tokenizer_vO   s    r1   c             C   sJ   t t| dfd}|j|jd |j|jks0t|jt| dfksFtd S )Nr   )shape)r   lenresizer2   AssertionError)r   r/   vr   r   r   #test_init_vectors_with_resize_shapeT   s    r7   c             C   s<   t | d}|j|jd |j|jks(t|j| jks8td S )N)r(   )r2   )r   r4   r2   r5   )r(   r/   r6   r   r   r   "test_init_vectors_with_resize_data[   s    
r8   c             C   sV  dd | D } t |d}|jd d }|j|jd |fd x"t| D ]\}}|j||d qFW t|| d  t|dd |f kstt|| d  t|dd |f kstt |d}|jd d }|j|jd |fd x"t| D ]\}}|j||d qW t|| d  t|d dg ks*tt|| d  t|d dg ksRtd S )Nc             S   s   g | ]}t |qS r   )r	   ).0sr   r   r   
<listcomp>c   s    z*test_get_vector_resize.<locals>.<listcomp>)r(   r   r   )r2   )row)r   r2   r4   	enumerateaddlistr5   )r   r(   r6   Zresized_dimistringr   r   r   test_get_vector_resizeb   s    
((
(rB   c             C   s   t |d}|j|jkstd S )N)r(   )r   r2   r5   )r   r(   r6   r   r   r   test_init_vectors_with_dataz   s    
rC   c             C   s,   t t| dfd}|jt| dfks(td S )Nr   )r2   )r   r3   r2   r5   )r   r6   r   r   r   test_init_vectors_with_shape   s    rD   c             C   s   t |d}dd | D } x"t| D ]\}}|j||d q"W t|| d  t|d ks\tt|| d  t|d ks|tt|| d  t|d kstd S )N)r(   c             S   s   g | ]}t |qS r   )r	   )r9   r:   r   r   r   r;      s    z#test_get_vector.<locals>.<listcomp>)r<   r   r   )r   r=   r>   r?   r5   )r   r(   r6   r@   rA   r   r   r   test_get_vector   s    
  rE   c             C   s   |  }t|d}dd | D } x"t| D ]\}}|j||d q*W t|| d  t|d ksdtt|| d  t|d kst|d || d < t|| d  t|d kstt|| d  t|d kstd S )N)r(   c             S   s   g | ]}t |qS r   )r	   )r9   r:   r   r   r   r;      s    z#test_set_vector.<locals>.<listcomp>)r<   r   r   )copyr   r=   r>   r?   r5   )r   r(   origr6   r@   rA   r   r   r   test_set_vector   s    
   rH   c          	   C   sn   t | |d}|j|jdddd\}}}tdd t|D s@ttt |j|jdddd W d Q R X d S )N)r(   keysr   T)
batch_sizensortc             s   s   | ]\}}|d  |kV  qdS )r   Nr   )r9   r@   r<   r   r   r   	<genexpr>   s    z,test_vectors_most_similar.<locals>.<genexpr>r   )	r   most_similarr(   allr=   r5   pytestraises
ValueError)r)   r.   r6   _Z	best_rowsr   r   r   test_vectors_most_similar   s
    rT   c              C   s   t jddddgddddgddddggdd} t| dddgd	}|t jddddggdd\}}}|d
 d
 dksvtt jdddgdddgdddggdd} t| dddgd	}|t jdddggdd\}}}|d
 d
 dkstdS )zETest that most similar identical vectors are assigned a score of 1.0.   r   r   r$   )r%   ABC)r(   rI   r   g      ?r   N)r&   r'   r   rN   r5   )r(   r6   rI   rS   Zscoresr   r   r   #test_vectors_most_similar_identical   s    ,$&"rY   textzapple and orangec             C   sT   | |}|d |d j t|d jfks,t|d |d j t|d jfksPtd S )Nr   r   r   )rZ   r?   vectorr5   )r1   r   rZ   docr   r   r   test_vectors_token_vector   s    $r]   r   c             C   s&   t | |t |d d ks"td S )Nr   r   )r?   
get_vectorr5   )r#   r!   rZ   r   r   r   test_vectors__ngrams_word   s    r_   Zapplpiec                sr   t | |ddt  fddtt d d D fddttD }x|D ]}|dk sZtqZW d S )Nr      c                s@   g | ]8} d  d  |  d d  |   d d  |  d qS )r   r   r   r   )r9   r@   )r!   r   r   r;      s   z0test_vectors__ngrams_subword.<locals>.<listcomp>c                s    g | ]}t |  |  qS r   )abs)r9   r@   )testtruthr   r   r;      s    gư>)r?   r^   ranger3   r5   )r#   r!   rZ   Zepsr@   r   )r!   rb   rc   r   test_vectors__ngrams_subword   s    

re   r   c             C   s$   | | }t |jst|js td S )N)r?   r[   r5   vector_norm)r0   rZ   lexr   r   r   test_vectors_lexeme_vector   s    rh   r   c             C   s(   t | |d}t|jst|js$td S )N)words)r
   r?   r[   r5   rf   )r0   rZ   r\   r   r   r   test_vectors_doc_vector   s    rj   c             C   s0   t | |ddd }t|js"t|js,td S )N)ri   r   r   )r
   r?   r[   r5   rf   )r0   rZ   spanr   r   r   test_vectors_span_vector   s    rl   zapple orangec             C   s\   | |}|d  |d |d  |d ks0td|d  |d   k rRdk sXn td S )Nr   r   g      g      ?)
similarityr5   )r1   rZ   r\   r   r   r   #test_vectors_token_token_similarity   s    (rn   ztext1,text2)r   r   c             C   sL   | |}|| }| || |ks(td| |  k rBdk sHn td S )Ng      g      ?)rm   r5   )r1   r0   text1text2tokenrg   r   r   r   $test_vectors_token_lexeme_similarity   s    rr   r   c             C   sl   t | |d}|d |dd |dd |d ks<td|d |dd   k rbdk shn td S )N)ri   r   r   r   g      g      ?)r
   rm   r5   )r0   rZ   r\   r   r   r   "test_vectors_token_span_similarity   s    0rs   c             C   sT   t | |d}|d |||d ks,td|d |  k rJdk sPn td S )N)ri   r   g      g      ?)r
   rm   r5   )r0   rZ   r\   r   r   r   !test_vectors_token_doc_similarity   s     rt   c             C   sl   t | |d}| |d  }||dd |dd |ks@td||dd   k rbdk shn td S )N)ri   r   r   r   g      g      ?)r
   rm   r5   )r0   rZ   r\   rg   r   r   r   #test_vectors_lexeme_span_similarity  s    (ru   c             C   sL   | | }| | }| || |ks(td| |  k rBdk sHn td S )Ng      g      ?)rm   r5   )r0   ro   rp   Zlex1Zlex2r   r   r   %test_vectors_lexeme_lexeme_similarity	  s    rv   c             C   sT   t | |d}| |d  }||||ks0td||  k rJdk sPn td S )N)ri   r   g      g      ?)r
   rm   r5   )r0   rZ   r\   rg   r   r   r   "test_vectors_lexeme_doc_similarity  s    rw   c          	   C   s   t | |d}ttn |dd |dd |dd |dd ksPtd|dd |dd   k rzdk sn tW d Q R X d S )N)ri   r   r   r   r   g      g      ?)r
   rP   warnsUserWarningrm   r5   )r0   rZ   r\   r   r   r   !test_vectors_span_span_similarity  s    8rz   c          	   C   sv   t | |d}ttV |dd |||dd ks@td|dd |  k rbdk shn tW d Q R X d S )N)ri   r   r   g      g      ?)r
   rP   rx   ry   rm   r5   )r0   rZ   r\   r   r   r    test_vectors_span_doc_similarity!  s    (r{   r   c             C   sT   t | |d}t | |d}||||ks0td||  k rJdk sPn td S )N)ri   g      g      ?)r
   rm   r5   )r0   ro   rp   Zdoc1Zdoc2r   r   r   test_vectors_doc_doc_similarity)  s    r|   c           	   C   s   t dd} tjddd}d|d< d|d	< | d
|d  | d|d	  | d
 }t|jdddgksht| d }t|jdddgksttt	 | j
j| d jdd W d Q R X d S )Ntest_vocab_add_vector)vectors_name)r   r   r$   )r%   g      ?r   g       @r   catdoghamsteri@B )r<   )r   r&   ndarray
set_vectorr?   r[   r5   rP   rQ   rR   r   r>   Zorth)r0   r(   r   r   r   r   r   r}   3  s    
r}   c              C   s   t dd} | d }| d }| d }tjddd}d	d
dg|d< ddd	g|d< dddg|d< | d|d  | d|d  | d|d  | jddd}t| dgkstt| d \}}|dkst|t	|t
|d |d ddd d S )Ntest_vocab_prune_vectors)r~   r   r   Zkitten)r   r   r$   )r%   g      ?g333333?g?r   g333333?g?r   g?gQ?g?r   )rJ   g-C6?gMbP?)ZatolZrtol)r   r&   r   r   Zprune_vectorsr?   rI   r5   valuesr   r   )r0   rS   r(   ZremapZ	neighbourrm   r   r   r   r   C  s     
r   z+Dict order? Not sure if worth investigating)reasonc           	   C   s  t jddddgddddgddddggdd} t| dddgd	}| }t }|| t|j|j |j|jkstt|	d
 |	d
 |j
dt jddddgddd}|j
dt jddddgddd}||kstt|j|j |j|jkstt }|| || t|j|j |j|jks.t|	d
 |	d
 |j
dt jddddgddd}|j
dt jddddgddd}||kstt|j|j W d Q R X d S )NrU   r   r   r$   )r%   rV   rW   rX   )r(   rI   )r   rU   Dr   )r[   r         (   )r&   r'   r   to_bytes
from_bytesr   r(   Zkey2rowr5   r4   r>   Zis_fullr   Zto_diskZ	from_disk)r(   r6   r+   Zv_rr<   Zrow_rr-   r   r   r   test_vectors_serializeW  s2    ,


  



  r   c              C   s   t dd} tjddd}d|d< d|d	< | d
|d  | d|d	  | d
 jdksZt| d jdkslt| d jdks~td S )NZtest_vocab_is_oov)r~   )r   r   r$   )r%   g      ?r   g       @r   r   r   Fr   T)r   r&   r   r   Zis_oovr5   )r0   r(   r   r   r   test_vector_is_oovt  s    
r   )C
__future__r   rP   r&   Znumpy.testingr   r   Z	spacy._mlr   Zspacy.vocabr   Zspacy.vectorsr   Zspacy.tokenizerr   Zspacy.stringsr	   Zspacy.tokensr
   Zspacy.compatr   utilr   r   Zfixturer   r   r!   r#   r(   r)   r.   r/   r0   r1   r7   r8   rB   rC   rD   rE   rH   rT   rY   markZparametrizer]   r_   re   rh   rj   rl   rn   rr   rs   rt   ru   rv   rw   rz   r{   r|   r}   r   Zskipifr   r   r   r   r   r   <module>   sf   

	"	