B
    (bh                 @   sX  d Z ddlmZ ddlZddlZddlZddlmZm	Z	 ddl
mZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZmZ ddlm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+ dd Z,dd Z-dd Z.dd Z/dd Z0d d! Z1d"d# Z2d$d% Z3d&d' Z4d(d) Z5d*d+ Z6d,d- Z7d.d/ Z8d0d1 Z9d2d3 Z:d4d5 Z;d6d7 Z<d8d9 Z=d:d; Z>d<d= Z?d>d? Z@d@dA ZAdBdC ZBdDdE ZCdFdG ZDdHdI ZEdJdK ZFdLdM ZGdNdO ZHdPdQ ZIdRdS ZJdTdU ZKdVdW ZLdXdY ZMeNdZkrTe  dS )[z0
Todo: cross-check the F-value with stats model
    )divisionN)statssparse)run_module_suite)assert_equal)assert_almost_equal)assert_raises)assert_true)assert_array_equal)assert_array_almost_equal)assert_not_in)assert_less)assert_warns)ignore_warnings)assert_warns_message)assert_greater)assert_greater_equal)	safe_mask)make_classificationmake_regression)chi2	f_classiff_onewayf_regressionmutual_info_classifmutual_info_regressionSelectPercentileSelectKBest	SelectFpr	SelectFdr	SelectFweGenericUnivariateSelectc              C   sj   t jd} | dd}d| dd }t||\}}t||\}}tt || tt || d S )Nr   
         )nprandomRandomStateZrandnr   r   r	   Zallclose)rngX1X2fpvf2pv2 r/   b/home/dcms/DCMS/lib/python3.7/site-packages/sklearn/feature_selection/tests/test_feature_select.pytest_f_oneway_vs_scipy_stats%   s    r1   c              C   sh   t jd} | jddd}t d}t||\}}t|t j|\}}t||dd t||dd d S )Nr   r"   )r"   r"   )size   )decimal)	r%   r&   r'   randintaranger   astypefloatr   )r(   XyZfintZpintr+   pr/   r/   r0   test_f_oneway_ints0   s    
r<   c              C   s   t ddddddddd	d
dd\} }t| |\}}tt| |\}}t|dk  t|dk  t|dk   t|d d dk   t|dd  dk  t|| t|| d S )N      r#      r      r$   g        r"   F)	n_samples
n_featuresn_informativen_redundant
n_repeated	n_classesn_clusters_per_classflip_y	class_sepshufflerandom_state   g?g-C6?)r   r   r   
csr_matrixr	   allr   )r9   r:   Fr,   F_sparse	pv_sparser/   r/   r0   test_f_classif>   s    
rR   c              C   s  t dddddd\} }t| |\}}t|dk  t|dk  t|dk   t|d d dk   t|dd  d	k  t| |d
d\}}tt| |d
d\}}t|| t|| t| |dd\}}tt| |dd\}}t|| t|| d S )Nr=   r>   rL   Fr   )rA   rB   rC   rJ   rK   r$   g?g-C6?T)center)r   r   r	   rN   r   rM   r   )r9   r:   rO   r,   rP   rQ   r/   r/   r0   test_f_regressionR   s     


rT   c              C   sj   t jd} | dd}t dt j}t||\}}t||t j\}}t	||d t	||d d S )Nr   r"   r>   rL   )
r%   r&   r'   randr6   r7   intr   r8   r   )r(   r9   r:   F1Zpv1F2r.   r/   r/   r0   test_f_regression_input_dtypel   s    rY   c              C   s   t dddd} | j}t |}|d d d  d9  < d|d< t| |d	d
\}}t| |dd
\}}t||d  |d  | t|d d d S )N   r$   r?   g      g        r   T)rS   Fg      ?g       @g@9w?)r%   r6   Zreshaper2   onesr   r   r   )r9   rA   YrW   _rX   r/   r/   r0   test_f_regression_centery   s    
r`   c              C   s   t ddddddddd	d
dd\} }t| |\}}t|dk  t|dk  t|dk   t|d d dk   t|dd  dk  d S )Nr=   r>   r#   r?   r   r@   r$   g        r"   F)rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   g?g-C6?)r   r   r	   rN   )r9   r:   rO   r,   r/   r/   r0   test_f_classif_multi_class   s    ra   c              C   s   t ddddddddd	d
dd\} }ttdd}|| || }ttddd| || }t|| | }t	d}d|d d< t|| d S )Nr=   r>   r#   r?   r   r@   r$   g        r"   F)rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK      )
percentilerc   )modeparamrL   )
r   r   r   fit	transformr!   r
   get_supportr%   zeros)r9   r:   univariate_filterX_rX_r2supportgtruthr/   r/   r0   test_select_percentile_classif   s    

ro   c        	      C   s  t ddddddddd	d
dd\} }t| } ttdd}|| || }ttddd| || }t|	 |	  |
 }td}d|d d< t|| ||}tt| t||}t|j| j t|d d |f 	 |	  t| |  d S )Nr=   r>   r#   r?   r   r@   r$   g        r"   F)rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rb   )rc   rc   )rd   re   rL   )r   r   rM   r   r   rf   rg   r!   r
   Ztoarrayrh   r%   ri   inverse_transformr	   issparser   r   shapeZgetnnz)	r9   r:   rj   rk   rl   rm   rn   ZX_r2invZsupport_maskr/   r/   r0   %test_select_percentile_classif_sparse   s*    




rs   c              C   s   t ddddddddd	d
dd\} }ttdd}|| || }ttddd| || }t|| | }t	d}d|d d< t|| d S )Nr=   r>   r#   r?   r   r@   r$   g        r"   F)rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   )kk_best)rd   re   )
r   r   r   rf   rg   r!   r
   rh   r%   ri   )r9   r:   rj   rk   rl   rm   rn   r/   r/   r0   test_select_kbest_classif   s    

rv   c              C   s@   t ddddd\} }ttdd}|| || }t| | d S )Nr>   r"   Fr   )rA   rB   rJ   rK   rN   )rt   )r   r   r   rf   rg   r
   )r9   r:   rj   rk   r/   r/   r0   test_select_kbest_all   s
    rw   c              C   sl   t ddddd\} }ttdd}|| | | }tjdtd}t|| t	t
d|j| }t|jd	 d S )
Nr>   r"   Fr   )rA   rB   rJ   rK   )rt   )dtypezNo features were selected)r>   r   )r   r   r   rf   rh   r%   ri   boolr
   r   UserWarningrg   r   rr   )r9   r:   rj   rm   rn   
X_selectedr/   r/   r0   test_select_kbest_zero   s    

r|   c              C   s   t ddddddddd	d
dd\} }ttdd}|| || }td}d|d d< xDdD ]<}tt|dd| || }t|| |	 }t
|| q\W d S )Nr=   r>   r#   r?   r   r@   r$   g        r"   F)rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   g{Gz?)alpharL   )fdrfprfwe)rd   re   )r   r    r   rf   rg   r%   ri   r!   r
   rh   r   )r9   r:   rj   rk   rn   rd   rl   rm   r/   r/   r0   test_select_heuristics_classif   s    


r   c             C   s:   | j }|  }tt|| t||  d   d S )N)Zscores_rh   r
   r%   sortsum)Zscore_filterscoresrm   r/   r/   r0   assert_best_scores_kept  s    r   c              C   s   t dddddd\} }ttdd}|| || }t| ttd	dd
| || }t|| | }t	
d}d|d d< t|| |  }d|d d t	|f< t||| t|t||t d S )Nr=   r>   rL   Fr   )rA   rB   rC   rJ   rK   rb   )rc   rc   )rd   re   r$   )r   r   r   rf   rg   r   r!   r
   rh   r%   ri   copyZlogical_notrp   r7   ry   )r9   r:   rj   rk   rl   rm   rn   ZX_2r/   r/   r0   !test_select_percentile_regression  s"    



r   c              C   s   t dddddd\} }ttdd}|| || }t| ttd	dd
| || }t|| | }t	
d}t|| d S )Nr=   r>   rL   Fr   )rA   rB   rC   rJ   rK   d   )rc   rc   )rd   re   )r   r   r   rf   rg   r   r!   r
   rh   r%   r]   )r9   r:   rj   rk   rl   rm   rn   r/   r/   r0   &test_select_percentile_regression_full8  s    

r   c              C   sv   t dddddd\} }tttddj| | tttd	dj| | tttd
ddj| | tttd
d	dj| | d S )Nr"   r>   r?   Fr   )rA   rB   rC   rJ   rK   r\   )rc   e   rc   )rd   re   )r   r   
ValueErrorr   rf   r!   )r9   r:   r/   r/   r0   test_invalid_percentileI  s    r   c              C   s   t ddddddd\} }ttdd}|| || }t| ttd	dd
| || }t|| | }t	
d}d|d d< t|| d S )Nr=   r>   rL   Fr   r"   )rA   rB   rC   rJ   rK   noise)rt   ru   )rd   re   r$   )r   r   r   rf   rg   r   r!   r
   rh   r%   ri   )r9   r:   rj   rk   rl   rm   rn   r/   r/   r0   test_select_kbest_regressionU  s    

r   c              C   s   t ddddddd\} }ttdd	}|| || }td}d
|d d< xtdD ]l}tt|dd| || }t|| |	 }t|d d tj
dtjd tt|dd  d
kd qRW d S )Nr=   r>   rL   Fr   r"   )rA   rB   rC   rJ   rK   r   g{Gz?)r}   r$   )r~   r   r   )rd   re   )rL   )rx   r#   )r   r   r   rf   rg   r%   ri   r!   r
   rh   r]   ry   r   r   )r9   r:   rj   rk   rn   rd   rl   rm   r/   r/   r0   !test_select_heuristics_regressionh  s    


r   c              C   sp  t ddgddgddgg} t dgdgdgg}t| |\}}t|t ddg t|t dd	g ttd
d}|| | | }t|t ddg ttdd}|| | | }t|t ddg t	tdd}|| | | }	t|	t ddg t
td
d}
|
| | |
 }t|t ddg ttd
d}|| | | }t|t ddg d S )Nr"   r>      r$   r   g      @ggm?gQaK?gX٬<y?g?)r}   TF)rt   2   )rc   )r%   arrayr   r   r   rf   rh   r
   r   r   r   r    )r9   r:   r   ZpvaluesZ
filter_fdrZsupport_fdrZfilter_kbestZsupport_kbestZfilter_percentileZsupport_percentileZ
filter_fprZsupport_fprZ
filter_fweZsupport_fwer/   r/   r0   test_boundary_case_ch2|  s2    r   c                 sh   dd xZdD ]R xLdD ]Dt  fddtdD } t |  | dkrt|  d	  qW qW d S )
Nc          	   S   s   t dd|d|dd\}}tjdd@ tt| d}||||}ttd	| d
|||}W d Q R X t|| |	 }t
||d  dk}	t
|d | dk}
|	dkrdS |	|
|	  }|S )N   r>   Fr"   )rA   rB   rC   rJ   rK   r   T)record)r}   r~   )rd   re   r$   r   g        )r   warningscatch_warningsr   r   rf   rg   r!   r
   rh   r%   r   )r}   rC   rK   r9   r:   rj   rk   rl   rm   Znum_false_positivesZnum_true_positivesfalse_discovery_rater/   r/   r0   
single_fdr  s"    $

z.test_select_fdr_regression.<locals>.single_fdr)gMbP?g{Gz?g?)r$   rL   r"   c                s   g | ]} |qS r/   r/   ).0rK   )r}   rC   r   r/   r0   
<listcomp>  s   z.test_select_fdr_regression.<locals>.<listcomp>r   r   r"   )r%   Zmeanranger   r   )r   r/   )r}   rC   r   r0   test_select_fdr_regression  s    


r   c              C   s   t dddddd\} }ttdd}|| || }ttd	dd
| || }t|| | }t	d}d|d d< t|d d tj
dtjd tt|dd  dkd d S )Nr=   r>   rL   Fr   )rA   rB   rC   rJ   rK   g{Gz?)r}   r   )rd   re   r$   )rL   )rx   r?   )r   r    r   rf   rg   r!   r
   rh   r%   ri   r]   ry   r   r   )r9   r:   rj   rk   rl   rm   rn   r/   r/   r0   test_select_fwe_regression  s    

r   c              C   s   dddgdddgdddgdddgg} dg}dd }xx| D ]p}t |dd}t|j|g|}t|jd d t| t |dd}t|j|g|}t|jd d t| q8W d S )Nr   r$   c             S   s   | d | d fS )Nr   r/   )r9   r:   r/   r/   r0   <lambda>      z.test_selectkbest_tiebreaking.<locals>.<lambda>)rt   r?   )r   r   fit_transformr   rr   r   )Xsr:   dummy_scorer9   selr)   r*   r/   r/   r0   test_selectkbest_tiebreaking  s    $
r   c              C   s   dddgdddgdddgdddgg} dg}dd }xx| D ]p}t |dd}t|j|g|}t|jd d t| t |dd}t|j|g|}t|jd d t| q8W d S )	Nr   r$   c             S   s   | d | d fS )Nr   r/   )r9   r:   r/   r/   r0   r     r   z3test_selectpercentile_tiebreaking.<locals>.<lambda>"   )rc   C   r?   )r   r   r   r   rr   r   )r   r:   r   r9   r   r)   r*   r/   r/   r0   !test_selectpercentile_tiebreaking  s    $
r   c              C   s   t dddgdddgg} ddg}xvtdD ]h}| d d |f }ttdd||}t|jd	 t	d| t
td
d||}t|jd	 t	d| q.W d S )Ni'  i'  i'  r$   r   )r   r$   r?   r?   )rt   )r?   r?   r   )rc   )r%   r   	itertoolspermutationsr   r   r   r   rr   r   r   )ZX0r:   Zpermr9   Xtr/   r/   r0   test_tied_pvalues  s    
r   c              C   s   t dddgdddgdddgg} ddgddgddgg}ttdd	| |}t|jd
 td| ttdd| |}t|jd
 td| d S )Ni'  i'  r   r   i  c   r$   r?   )rt   )r#   r?   r   )rc   )	r%   r   r   r   r   r   rr   r   r   )r9   r:   r   r/   r/   r0   test_scorefunc_multilabel  s    "
r   c              C   sx   t dddgdddgg} ddg}xPdD ]H}tt|d| |}|dddgg}t|d t d| d   q(W d S )Nr   r$   )r$   r?   r#   )rt   r?   r#   )r%   r   r   r   rf   rg   r
   r6   )ZX_trainZy_trainrB   r   ZX_testr/   r/   r0   test_tied_scores  s    
r   c              C   sv   dddgdddgdddgg} dddg}xJt tdttddfD ]0}t|j| | t|jdd	tddg q>W d S )
Nr   r$   r\   g      ?r?   r   )rc   T)indices)	r   r   r   r   rf   r
   rh   r%   r   )r9   r:   selectr/   r/   r0   	test_nans*  s    

r   c              C   sZ   dddgdddgdddgg} dddg}x.t tttttgD ]}tt|ddj| | q8W d S )Nr   r$   r\   g      ?r"   )Z
score_func)	r   r   r    r   r   r!   r   	TypeErrorrf   )r9   r:   ZSelectFeaturesr/   r/   r0   test_score_func_error7  s
    
r   c              C   s   dddgdddgdddgg} dddg}t ttddj| | t ttddj| | t ttdddj| | t ttdddj| | d S )	Nr   r$   r\   g      ?)rt   r3   ru   )rd   re   )r   r   r   rf   r!   )r9   r:   r/   r/   r0   test_invalid_k@  s    
r   c              C   s2   t ddd\} }d| d d df< ttt| | d S )Nr"   rL   )rA   rB   g       @r   )r   r   rz   r   )r9   r:   r/   r/   r0   test_f_classif_constant_featureL  s    r   c              C   s   t jd} | dd}| jdddd}tdd||tdd||tdd||t	dd||t
dd	||g}x<|D ]4}t| t d ttd
|j|}t|jd qW d S )Nr   (   r"   r3   )r2   g{Gz?)r}   )rc   )rt   zNo features were selected)r   r   )r%   r&   r'   rU   r5   r    rf   r   r   r   r   r
   rh   ri   r   rz   rg   r   rr   )r(   r9   r:   Zstrict_selectorsselectorr{   r/   r/   r0   test_no_feature_selectedT  s    
r   c              C   s   t dddddddddddd	\} }ttdd
}|| || }ttddd| || }t|| | }t	d}d|d d< t|| t
tdd}|| || }ttddd| || }t|| | }t	d}d|d d< t|| d S )Nr   rL   r$   r   r?   g        r"   F)rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   )rt   ru   )rd   re   r   )rc   rc   )r   r   r   rf   rg   r!   r
   rh   r%   ri   r   )r9   r:   rj   rk   rl   rm   rn   r/   r/   r0   test_mutual_info_classifi  s.    




r   c              C   s   t ddddddd\} }ttdd}|| || }t| ttddd	| || }t|| | }t	
d}d
|d d< t|| ttdd}|| || }ttddd	| || }t|| | }t	
d}d
|d d< t|| d S )Nr   r"   r?   Fr   )rA   rB   rC   rJ   rK   r   )rt   ru   )rd   re   r$   r>   )rc   rc   )r   r   r   rf   rg   r   r!   r
   rh   r%   ri   r   )r9   r:   rj   rk   rl   rm   rn   r/   r/   r0   test_mutual_info_regression  s*    




r   __main__)O__doc__
__future__r   r   r   numpyr%   Zscipyr   r   Znumpy.testingr   Zsklearn.utils.testingr   r   r   r	   r
   r   r   r   r   r   r   r   r   Zsklearn.utilsr   Z"sklearn.datasets.samples_generatorr   r   Zsklearn.feature_selectionr   r   r   r   r   r   r   r   r   r   r    r!   r1   r<   rR   rT   rY   r`   ra   ro   rs   rv   rw   r|   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   __name__r/   r/   r/   r0   <module>   st   8	 
"*	
