B
    (b                 @   s2  d Z ddlmZ ddlZddlZddlmZmZm	Z	 ddl
mZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm Z  ddl!m"Z" ddl#m$Z$ ddl#m%Z% ddl#m&Z& ddl#m'Z' ddl#m(Z( ddl#m)Z) ddl#m*Z* dd l#m+Z+ dd!l#m,Z, dd"l#m-Z- dd#l#m.Z. dd$l#m/Z/ dd%l#m0Z0 dd&l#m1Z1 dd'l#m2Z2 dd(l#m3Z3 dd)l#m4Z4 dd*l#m5Z5 dd+l6m7Z7 dd,l8m9Z9 dd-l8m:Z: dd.l8m;Z; dd/l<m=Z= dd0l<m>Z> dd1l?m@Z@ dd2lAmBZB dd3lCmDZD dd4lEmFZF eGd5ZHeId5d6 ZJeeKd7ZLeMd8d8d8d8d6d6d6d9d9d9d9d9geMdddd8d8d8d6d6d6d9d9d9geMdd8d6d9dd8d6d9dd8d6d9dd8d6geMd8d8d6d6d6d9d9d9d:d:d:d:d:d:d:d:gd8d8d8d8d6d6d6d9d9d9d9d9gd;d;d;d;d<d<d<d=d=d=d=d=gfZNe= ZOG d>d? d?ePZQed@dA ZRdBdC ZSddDdEZTddFdGZUdHdI ZVdJdK ZWdLdM ZXdNdO ZYdPdQ ZZdRdS Z[dTdU Z\dVdW Z]dXdY Z^dZd[ Z_d\d] Z`d^d_ Zaed`da Zbdbdc Zcddde Zddfdg Zedhdi Zfdjdk Zgdldm Zhdndo Zidpdq Zjdrds Zkdtdu Zldvdw Zmedxdy Zndzd{ Zod|d} Zpd~d Zqdd Zrdd Zsdd Ztedd Zudd Zvdd Zwdd Zxedd Zydd Zzdd Z{dd Z|dd Z}dd Z~dd Zdd Zdd Zdd Zdd Zdd Zdd ZdS )zTest the split module    )divisionN)
coo_matrix
csc_matrix
csr_matrix)stats)combinations)combinations_with_replacement)assert_true)assert_false)assert_equal)assert_almost_equal)assert_raises)assert_raises_regexp)assert_greater)assert_greater_equal)assert_not_equal)assert_array_almost_equal)assert_array_equal)assert_warns_message)assert_warns)assert_raise_message)ignore_warnings)_num_samples)MockDataFrame)cross_val_score)KFold)StratifiedKFold)
GroupKFold)TimeSeriesSplit)LeaveOneOut)LeaveOneGroupOut)	LeavePOut)LeavePGroupsOut)ShuffleSplit)GroupShuffleSplit)StratifiedShuffleSplit)PredefinedSplit)check_cv)train_test_split)GridSearchCV)RepeatedKFold)RepeatedStratifiedKFold)Ridge)_validate_shuffle_split)_CVIterableWrapper)_build_repr)load_digits)make_classification)six)zip)comb)SVC
                  123c            	   @   s@   e Zd ZdZdddZdddZd	d
 ZdddZdddZdS )MockClassifierz-Dummy classifier to test the cross-validationr   Fc             C   s   || _ || _d S )N)aallow_nd)selfr@   rA    rC   W/home/dcms/DCMS/lib/python3.7/site-packages/sklearn/model_selection/tests/test_split.py__init__P   s    zMockClassifier.__init__Nc          	   C   sF  || _ || _|	| _|
dk	r"|
|  | jr8|t|d}|jdkrP| jsPtd|dk	rt|j	d |j	d kd
|j	d |j	d  |dk	rt|j	d tttkd
|j	d ttt |dk	rd}t|j	d |j	d k|
|j	d |j	d  |dk	rBd	}t|j	tj	k|
|j	d |j	d
 tj	d tj	d
  | S )zThe dummy arguments are to test that this fit function can
        accept non-array arguments through cross-validation, such as:
            - int
            - str (this is actually array-like)
            - object
            - function
        Nr:   zX cannot be dr   zKMockClassifier extra fit_param sample_weight.shape[0] is {0}, should be {1}zIMockClassifier extra fit_param class_prior.shape[0] is {0}, should be {1}zRMockClassifier extra fit_param sparse_sample_weight.shape[0] is {0}, should be {1}zUMockClassifier extra fit_param sparse_param.shape is ({0}, {1}), should be ({2}, {3})r9   )	dummy_int	dummy_str	dummy_objrA   reshapelenndim
ValueErrorr	   shapeformatnpuniqueyP_sparse)rB   XYZsample_weightZclass_priorZsparse_sample_weightZsparse_paramrG   rH   rI   callbackfmtrC   rC   rD   fitT   s<    

zMockClassifier.fitc             C   s&   | j r|t|d}|d d df S )NrF   r   )rA   rJ   rK   )rB   TrC   rC   rD   predict   s    zMockClassifier.predictc             C   s   ddt | j  S )Ng      ?r9   )rP   absr@   )rB   rT   rU   rC   rC   rD   score   s    zMockClassifier.scorec             C   s   | j | jdS )N)r@   rA   )r@   rA   )rB   deeprC   rC   rD   
get_params   s    zMockClassifier.get_params)r   F)	NNNNNNNNN)NN)F)	__name__
__module____qualname____doc__rE   rX   rZ   r\   r^   rC   rC   rC   rD   r?   M   s   
  
*
r?   c               C   s  d} d}d}d}d}t ddgddgddgdd	gg}t ddddg}t ddddg}t ddddg}t }	t|}
t|}t|}t }t|}td
d}t	ddddg}d}d}d}d}d}d}d}d}| t
| ||||t
|||dg}xtt|	|
||||||g||||||||gD ]\}\}}t|| |||| t jt||||t|||| xD||||D ]2\}}tt |jjd tt |jjd qW t|t| q"W d}tt||	jd || tt||
jd || d S )Nr;   r7   r6   r9   r:   r8            r   )random_statezLeaveOneOut()zLeavePOut(p=2)z3KFold(n_splits=2, random_state=None, shuffle=False)z=StratifiedKFold(n_splits=2, random_state=None, shuffle=False)zLeaveOneGroupOut()zLeavePGroupsOut(n_groups=2)zVShuffleSplit(n_splits=10, random_state=0, test_size='default',
       train_size=None)z.PredefinedSplit(test_fold=array([1, 1, 2, 2]))iz%The 'X' parameter should not be None.)rP   arrayr   r!   r   r   r    r"   r#   r&   r4   	enumerater3   r   get_n_splitstestinglistsplitasarraydtypekindreprr   rM   ) 	n_samplesZn_unique_groupsn_splitspZn_shuffle_splitsrT   ZX_1drR   groupsZlooZlpokfskfloloZloposspsZloo_reprZlpo_reprZkf_reprZskf_reprZ	lolo_reprZ	lopo_reprZss_reprZps_reprZn_splits_expectedrg   cvZcv_reprtraintestmsgrC   rC   rD   (test_cross_validator_with_default_params   sX    "

r   c              C   sF  d} t jd}|jdd| dfd}|jdd| fd}|dd}|jdd| dfd}|jdd| fd}t tddt t t	 t
 t td	d
t t tddt t t|dg}x|D ]}t|||| t|||| yt|||| W q tk
r< }	 z$d}
d|
}|t|	ks,tW d d }	~	X Y qX qW d S )N   r9   r   r:   r7   )sizerF   )rt   g      ?)	test_size)n_groups)Z	test_fold)binaryZ
multiclassz/Supported target types are: {}. Got 'multilabel)rP   randomRandomStaterandintrJ   r   r!   r   r   r*   r+   r#   r%   r$   r    r"   r   r   r&   rl   rm   rM   rO   strAssertionError)rr   rngrT   rR   Zy_2dy_multilabelru   Z	splittersZsplittereZallowed_target_typesr~   rC   rC   rD   	test_2d_y   s.    
r   c             C   sH   t | t | } }t| |t   |d k	rDt| |t t| d S )N)setr   intersectionunionrange)r|   r}   rr   rC   rC   rD   check_valid_split   s    r   c       
      C   s   t |}|d k	r&t| |||| n| |||}t }d}x:| |||D ](\}}	t||	|d |d7 }||	 qNW t|| |d k	rt|tt| d S )Nr   )rr   r9   )r   r   rj   r   rm   r   updater   )
r{   rT   rR   ru   expected_n_splitsrr   Zcollected_test_samplesZ
iterationsr|   r}   rC   rC   rD   check_cv_coverage   s    
r   c           	   C   s^  t ddgddgddgg} t ddgddgddgddgd	d
gg}tttd| f t dddddg}td}ttdt||| t	
 " t	d t|||d dd W d Q R X t dddddg}ttt||| tttd tttd d}tt|td tt|td tttd tttd tttd tttd tttdd d d S )Nr9   r7   r:   r;   r8   rc   rd   re   	   r6   rF   zThe least populated classignore)ru   r   r   z>k-fold cross-validation requires at least one train/test splitg      ?g       @)rs   shuffle)rP   rh   rM   nextr   rm   r   r   Warningwarningscatch_warningssimplefilterr   r   r   	TypeError)X1X2rR   Zskf_3Zerror_stringrC   rC   rD   test_kfold_valueerrors  s0    (

r   c              C   s`   t d} td}t|| d d dd t d}td}t||d d dd tdtd| d S )N   r:   )rR   ru   r      r8   )rP   onesr   r   r   rj   )r   rv   r   rC   rC   rD   test_kfold_indices5  s    

r   c              C   s   ddgddgddgddgd	d
gg} t d| d d }t|\}}t|ddg t|ddg t|\}}t|ddg t|ddg t d| }t|\}}t|dddg t|ddg t|\}}t|ddg t|dddg d S )Nr9   r7   r:   r;   r8   rc   rd   re   r   r6   rF   r   )r   rm   r   r   )r   splitsr|   r}   rC   rC   rD   test_kfold_no_shuffleE  s    "r   c              C   sx  t dddddg } }td| |}t|\}}t|ddg t|ddg t|\}}t|ddg t|ddg t ddddddddg } }td| |}t|\}}t|ddddg t|dddg t|\}}t|dddg t|ddddg tdtd| | t d} d	d	d	d
d
d
d
g}dddddddg}t jt	td| |t	td| | d S )Nr;   r9   r   r7   r:   rd   r8   rc   r<   0)
rP   r   r   rm   r   r   r   rj   rk   rl   )rT   rR   r   r|   r}   y1y2rC   rC   rD    test_stratified_kfold_no_shuffle\  s.    
r   c              C   sJ  d} t | }t dgtd|   dgtd|    dgtd|    }xdD ]}xtd	|d
||D ]\}}tt || dkt| dd tt || dkt| dd tt || dkt| dd tt || dkt| dd tt || dkt| dd tt || dkt| dd qjW qPW d S )Ni  r;   g?r   g{Gz?r9   g{Gz?)FTr8   )r   r7   )	rP   r   rh   intr   rm   r   sumrK   )rr   rT   rR   r   r|   r}   rC   rC   rD   test_stratified_kfold_ratios  s    
"
"""""r   c              C   s~   xxt ddD ]j} tdjt| d}g }x|D ]\}}|t| q0W tt|t	| dk t
t||  qW d S )N   r   r8   )rT   r9   )r   r   rm   rP   r   appendrK   r	   maxminr   r   )rg   rv   sizes_r}   rC   rC   rD   test_kfold_balance  s    r   c        	      C   s   t d} dgd dgd  }xdD ]}td|d}x~tddD ]p}|| d | |d | }g }x|D ]\}}|t| qjW tt |t 	| dk t
t || q@W q$W d S )	Nr   r   r:   r9      )TF)r   r   )rP   r   r   r   rm   r   rK   r	   r   r   r   r   )	rT   rR   r   r{   rg   rw   r   r   r}   rC   rC   rD   test_stratifiedkfold_balance  s    

r   c              C   s   t d} t dddd}t dddd}td}td}xxt| |||||D ]V\\}}\}}\}	}
x6t|||	fdD ]"\}}ttt	||t| qW d||< qXW t
t|d d S )Nr:   Tr   )r   rf   r9   i,  r7   )r   rP   r   zerosr3   rm   r   r   rK   intersect1dr   r   )rv   Zkf2Zkf3rT   Z	all_foldsZtr1Zte1Ztr2Zte2Ztr3Zte3Ztr_aZtr_brC   rC   rD   test_shuffle_kfold  s    

2r   c           
   C   sJ  t d} dgd dgd  }t d}dgd dgd  }tdddd	}tdddd	}x\||fD ]P}t jt|| |t|| | t jt|||t||| qbW tddd
}tddd
}xv||fD ]j}xdt| |f||fD ]N}y&t j	t|j| t|j|  W n t
k
r0   Y qX t
d| qW qW d S )N   r   rd   r9   re      r:   T)r   rf   )r   zCThe splits for data, %s, are same even when random state is not set)rP   r   r   r   rk   r   rl   rm   r3   r   r   )rT   rR   r   r   rv   rw   r{   datarC   rC   rD   2test_shuffle_kfold_stratifiedkfold_reproducibility  s(    

&*r   c              C   s   t d} dgd dgd  }tdddd}tdddd}x@t|| ||| |D ]"\\}}\}}tt|t| qVW t|| |d dd d S )	N(   r      r9   r8   T)r   rf   )ru   r   )rP   r   r   r3   rm   r   r   r   )ZX_40rR   Zkf0Zkf1r   Ztest0test1rC   rC   rD   test_shuffle_stratifiedkfold  s    
r   c              C   s   t jd d t jd d  } }tddd}d}t|dd}t|| ||d }td	| t|d
 t|ddd}t|| ||d }t|d	 t|ddd}t|| ||d }t|d	 t|}t|| ||d }td| t|d
 d S )NiX  r6   g{Gzt?)Cgammar:   F)rs   r   )r{   gq=
ףp?g?Tr   )r   rf   r9   g(\?)	digitsr   targetr5   r   r   meanr   r   )rT   rR   modelrs   r{   Z
mean_scorerC   rC   rD   1test_kfold_can_detect_dependent_samples_on_digits  s"    	



	
r   c        	      C   s   t dddt} t dddt}t tdddt}x$tjD ]}t |dddt}qDW xt| |||D ]x\}}}}t|d |d  t|d |d  t|d |d  t|d |d  t|d |d  t|d |d  qrW d S )Ng?r   )r   rf   r7   r9   )	r#   rm   rT   rP   int32r2   integer_typesr3   r   )	Zss1Zss2Zss3typZss4t1t2t3Zt4rC   rC   rD   test_shuffle_split!  s    r   c              C   s  t d} t dddddddg}ttttdd| | ttttdd| | ttttddd| | t d} t dddddddddg	}tttddd	 ttttdd
d	| | ttttdd	d
| | ttttdd| | ttttdd| | d S )Nrd   r   r9   r7   r:   g?r   g      ?g333333?re   )
train_size)r   )rP   arangern   r   rM   r   r%   rm   )rT   rR   rC   rC   rD   "test_stratified_shuffle_split_init0  s$    

r   c              C   s   t dddddddddddddddg} d}d}td||ddt t| | }x,|D ]$\}}tt|| tt|| qXW d S )	Nr   r9   r7   r:   r8   r6   rc   )r   r   rf   )rP   rh   r%   rm   r   rK   r   )rR   r   r   sssr|   r}   rC   rC   rD   0test_stratified_shuffle_split_respects_test_sizeO  s    (r   c        	      C   s8  t ddddddddddddgt ddddddddddddgt dddddddddddddddgd t ddddddddddddddddgt dgd dgd  t d	d
 tdD ddddddddddddgddddddddddddgg} x8| D ].}tddddt t||}t |}t 	dt| }t|| }x|D ]\}}t
t || t ||  t t j|| ddd tt||  }t t j|| ddd tt||  }t||d tt|t| |j tt|| tt|| t
t jj||g  qTW q W d S )Nr9   r7   r:   r   r;   rF   i   2   c             S   s   g | ]}|gd |  qS )d   rC   ).0rg   rC   rC   rD   
<listcomp>`  s    z6test_stratified_shuffle_split_iter.<locals>.<listcomp>r   r<   r=   r>   rc   gQ?)r   rf   T)Zreturn_inverse)rP   rh   Zconcatenater   r%   rm   r   rK   Z
asanyarrayceilr   rQ   Zbincountfloatr   r   r   libZarraysetopsr   )	ZysrR   r   r   r   r|   r}   Zp_trainZp_testrC   rC   rD   "test_stratified_shuffle_split_iterZ  s6      *(
r   c                 s  d} d  fdd}xvdD ]l}t |d ddg }t d	|  dd
}dg| }dg| }d}xd|jt ||dD ]L\}}	|d7 }x:||f||	fgD ]&\}
}x|D ]}|
|  d7  < qW qW qxW t|  t|d	|  d	d	|   d\}}tt|| tt|	| ttt|	|	d t 
|}t|jd	|   t|| t| tt|d t|| }t|| }||| ||| qW d S )Nr8   i  c                s>   d  }t  |}x$| D ]}||}t||kd qW d S )Ng?z=An index is not drawn with chance corresponding to even draws)r   ZbinomZpmfr	   )Z
idx_countsrt   	thresholdZbfcountZprob)rs   rC   rD   assert_counts_are_ok  s    

z@test_stratified_shuffle_split_even.<locals>.assert_counts_are_ok)rc      r7   r   r9   g      ?)rs   r   rf   )rT   rR   )r   r   )rP   rh   r%   rm   r   r   r-   rK   r   r   rQ   r   r   )n_foldsr   rr   ru   r   Ztrain_countsZtest_countsn_splits_actualr|   r}   counteridsidZn_trainZn_testZgroup_countsZ	ex_test_pZ
ex_train_prC   )rs   rD   "test_stratified_shuffle_split_even}  s<    





r   c              C   s|   ddddgd ddgd  } t | }tdddd}t|j|| d	\}}tt ||g  tt ||t t	|  d S )
Nr   r9   r7   r:   r;   r8   g      ?)rs   r   rf   )rT   rR   )
rP   	ones_liker%   r   rm   r   r   union1dr   rK   )rR   rT   r   r|   r}   rC   rC   rD   4test_stratified_shuffle_split_overlap_train_test_bug  s    

r   c              C   s  xt ddgddgddgddggt ddgddgddgddgggD ]} t | }tdddd}t|j|| d\}}| | }| | }tt ||g  tt ||t 	t
|  t | d d df }t|t |d d df  t|t |d d df  qHW d S )Nr   r9   g      ?)rs   r   rf   )rT   rR   )rP   rh   r   r%   r   rm   r   r   r   r   rK   r   r   )rR   rT   r   r|   r}   y_trainy_testexpected_ratiorC   rC   rD   (test_stratified_shuffle_split_multilabel  s    $(
r   c        
      C   s   dddgdgd  dddg } dddgdgd  dddg }t | gd |gd  }t |}tdddd}t|j||d\}}|| }|| }t |d d d	f }	t|	t |d d d	f  t|	t |d d d	f  d S )
Nr9   r   i  r6   r   g      ?)rs   r   rf   )rT   rR   r;   )rP   rh   r   r%   r   rm   r   r   )
Zrow_with_many_zerosZrow_with_many_onesrR   rT   r   r|   r}   r   r   r   rC   rC   rD   4test_stratified_shuffle_split_multilabel_many_labels  s    
r   c        	      C   s   dt d } g }g }xBttdddtD ](\}\}}|| || || |< q.W g }g }t| }tt	t 
| |  x(| D ]\}}|| || qW t|| t|| d S )NrF   r6   r8   T)r   )rP   r   ri   r   rm   rT   r   r&   r   rK   rQ   rj   r   )	foldsZkf_trainZkf_testrg   Z	train_indZtest_indZps_trainZps_testrz   rC   rC   rD   %test_predefinedsplit_with_kfold_split  s     $



r   c           	   C   sX  xPt D ]F} tt|  }}d}d}t||dd}t| t|j||| d| t| }t	| }x|j
||| dD ]\}}	t|| }
t||	 }ttt|| | ttt||	 |
 t|| j||	 j |j tt||	g  ttt|t|t|  dk ttt|
td| t|  dk qzW qW d S )Nrc   gUUUUUU?r   )r   rf   )ru   r9   g      ?)test_groupsrP   r   rK   r$   rq   r   rj   rQ   rn   rm   r
   anyZin1dr   r   r   r	   r[   round)groups_irT   rR   rs   r   ZsloZl_uniquelr|   r}   Zl_train_uniqueZl_test_uniquerC   rC   rD   test_group_shuffle_split  s(    



r   c           
   C   sX  t  } tdd}tdd}tt| d tt|d tt|d tttddd xt| df|df|dffD ]\}\}}xttD ]\}}tt|}|dkr|n||d  d }	t	t| }
}t|j
|
||d	|	 t|}xp|j|
||d	D ]\\}}tt|| ||  g  tt|t| t| tt|| jd
 | qW qW qvW t| 
d d dddddgd t| j
ddddgd	d t|
d d tdd t|j
tdd	d tt | 
d d dtjdg W d Q R X tt |
d d dtjdg W d Q R X d}tt|| j
d d d  tt||j
d d d  d S )Nr9   )r   r7   zLeaveOneGroupOut()zLeavePGroupsOut(n_groups=1)zLeavePGroupsOut(n_groups=2)r:   zLeavePGroupsOut(n_groups=3))ru   r   r@   bcg      ?g?g333333?r;   rc   g        z*The 'groups' parameter should not be None.)r    r"   r   rq   ri   r   rK   rP   rQ   r   rj   rn   rm   r   r   tolistr	   rN   r   r   rM   naninfr   )ZlogoZlpgo_1Zlpgo_2jr{   Zp_groups_outrg   r   r   rs   rT   rR   Z
groups_arrr|   r}   r~   rC   rC   rD   test_leave_one_p_group_out*  sH    


(
 
 r   c           
   C   s  t ddddddddg} t t| }t j| dd}t j|| d}t j|| d}tddj|| d}tddj|| d}d|d d < xP||f||fgD ]<\}}x2t||D ]$\\}	}
\}}t|	| t|
| qW qW t	dtddj
||| d	 t	dt j
||| d	 d S )
Nr   r9   r7   T)copy)ru   )r   r:   )rR   ru   )rP   rh   r   rK   r    rm   r"   r3   r   r   rj   )ru   rT   Zgroups_changingrx   Zlolo_changingZlploZlplo_changingZlloZllo_changingr|   r}   Z
train_chanZ	test_chanrC   rC   rD   $test_leave_group_out_changing_groupsd  s"    

r   c           	   C   s   t d }  }}ttdtt | || t d }  }}d|}tt|tt | || t d }  }}d|}tt|ttdd| || t 	d }  }}d|}tt|ttdd| || d S )Nr   zFound array with 0 sample(s)r9   zcThe groups parameter contains fewer than 2 unique groups ({}). LeaveOneGroupOut expects at least 2.zThe groups parameter contains fewer than (or equal to) n_groups (3) numbers of unique groups ({}). LeavePGroupsOut expects that at least n_groups + 1 (4) unique groups be presentr:   )r   )
rP   r   r   rM   r   r    rm   rO   r"   r   )rT   rR   ru   r~   rC   rC   rD   :test_leave_one_p_group_out_error_on_fewer_number_of_groups}  s$    r  c              C   s2   x,t tfD ] } tt| dd tt| dd q
W d S )Nr   )	n_repeatsg      ?)r*   r+   r   rM   )r{   rC   rC   rD   test_repeated_cv_value_errors  s    r  c              C   s  ddgddgddgddgd	d
gg} d}t dd|d}xtdD ]}|| }t|\}}t|ddg t|dddg t|\}}t|dddg t|ddg t|\}}t|ddg t|dddg t|\}}t|dddg t|ddg ttt| q>W d S )Nr9   r7   r:   r;   r8   rc   rd   re   r   r6   i{ic)rs   r  rf   r   )r*   r   rm   r   r   r   StopIteration)rT   rf   rkfr   r   r|   r}   rC   rC   rD   &test_repeated_kfold_determinstic_split  s*    "
r  c              C   s,   d} d}t | |}| | }t||  d S )Nr:   r;   )r*   r   rj   )rs   r  r  r   rC   rC   rD   $test_get_n_splits_for_repeated_kfold  s
    
r  c              C   s,   d} d}t | |}| | }t||  d S )Nr:   r;   )r+   r   rj   )rs   r  rskfr   rC   rC   rD   /test_get_n_splits_for_repeated_stratified_kfold  s
    
r	  c              C   s  ddgddgddgddgd	d
gg} dddddg}d}t dd|d}xtdD ]}|| |}t|\}}t|ddg t|dddg t|\}}t|dddg t|ddg t|\}}t|ddg t|dddg t|\}}t|dddg t|ddg ttt| qLW d S )Nr9   r7   r:   r;   r8   rc   rd   re   r   r6   r   iqs)rs   r  rf   )r+   r   rm   r   r   r   r  )rT   rR   rf   r  r   r   r|   r}   rC   rC   rD   1test_repeated_stratified_kfold_determinstic_split  s,    "r
  c               C   s   t tt t tttddd t tttdddd t tttdtdtdd t tttddd t tttddd	d t tttddd
 t tttdtd t tttdddd d S )Nr:   g?)r   g333333?)r   r   Z
wrong_type)r   r7   r;   )Zsome_argument*   r6   FT)r   stratify)r   rM   r(   r   rP   float32r   rC   rC   rC   rD   test_train_test_split_errors  s    
r  c              C   s.  t dd} t| }t d}t| |d dd}|\}}}}tt|t| t|d d df |d  t|d d df |d  t| || }|\}}}}	}}t	t
|t t	t
|t t dddd	d
}
t dddd}t|
|}t|d jd t|d jd t|d
 jd t|d	 jd t ddddd
d
d
d
g}xtd
ddddgd
dd
ddgD ]d\}}t|||dd\}}tt|| tt|t| t| tt |dkt |d
k qnW t d}xHdD ]@}t|d|d\}}t|ddg t|ddd
d	ddddg qW d S )Nr   )r6   r6   r6   g      ?)r   r   r   i,  r8   r:   r7   i  rd   r   )rd   r8   r:   r7   r9   )r:   r8   r:   r7   )rd   rd   r   )r:   rd   r   r;   g      ?g      ?rc   )r   r  rf   )r7   g?F)r   r   re   r   )rP   r   rJ   r   r(   r   rK   r   r   r	   
isinstancerl   rN   rh   r3   r   )rT   X_srR   rm   X_trainX_testr   r   Z	X_s_trainZX_s_testZX_4dZy_3dr   Zexp_test_sizer|   r}   rC   rC   rD   test_train_test_split  sB    

$

r  c              C   sx   t g} yddlm} | | W n tk
r4   Y nX x<| D ]4}|t}t|\}}tt|| tt|| q<W d S )Nr   )	DataFrame)	r   Zpandasr  r   ImportErrorrT   r(   r	   r  )typesr  InputFeatureTypeX_dfr  r  rC   rC   rD   train_test_split_pandas/  s    
r  c              C   s\   t dd} tttg}x<|D ]4}|| }t|\}}tt|t tt|t q W d S )Nr   )r6   r6   )	rP   r   rJ   r   r   r   r(   r	   r  )rT   Zsparse_typesr  r  r  r  rC   rC   rD   train_test_split_sparse@  s    

r  c              C   s@   t t} t| \}}tt|t  tt|t  t| \}}d S )N)r   rT   r(   r	   r  )r  r  r  ZX_train_arrZ
X_test_arrrC   rC   rD   train_test_split_mock_pandasL  s
    r  c              C   s   t d} dgd dgd  }t t dt df}| }xdD ]}t| ||rX|nd dd\}}}}t| ||rx|nd dd\}	}
}}t| ||r|nd dd\}}}}t j||	 t j|| t j|| t j|| qFW d S )	Nrd   r<   r;   r   r:   )TFr   )r  rf   )rP   r   hstackr   r   r(   rk   r   )rT   r   r   y3r  ZX_train1ZX_test1Zy_train1Zy_test1ZX_train2ZX_test2Zy_train2Zy_test2ZX_train3ZX_test3Zy_train3Zy_test3rC   rC   rD   train_test_split_list_inputU  s    

r  c               C   s   t ttd d d t ttdd t ttdd t ttddd t ttdd t tttd	dt t tttd
dt t tttdddt d S )N)r   r   g       @)r   g      ?g?gffffff?y              ?)r   r   r6   re   r:   )r   rM   r#   r   rm   rT   rC   rC   rC   rD   test_shufflesplit_errorsj  s    
r  c              C   s@   t dd} ttdd | tD tdd | tD  d S )N   )rf   c             s   s   | ]\}}|V  qd S )NrC   )r   r@   r   rC   rC   rD   	<genexpr>  s    z1test_shufflesplit_reproducible.<locals>.<genexpr>c             s   s   | ]\}}|V  qd S )NrC   )r   r@   r   rC   rC   rD   r!    s    )r#   r   rl   rm   rT   )ry   rC   rC   rD   test_shufflesplit_reproducible{  s    
r"  c              C   s   t ddd} td}dgd dgd  }ttdtdf}| }tjt| 	||t| 	|| tjt| 	||t| 	|| d S )	Nr7   r  )r   rf   rd   r<   r;   r   r:   )
r%   rP   r   r  r   r   rk   r   rl   rm   )r   rT   r   r   r  rC   rC   rD   &test_stratifiedshufflesplit_list_input  s    
r#  c              C   sX   t jdt jddd} t j| dd d f< t ddg| jd d }t| |dd	d
 d S )N   )ro   r6   rF   r7   r   r9   g?r  )r   rf   )rP   r   float64rJ   r   repeatrN   r(   )rT   rR   rC   rC   rD    test_train_test_split_allow_nans  s    r'  c        
      C   s  t d} tddd}t jttd| t||  t dddddddddg	}td|dd}t jtt	d| |t|| | t dddddddddg	}td|dd}t jtt	d| |t|| | |
d	d}td|dd}t jtt	d| |t|| | tt tt	d| |d ttd| |d k t d
} t ddddgddddgddddgddddgddddgg}td|dd}t jttd| t||  t ddgddgddgddgddgg}td|dd}t jttd| t||  t d} t dddddddddg	}td|dd}tjdd ddlm	} W d Q R X t||dd}	t jt|| |t|	  tttdd d S )Nr   r:   F)
classifierr   r9   Tr7   rF   r8   )record)r   )r   rx   )r{   )rP   r   r'   rk   r   rl   r   rm   rh   r   rJ   r
   allr   r   r   sklearn.cross_validationr   rM   )
rT   r{   Zy_binaryy_multiclassZy_multiclass_2dr   Zy_multioutputZcv1OldSKFZcv2rC   rC   rD   test_check_cv  sF    
&
"&(&
r.  c        	      C   sZ  t dddddddddg	} tjdd ddlm} W d Q R X || dd}t|}t jt	|t	|
  tt||  td	d

tt}t|}t jt	|
ttt	|
tt td	dd
tt}t|}t jt	|
ttt	|
tt y.t jt	|
ttt	|
tt d}W n tk
rJ   d}Y nX t|d d S )Nr   r9   r7   T)r)  )r   r:   )r   r8   )rs   )rs   r   FzVIf the splits are randomized, successive calls to split should yield different results)rP   rh   r   r   r+  r   r.   rk   r   rl   rm   rK   rj   r   rT   rR   r'   r   r
   )	r,  r-  r{   Zwrapped_old_skfZkf_iterZkf_iter_wrappedZkf_randomized_iterZkf_randomized_iter_wrappedZsplits_are_equalrC   rC   rD   test_cv_iterable_wrapper  s,    
r/  c           (   C   s  t jd} d}d}d}t | }}d| }| d||}|| }tt | t |}	t|d}
x*t	|

|||D ]\}\}}||	|< qzW tt|	t| x,t |	D ]}t|tt|	|k|  qW x.t |D ] }ttt |	||k d qW t j|td}x:|

|||D ](\}}ttt || || d q W t d	d
ddd
dddddddddd
ddddddddddddddd d!ddd"d#d$d%d&g&}tt |}t|}d}d| }|| }t | }}t |}	x,t	|

|||D ]\}\}}||	|< qW tt|	t| x.t |	D ] }t|tt|	|k|  q0W t D td't x0t |D ]"}ttt |	||k d qvW W d Q R X t j|td}x:|

|||D ](\}}ttt || || d qW t|

||| }x>t|

||||D ]&\\}}\}}t|| t|| qW t dddd(d(g}t t| }}ttd)ttd*d
||| d S )+Nr   r   i  r8   g?)rs   r9   )ro   ZAlbertZJeanZBertrandZMichelZFrancisZRobertZRachelZLoisZMichelleZBernardZMarionZLauraZFranckZJohnZGaelZAnnaZAlixZDavidZTonyZAbelZBeckyZMadmoodZCaryZMaryZ	AlexandreZBarackZAbdoulZRashaXiZSilviar   r7   z%Cannot have number of splits.*greaterr:   )rP   r   r   r   r   rK   rQ   r   r   ri   rm   r   r   r[   r   rn   objectr   rh   r   r   r   DeprecationWarningrl   r   r3   r   r   rM   r   )r   r   rr   rs   rT   rR   Z	toleranceru   Zideal_n_groups_per_foldr   Zlkfrg   r   r}   groupr|   Zcv_iterZtrain1r   Ztrain2Ztest2rC   rC   rD   test_group_kfold  st    

  $



 
,$
r4  c              C   sR  ddgddgddgddgd	d
gddgddgg} t tdttdd|  td}|| d d }t|\}}t|ddg t|ddg t|\}}t|ddddg t|ddg td| }t|\}}t|dddg t|ddg t|\}}t|dddddg t|ddg td| }tt|}t||	  t|d d S )Nr9   r7   r:   r;   r8   rc   rd   re   r   r6   r         r   z$Cannot have number of folds.*greater)rs   rF   r   )
r   rM   r   r   rm   r   rK   rl   r   rj   )rT   Ztscvr   r|   r}   r   rC   rC   rD   test_time_series_cvO  s.    .r7  c             C   sb   x\t | |D ]N\\}}\}}t|| tt||k tt|| d}t|||d   qW d S )Nr   )r3   r   r	   rK   r   )r   check_splitsmax_train_sizer|   r}   Zcheck_trainZ
check_testZsuffix_startrC   rC   rD   !_check_time_series_max_train_sizeu  s
    
r:  c              C   s~   t d} tdd| }tddd| }t||dd tddd| }t||dd tddd| }t||dd d S )N)rc   r9   r:   )rs   )rs   r9  )r9  r7   r8   )rP   r   r   rm   r:  )rT   r   r8  rC   rC   rD   test_time_series_max_train_size}  s    
r;  c           	   C   s   t jd} tdddd\}}| ddd}t t t t t	dddg}xFt
|dD ]8\}}tt dd	d
gi|d}t|||||d|id qVW d S )Nr   r   r7   )rr   Z	n_classesrf   r8   r:   )rs   rf   alphar9   g?)Z
param_gridr{   ru   )rT   rR   ru   r{   Z
fit_params)rP   r   r   r1   r   r    r   r   r   r%   r   r)   r,   r   )r   rT   rR   ru   ZcvsZinner_cvZouter_cvZgsrC   rC   rD   test_nested_cv  s    r=  c               C   sB   t ttdd t ttdd t ttdd t tttddd d S )Ng      ?)r   r:   )r   FutureWarningr#   r$   r%   r(   r   rC   rC   rC   rD   test_train_test_default_warning  s
    r?  c              C   s&   G dd d} t t| ddd d S )Nc               @   s   e Zd ZdddZdd ZdS )z%test_build_repr.<locals>.MockSplitterr   Nc             S   s   || _ || _|| _d S )N)r@   r   r   )rB   r@   r   r   rC   rC   rD   rE     s    z.test_build_repr.<locals>.MockSplitter.__init__c             S   s   t | S )N)r/   )rB   rC   rC   rD   __repr__  s    z.test_build_repr.<locals>.MockSplitter.__repr__)r   N)r_   r`   ra   rE   r@  rC   rC   rC   rD   MockSplitter  s   
rA  r8   rc   zMockSplitter(a=5, b=6, c=None))r   rq   )rA  rC   rC   rD   test_build_repr  s    	rB  )N)N)rb   
__future__r   r   numpyrP   Zscipy.sparser   r   r   Zscipyr   	itertoolsr   r   Zsklearn.utils.testingr	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   Zsklearn.utils.validationr   Zsklearn.utils.mockingr   Zsklearn.model_selectionr   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   Zsklearn.linear_modelr,   Zsklearn.model_selection._splitr-   r.   r/   Zsklearn.datasetsr0   r1   Zsklearn.externalsr2   Zsklearn.externals.six.movesr3   Zsklearn.utils.fixesr4   Zsklearn.svmr5   r   rT   r   rR   ZeyerS   rh   r   r   r1  r?   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r	  r
  r  r  r  r  r  r  r  r"  r#  r'  r.  r/  r4  r7  r:  r;  r=  r?  rB  rC   rC   rC   rD   <module>   s   
  &(?>

/$ ,#3%: 0	3([&