B
    ²ô`õ^  ã               @   s  d Z ddlmZ ddlmZ ddlmZ ddlZddlZddlm	Z
 ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm Z  ddgZ!G dd„ dej"ƒZ#G dd„ de#ƒZ$dS )zThe Wishart distribution class.é    )Úabsolute_import)Údivision)Úprint_functionN)Úv2)Úchain)Úcholesky_outer_product)Úfill_scale_tril)Úsoftplus)Útransform_diagonal)Údistribution)Úassert_util)Ú
dtype_util)Úprefer_static)Úreparameterization)Úsamplers)Útensor_util)Útensorshape_utilÚWishartLinearOperatorÚWishartTriLc                   sü   e Zd ZdZd<‡ fdd„	Zedd„ ƒZd	d
„ Zdd„ Zedd„ ƒZ	edd„ ƒZ
dd„ Zdd„ Zd=dd„Zdd„ Zdd„ Zdd„ Zdd„ Zdd „ Zd!d"„ Zd#d$„ Zd>d&d'„Zd?d)d*„Zd@d+d,„ZdAd.d/„ZdBd1d2„ZdCd4d5„Zd6d7„ Zd8d9„ Zd:d;„ Z‡  ZS )Dr   a}  The matrix Wishart distribution on positive definite matrices.

  This distribution is defined by a scalar number of degrees of freedom `df` and
  an instance of `LinearOperator`, which provides matrix-free access to a
  symmetric positive definite operator, which defines the scale matrix.

  #### Mathematical Details

  The probability density function (pdf) is,

  ```none
  pdf(X; df, scale) = det(X)**(0.5 (df-k-1)) exp(-0.5 tr[inv(scale) X]) / Z
  Z = 2**(0.5 df k) |det(scale)|**(0.5 df) Gamma_k(0.5 df)
  ```

  where:

  * `df >= k` denotes the degrees of freedom,
  * `scale` is a symmetric, positive definite, `k x k` matrix,
  * `Z` is the normalizing constant, and,
  * `Gamma_k` is the [multivariate Gamma function](
    https://en.wikipedia.org/wiki/Multivariate_gamma_function).

  #### Examples

  See the `Wishart` class for examples of initializing and using this class.
  FTNc       	   	      st   t tƒ ƒ}|| _t |¡P}tj||gtjd}|| _t	j
|d|d| _tt| ƒj|||tj||d W dQ R X dS )a  Construct Wishart distributions.

    Args:
      df: `float` or `double` tensor, the degrees of freedom of the
        distribution(s). `df` must be greater than or equal to `k`.
      scale: `float` or `double` instance of `LinearOperator`.
      input_output_cholesky: Python `bool`. If `True`, functions whose input or
        output have the semantics of samples assume inputs are in Cholesky form
        and return outputs in Cholesky form. In particular, if this flag is
        `True`, input to `log_prob` is presumed of Cholesky form and output from
        `sample`, `mean`, and `mode` are of Cholesky form.  Setting this
        argument to `True` is purely a computational optimization and does not
        change the underlying distribution; for instance, `mean` returns the
        Cholesky of the mean, not the mean of Cholesky factors. The `variance`
        and `stddev` methods are unaffected by this flag.
        Default value: `False` (i.e., input/output does not have Cholesky
        semantics).
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
        (e.g., mean, mode, variance) use the value '`NaN`' to indicate the
        result is undefined. When `False`, an exception is raised if one or
        more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.

    Raises:
      TypeError: if scale is not floating-type
      TypeError: if scale.dtype != df.dtype
      ValueError: if df < k, where scale operator event shape is
        `(k, k)`
    )Z
dtype_hintÚdf)ÚnameÚdtype)r   Úvalidate_argsÚallow_nan_statsZreparameterization_typeÚ
parametersr   N)ÚdictÚlocalsÚ_input_output_choleskyÚtfÚ
name_scoper   Úcommon_dtypeÚfloat32Ú_scaler   Úconvert_nonref_to_tensorÚ_dfÚsuperr   Ú__init__r   ZFULLY_REPARAMETERIZED)	Úselfr   ÚscaleÚinput_output_choleskyr   r   r   r   r   )Ú	__class__© úi/home/dcms/DCMS/lib/python3.7/site-packages/tensorflow_probability/python/distributions/_numpy/wishart.pyr&   L   s    (

zWishartLinearOperator.__init__c             C   s   | j S )z*Wishart distribution degree(s) of freedom.)r$   )r'   r+   r+   r,   r   „   s    zWishartLinearOperator.dfc             C   s   | j }|j|dd ¡ S )NT)Úadjoint_arg)r"   ÚmatmulÚto_dense)r'   r(   r+   r+   r,   Ú_square_scale‰   s    z#WishartLinearOperator._square_scalec             C   s   | j r| j ¡ S |  ¡ S dS )z"Wishart distribution scale matrix.N)r)   r"   r/   r0   )r'   r+   r+   r,   Úscale_matrix   s    
z"WishartLinearOperator.scale_matrixc             C   s   | j S )z8Wishart distribution scale matrix as an Linear Operator.)r"   )r'   r+   r+   r,   r(   ”   s    zWishartLinearOperator.scalec             C   s   | j S )zEBoolean indicating if `Tensor` input/outputs are Cholesky factorized.)r   )r'   r+   r+   r,   r)   ™   s    z+WishartLinearOperator.input_output_choleskyc             C   s   | j  ¡ }t ||g¡S )N)r"   Údomain_dimension_tensorr   Ústack)r'   Ú	dimensionr+   r+   r,   Ú_event_shape_tensorž   s    
z)WishartLinearOperator._event_shape_tensorc             C   s   | j j}t ||g¡S )N)r"   Zdomain_dimensionr   ZTensorShape)r'   r4   r+   r+   r,   Ú_event_shape¢   s    z"WishartLinearOperator._event_shapec             C   s0   |d krt  | j¡n|}t  t  |¡| j ¡ ¡S )N)r   Úconvert_to_tensorr   Zbroadcast_dynamic_shapeÚshaper"   Úbatch_shape_tensor)r'   r   r+   r+   r,   Ú_batch_shape_tensor¦   s    z)WishartLinearOperator._batch_shape_tensorc             C   s   t  | jj| jj¡S )N)r   Úbroadcast_static_shaper   r8   r"   Úbatch_shape)r'   r+   r+   r,   Ú_batch_shape«   s    z"WishartLinearOperator._batch_shapec             C   sª  t  | j¡}|  |¡}|  ¡ }t  |¡d }|d }t  |g||gd¡}tj|dd\}	}
tj	|dd| j
|	d}|t j| j ¡ t |j
¡d }tj|g|  d	| |  ¡ ¡d	| j
|
d
}t j |dd¡}t j |t  |¡¡}t  t  d|¡dggd¡}t j||d}t  ||d g|d | ggd¡}t  ||¡}| j |¡}t  |||ggd¡}t  ||¡}t  |d gt  d|d ¡gd¡}t j||d}| js¦t j||dd}|S )Nr   é   ZWishart)Zsaltg        g      ð?)r8   ZmeanÚstddevr   Úseed)r   g      à?)r8   ÚalphaÚbetar   r@   éÿÿÿÿé   )ÚaÚpermT)Ú	adjoint_b)r   r7   r   r:   r5   r8   Úconcatr   Z
split_seedÚnormalr   Úonesr"   r9   r   Z
base_dtypeÚgammaÚ_multi_gamma_sequenceÚ
_dimensionÚlinalgZ	band_partZset_diagÚsqrtÚrangeÚ	transposeÚreshaper.   r)   )r'   Únr@   r   r<   Úevent_shapeZbatch_ndimsÚndimsr8   Znormal_seedZ
gamma_seedÚxZexpanded_dfÚgrF   r+   r+   r,   Ú	_sample_n¯   s@    
""zWishartLinearOperator._sample_nc             C   sb  | j r|}ntj |¡}t | j¡}|  |¡}|  ¡ }|  ¡ }t 	|¡}t 
t |¡d |¡| }t tj|gtjdt |¡gd¡}	t ||	¡}t 	|¡}
|
t |¡ d }t |¡d |… }|}t t ||
¡t d|¡gd¡}tj||d}tj|tjdt |	d |… ¡ }tj|	|d… tj|tjd|ggdd}t ||¡}| j |¡}tjt |¡d d… ||gdd}t ||¡}t t |
| |
¡t d|
| ¡gd¡}tj||d}tjt |¡ddgd}tjtj tj |¡¡dgd}|| d | d	|  | j|| jd
 }t 	|j¡d k	r^t 	| j¡d k	r^t |t  |jd d… | j¡¡ |S )Né   )r   r   )rE   rF   éþÿÿÿ)ÚaxisrC   g      ð?g      à?)r   r(   )!r)   r   rN   Zcholeskyr7   r   r:   r5   rM   ZrankÚmaximumÚsizerH   rJ   Úint32r8   rR   rP   rQ   ÚcastZreduce_prodr"   ZsolveÚ
reduce_sumÚsquareÚmathÚlogÚ	diag_partÚ_log_normalizationr   r<   Z	set_shaper;   )r'   rV   Zx_sqrtr   r<   rT   r4   Zx_ndimsZnum_singleton_axes_to_prependZ!x_with_prepended_singletons_shaperU   Zsample_ndimsZsample_shapeZscale_sqrt_inv_x_sqrtrF   Zlast_dim_sizer8   Ztrace_scale_inv_xZhalf_log_det_xZlog_probr+   r+   r,   Ú	_log_probê   sd    




	zWishartLinearOperator._log_probc             C   sp   |   ¡ }d| d }t | j¡}d| }|||t d¡   d| | j ¡   |  ||¡ || |  	||¡  S )Ng      à?g       @rY   )
rM   r   r7   r   rb   rc   r"   Úlog_abs_determinantÚ_multi_lgammaÚ_multi_digamma)r'   r4   Zhalf_dp1r   Zhalf_dfr+   r+   r,   Ú_entropyJ  s    2zWishartLinearOperator._entropyc             C   sD   t  | j¡}|dt jt jf }| jr8t  |¡| j ¡  S ||  ¡  S )N.)	r   r7   r   Únewaxisr)   rO   r"   r/   r0   )r'   r   r+   r+   r,   Ú_meanT  s
    zWishartLinearOperator._meanc             C   sh   t  | j¡}|dt jt jf }| jj| jdd}| ¡ dt jf }|t  | ¡ ¡t j||dd  }|S )N.T)r-   )rG   )	r   r7   r   rk   r"   r.   rd   ra   r/   )r'   r   rV   ÚdÚvr+   r+   r,   Ú	_variance^  s    "zWishartLinearOperator._variancec             C   st   t  | j¡}|dt jt jf }||  ¡  d }t  |dk t |j¡t	j
ƒ|¡}| jrht  |¡| j ¡  S ||  ¡  S )N.g      ð?g        )r   r7   r   rk   rM   Úwherer   Zas_numpy_dtyper   ÚnpÚnanr)   rO   r"   r/   r0   )r'   r   Úsr+   r+   r,   Ú_modei  s    zWishartLinearOperator._modeÚmean_log_detc          	   C   sL   |   |¡8 |  ¡ }|  d| j |¡|t d¡  d| j ¡   S Q R X dS )z8Computes E[log(det(X))] under this Wishart distribution.g      à?g       @rY   N)Ú_name_and_control_scoperM   ri   r   rb   rc   r"   rg   )r'   r   r4   r+   r+   r,   ru   t  s    z"WishartLinearOperator.mean_log_detÚlog_normalizationc             C   sd   |d krt  | j¡n|}|d kr&| jn|}|  ¡ }|| ¡  d| | t d¡  |  d| |¡ S )Ng      à?g       @)	r   r7   r   r"   rM   rg   rb   rc   rh   )r'   r   r(   r   r4   r+   r+   r,   re   |  s
     z(WishartLinearOperator._log_normalizationc          	   C   s$   |   |¡ | j||dS Q R X dS )z.Computes the log normalizing constant, log(Z).)r   r   N)rv   re   )r'   r   r   r+   r+   r,   rw   „  s    z'WishartLinearOperator.log_normalizationÚmulti_gamma_sequencec          	   C   sT   t  |¡@ t  t jd| jddd|  t  |t j¡¡}||dt jf  S Q R X dS )zFCreates sequence used in multivariate (di)gamma; shape = shape(a)+[p].g        )r   g      à?.N)r   r   ZlinspaceZconstantr   r_   r^   rk   )r'   rE   Úpr   Úseqr+   r+   r,   rL   ‰  s
    z+WishartLinearOperator._multi_gamma_sequenceÚmulti_lgammac          	   C   sV   t  |¡B |  ||¡}d| |d  t tj¡ t jt j |¡dgd S Q R X dS )z>Computes the log multivariate gamma function; log(Gamma_p(a)).g      Ð?g      ð?rC   )r[   N)r   r   rL   rb   rc   Úpir`   Úlgamma)r'   rE   ry   r   rz   r+   r+   r,   rh   ’  s    z#WishartLinearOperator._multi_lgammaÚmulti_digammac          	   C   s:   t  |¡& |  ||¡}t jt j |¡dgdS Q R X dS )z5Computes the multivariate digamma function; Psi_p(a).rC   )r[   N)r   r   rL   r`   rb   Zdigamma)r'   rE   ry   r   rz   r+   r+   r,   ri   ™  s    z$WishartLinearOperator._multi_digammac          	   C   sp   t  d¡\ t j | jjd ¡dkr>t j| j ¡ | jjddS t j	t j | jjd ¡| jjddS W dQ R X dS )z,Scalar dimension of underlying vector space.r4   rC   N)r   r   )
r   r   ÚcompatÚdimension_valuer"   r8   r_   r2   r   r7   )r'   r+   r+   r,   rM   Ÿ  s    z WishartLinearOperator._dimensionc             C   s\   t jtjtj| jd| jdtj| jdg| jd}| j	r>|S t jt
j| jd|g| jdS )N)r   )Zdiag_bijectorr   )Úchain_bijectorZChainÚtransform_diagonal_bijectorZTransformDiagonalÚsoftplus_bijectorZSoftplusr   Úfill_scale_tril_bijectorZFillScaleTriLr)   Úcholesky_outer_product_bijectorZCholeskyOuterProduct)r'   Ztril_bijectorr+   r+   r,   Ú_default_event_space_bijector­  s    

z3WishartLinearOperator._default_event_space_bijectorc          	   C   s@  g }|rJt  | jj¡s(td | jj¡ƒ‚| jjs8tdƒ‚t  | j	| jg¡ t
 | j	¡}t
j | jjd ¡}d}|rÞ|d k	rÞ|d k	rÞt |¡}t |¡}|jsª|tjdf }|js¾|tjdf }t ||k ¡rÜt| ||¡ƒ‚n^| jr<|t | j	¡ks
|t | j¡kr<t
 | j	¡}|  ¡ }| tj||| ||¡d¡ |S )Nz,scale.dtype={} is not a floating-point type.zscale must be square.rC   zfDegrees of freedom (`df = {}`) cannot be less than dimension of scale matrix (`scale.dimension = {}`)..)Úmessage)r   Zis_floatingr"   r   Ú	TypeErrorÚformatÚ	is_squareÚ
ValueErrorZassert_same_float_dtyper$   r   Zget_static_valuer   r€   r8   rq   Zasarrayrk   Úanyr   r   Úis_refr7   rM   Úappendr   Zassert_less_equal)r'   Úis_initÚ
assertionsZdf_valZdim_valÚmsgr   r4   r+   r+   r,   Ú_parameter_control_dependencies¿  s:    

z5WishartLinearOperator._parameter_control_dependencies)FFTN)N)ru   )NNrw   )Nrw   )rx   )r{   )r~   )Ú__name__Ú
__module__Ú__qualname__Ú__doc__r&   Úpropertyr   r0   r1   r(   r)   r5   r6   r:   r=   rX   rf   rj   rl   ro   rt   ru   re   rw   rL   rh   ri   rM   r†   r’   Ú__classcell__r+   r+   )r*   r,   r   /   s:      2
;`





	

c                   sF   e Zd ZdZd‡ fdd„	Zedd„ ƒZed	d
„ ƒZ‡ fdd„Z	‡  Z
S )r   a¾  The matrix Wishart distribution parameterized with Cholesky factors.

  This distribution is defined by a scalar degrees of freedom `df` and a scale
  matrix, expressed as a lower triangular Cholesky factor.

  #### Mathematical Details

  The probability density function (pdf) is,

  ```none
  pdf(X; df, scale) = det(X)**(0.5 (df-k-1)) exp(-0.5 tr[inv(scale) X]) / Z
  Z = 2**(0.5 df k) |det(scale)|**(0.5 df) Gamma_k(0.5 df)
  ```

  where:
  * `df >= k` denotes the degrees of freedom,
  * `scale` is a symmetric, positive definite, `k x k` matrix equivalent to
    `scale_tril * scale_tril.T`,
  * `Z` is the normalizing constant, and,
  * `Gamma_k` is the [multivariate Gamma function](
    https://en.wikipedia.org/wiki/Multivariate_gamma_function).


  #### Examples

  ```python
  # Initialize a single 3x3 Wishart with Cholesky factored scale matrix and 5
  # degrees-of-freedom.(*)
  df = 5
  chol_scale = tf.linalg.cholesky(...)  # Shape is [3, 3].
  dist = tfd.WishartTriL(df=df, scale_tril=chol_scale)

  # Evaluate this on an observation in R^3, returning a scalar.
  x = ...  # A 3x3 positive definite matrix.
  dist.prob(x)  # Shape is [], a scalar.

  # Evaluate this on a two observations, each in R^{3x3}, returning a length two
  # Tensor.
  x = [x0, x1]  # Shape is [2, 3, 3].
  dist.prob(x)  # Shape is [2].

  # (*) - To efficiently create a trainable covariance matrix, see the example
  #   in tfp.distributions.matrix_diag_transform.
  ```
  NFTc       	   	      sŒ   t tƒ ƒ}t |¡n}t ||gtj¡}tj|d|d}tj|d|d| _	t
t| ƒj|tjj| j	dddd||||d || _W dQ R X dS )a¹  Construct Wishart distributions.

    Args:
      df: `float` or `double` `Tensor`. Degrees of freedom, must be greater than
        or equal to dimension of the scale matrix.
      scale_tril: `float` or `double` `Tensor`. The Cholesky factorization
        of the symmetric positive definite scale matrix of the distribution.
      input_output_cholesky: Python `bool`. If `True`, functions whose input or
        output have the semantics of samples assume inputs are in Cholesky form
        and return outputs in Cholesky form. In particular, if this flag is
        `True`, input to `log_prob` is presumed of Cholesky form and output from
        `sample`, `mean`, and `mode` are of Cholesky form.  Setting this
        argument to `True` is purely a computational optimization and does not
        change the underlying distribution; for instance, `mean` returns the
        Cholesky of the mean, not the mean of Cholesky factors. The `variance`
        and `stddev` methods are unaffected by this flag.
        Default value: `False` (i.e., input/output does not have Cholesky
        semantics).
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
        (e.g., mean, mode, variance) use the value '`NaN`' to indicate the
        result is undefined. When `False`, an exception is raised if one or
        more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.
    r   )r   r   Ú
scale_trilT)ZtrilZis_non_singularZis_positive_definiterŠ   )r   r(   r)   r   r   r   N)r   r   r   r   r   r    r!   r   r#   Ú_scale_trilr%   r   r&   rN   ZLinearOperatorLowerTriangularÚ_parameters)	r'   r   r™   r)   r   r   r   r   r   )r*   r+   r,   r&     s$    #

zWishartTriL.__init__c             C   s   t dddS )Nr   rY   )r   r™   )r   )Úclsr+   r+   r,   Ú_params_event_ndimsJ  s    zWishartTriL._params_event_ndimsc             C   s   | j S )z/Cholesky decomposition of Wishart scale matrix.)rš   )r'   r+   r+   r,   r™   N  s    zWishartTriL.scale_trilc                sx   t t| ƒ |¡}| js"|rt‚g S |t | j¡krtt 	| j¡}| 
tjtj | j¡ddtj|d |d ddg¡ |S )Nz'`scale_tril` must be positive definite.)r‡   rC   rZ   z`scale_tril` must be square.)r%   r   r’   r   ÚAssertionErrorr   r   rš   r   r8   Úextendr   Zassert_positiver   rN   rd   Zassert_equal)r'   r   r   r8   )r*   r+   r,   r’   S  s     z+WishartTriL._parameter_control_dependencies)NFFTr   )r“   r”   r•   r–   r&   Úclassmethodr   r—   r™   r’   r˜   r+   r+   )r*   r,   r   ã  s   -    2)%r–   Ú
__future__r   r   r   rb   Únumpyrq   Z;tensorflow_probability.python.internal.backend.numpy.compatr   r   Z.tensorflow_probability.python.bijectors._numpyr   r   r   r…   r   r„   r	   rƒ   r
   r‚   Z2tensorflow_probability.python.distributions._numpyr   Z-tensorflow_probability.python.internal._numpyr   r   r   Z&tensorflow_probability.python.internalr   r   r   r   Ú__all__ÚDistributionr   r   r+   r+   r+   r,   Ú<module>   s4      7