B
    `7^                 @   s$  d Z ddlmZ ddlmZ ddlmZ ddlZddlZddlZddlZddl	Z
ddlmZ ddlm  mZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlm Z  ddl!m"Z" ddl#m$Z% ddl&m'Z' ddl&m(Z( ddl&m)Z) ddl*m+Z+ dddddddddddd d!d"d#gZ,dDd$dZ-dEd&d Z.dFd'dZ/ej0d(d) Z1dGd,dZ2dHd-dZ3d.d/ Z4dId1dZ5dJd2dZ6dKd3dZ7d4d" Z8d5d! Z9dLd6d#Z:ddddej;d7d8fd9dZ<ddddej;d:d;fd<dZ=ddddej;d=d>fd?dZ>d@ej;dfdAdZ?dBdC Z@dS )Mz1Utilitity functions for building neural networks.    )absolute_import)division)print_functionN)zip)Chain)Shift)Softplus)Categorical)Independent)JointDistributionSequential)MixtureSameFamily)Normal)Sample)initializers)
dtype_util)prefer_static)tensorshape_util)TransformedVariabledisplay_imgsexpand_dimsflatten_rightmosthalflife_decaymake_fit_opmake_kernel_bias#make_kernel_bias_posterior_mvn_diag%make_kernel_bias_prior_spike_and_slab	negloglik	tfcompiletracetune_datasetvariables_loadvariables_savevariables_summaryc          
   C   sP  ddl m} t stddd }|| } |dk	r<||}t| d| jdd  } | j\}}}}}	tt| ddd	d
dg|| || |	g} |	  t
d	d	|td|f|dd}
y|jf |
\}}W n. tk
r   |
d |jf |
\}}Y nX |j|  ddd |d |dk	r0|t| |  |  |  ||fS )zDisplay images as a grid.r   Nz,`display_imgs` can only be executed eagerly.c                s   t t d fdd S )Nnumpyc                  s    S )N r$   )zr$   g/home/dcms/DCMS/lib/python3.7/site-packages/tensorflow_probability/python/experimental/nn/util/utils.py<lambda>H       z3display_imgs.<locals>._preprocess.<locals>.<lambda>)nparraygetattr)r%   r$   )r%   r&   _preprocessG   s    z!display_imgs.<locals>._preprocess)            T)nrowsncolsZfigsizenumclearr6   noneZgray)interpolationZcmapoff)Zmatplotlib.pyplotZpyplottfexecuting_eagerlyNotImplementedErrorr)   reshapeshapeZ	transposeZioffdictmaxZsubplots	TypeErrorpopZimshowZsqueezeaxis	set_titlestrZtight_layoutshowZion)xtitleZfignumZpltr,   r3   r4   hwcZsubplots_kwargsZfigZaxsr$   r$   r&   r   B   s<    ,


r-   c             C   sr   |dk	r| j |tjjjd} |  } |dk	r6| |} |dk	rH| |} |dk	r^| j|dd} | 	tjjj} | S )aE  Sets generally recommended parameters for a `tf.data.Dataset`.

  Args:
    dataset: `tf.data.Dataset`-like instance to be tuned according to this
      functions arguments.
    batch_size: Python `int` representing the number of elements in each
      minibatch.
    shuffle_size: Python `int` representing the number of elements to shuffle
      (at a time).
    preprocess_fn: Python `callable` applied to each item in `dataset`.
    repeat_count: Python `int`, representing the number of times the dataset
      should be repeated. The default behavior (`repeat_count = -1`) is for the
      dataset to be repeated indefinitely. If `repeat_count is None` repeat is
      "off;" note that this is a deviation from `tf.data.Dataset.repeat` which
      interprets `None` as "repeat indefinitely".
      Default value: `-1` (i.e., repeat indefinitely).

  Returns:
    tuned_dataset: `tf.data.Dataset` instance tuned according to this functions
      arguments.

  #### Example

  ```python
  [train_dataset, eval_dataset], datasets_info = tfds.load(
       name='mnist',
       split=['train', 'test'],
       with_info=True,
       as_supervised=True,
       shuffle_files=True)

  def _preprocess(image, label):
    image = tf.cast(image, dtype=tf.int32)
    u = tf.random.uniform(shape=tf.shape(image), maxval=256, dtype=image.dtype)
    image = tf.cast(u < image, dtype=tf.float32)   # Randomly binarize.
    return image, label

  # TODO(b/144500779): Cant use `experimental_compile=True`.
  @tf.function(autograph=False)
  def one_step(iter):
    x, y = next(iter)
    return tf.reduce_mean(x)

  ds = tune_dataset(
      train_dataset,
      batch_size=32,
      shuffle_size=int(datasets_info.splits['train'].num_examples / 7),
      preprocess_fn=_preprocess)
  it = iter(ds)
  [one_step(it)]*3  # Build graph / burn-in.
  %time one_step(it)
  ```

  N)Znum_parallel_callsT)Zdrop_remainder)
mapr:   dataexperimentalZAUTOTUNEcacheshufflerepeatbatchZprefetch)ZdatasetZ
batch_sizeZshuffle_sizeZpreprocess_fnZrepeat_countr$   r$   r&   r   e   s    >

c             C   s   t j|| ||d S )zNegative log-likelihood.)rC   )r:   Zreduce_meanZlog_prob)rG   yZmodel_fnrC   r$   r$   r&   r      s    c               c   s
   dV  dS )z%A context manager which does nothing.Nr$   r$   r$   r$   r&   _dummy_context   s    rT   TFc                sh   |ss|s fddS |p|}o(| |r<t jd|dnt fdd} dkr\|S | S dS )a8  Centralizes TF compilation related options.

  Args:
    func: Python `callable` to wrapped with the specified TF compilation
      directives.
      Default value: `None`.
    tf_function: `bool` representing whether the resulting function should be
      `tf.function` decoreated.
      Default value: `True`.
    xla_best_effort: `bool` representing whether XLA auto-clustering compilation
      should be performed. (This argument is ignored if the function is executed
      eagerly.)
      Default value: `True`.
    xla_compile_all: `bool` representing whether XLA compilation should be
      performed. (This argument overrides both `tf_function` and
      `xla_best_effort`.
      Default value: `False`.

  Returns:
    wrapped_func: A Python `callable` with the specified compilation directives
      embedded.

  ### Example Usage

  ```python
  tfn = tfp.experimental.nn

  # Use style #1.
  @tfn.util.tfcompile(xla_compile_all=True)
  def foo(...):
       ...

  # Use style #2.
  def foo(...):
    ...
  foo = tfn.util.tfcompile(xla_compile_all=True)(foo)
  ```

  c                s    d kr| S  S )Nr$   )fn)funcr$   r&   r'      r(   ztfcompile.<locals>.<lambda>F)Z	autographZexperimental_compilec                s    t   fdd}|S )Nc           	      s<   t  srt jjjddnt }|  | |S Q R X d S )NT)Zcompile_ops)r:   r;   ZxlarN   Z	jit_scoperT   )argskwargsZmaybe_xla_best_effort)fxla_best_effortr$   r&   wrapped   s    z-tfcompile.<locals>.decorator.<locals>.wrapped)	functoolswraps)rY   r[   )maybe_tf_functionrZ   )rY   r&   	decorator   s    ztfcompile.<locals>.decoratorN)r:   functionrT   )rV   tf_functionrZ   Zxla_compile_allr_   r$   )rV   r^   rZ   r&   r      s    +
c                s"   t ||d fdd}|S )a@  One training step.

  Args:
    loss_fn: Python `callable` which returns the pair `loss` (`tf.Tensor`) and
      any other second result such that
      `tf.nest.map_structure(tf.convert_to_tensor, other)` will succeed.
    optimizer: `tf.optimizers.Optimizer`-like instance which has members
      `gradient` and `apply_gradients`.
    trainable_variables: `tf.nest.flatten`-able structure of `tf.Variable`
      instances.
    grad_summary_fn: Python `callable` which takes a `trainable_variables`-like
      structure of `tf.Tensor`s representing the gradient of the result of
      `loss_fn` with respect to `trainable_variables`. For example,
      `lambda grads: tf.nest.map_structure(
         lambda x: 0. if x is None else tf.norm(x), grads)`.
      Default value: `None` (i.e., no summarization is made).
    tf_function: `bool` representing whether the resulting function should be
      `tf.function` decoreated.
      Default value: `True`.
    xla_compile: `bool` representing whether XLA compilation should be
      performed. (This argument is ignored if the function is executed eagerly.)
      Default value: `True`.

  Returns:
    fit_op: A Python `callable` taking args which are forwarded to `loss_fn` and
      such that when called `trainable_variables` are updated per the logic of
      `optimizer.apply_gradients`.
  )ra   rZ   c        
   	      s   t jdd$}t j|j | |\}}W dQ R X t j||t j}ytj	j
}W n tk
rz   tj
}Y nX t|rx4t|D ]\}}}	t|||	 qW nt|  dk	r|| |fS ||fS )z>Performs one gradient descent update to `trainable_variables`.F)Zwatch_accessed_variablesN)r:   ZGradientTapenestZmap_structureZwatchZpack_sequence_asZgradientflattencollectionsabcSequenceAttributeError
isinstancer   _apply_gradients)
rW   rX   ZtapeZlossotherZgradsZseq_typeoptgv)grad_summary_fnloss_fn	optimizertrainable_variablesr$   r&   fit_op+  s"    
zmake_fit_op.<locals>.fit_op)r   )ro   rp   rq   rn   ra   Zxla_compilerr   r$   )rn   ro   rp   rq   r&   r     s    c             C   s:   t dd ttj|tj|D }|r6| | d S )Nc             s   s"   | ]\}}|d k	r||fV  qd S )Nr$   ).0Zg_Zv_r$   r$   r&   	<genexpr>F  s    z#_apply_gradients.<locals>.<genexpr>)tupler   r:   rb   rc   Zapply_gradients)rk   rl   rm   Zgvsr$   r$   r&   ri   E  s    ri   r1   c                s    fdd}|S )zFlatten rightmost dims.c                s   t |   }t jt | d| ddggdd}t| |}| jjdk	r| j|d }| rjt	|nd}|
| jd| | |S )z&Implementation of `flatten_rightmost`.Nr   r0   r-   )paddingsconstant_values)r   rankpadr>   r:   r=   ndimsZis_fully_definedr)   prodZ	set_shapeZconcatenate)rG   Zleftmost_ndimsZ	new_shaperS   d)rz   r$   r&   flatten_rightmost_O  s    z-flatten_rightmost.<locals>.flatten_rightmost_r$   )rz   r}   r$   )rz   r&   r   M  s    c                s&    dkrdnd    fdd}|S )z6Returns a function which prints info related to input.N zname:{:10}  c                sZ   t | dr<t | dr<td tt| jt| j	 ntd |  t
j  | S )zPrints something.dtyper>   z&--- TRACE:  {}shape:{:16}  dtype:{:10}z--- TRACE:  {}value:{})hasattrprintformatrE   r   as_listr>   r   namer   sysstdoutflush)rG   )r   r$   r&   trace_b  s    
ztrace.<locals>.trace_)r   )r   r   r$   )r   r&   r   _  s    c                s    fdd}|S )z=Like `tf.expand_dims` but accepts a vector of axes to expand.c       
   	      s   t p
d t j| dd} t j t jdd}t| }t|}|dk }tt||j	}t
||| |}t|}t||dg\}}tjtj|t||gddd	d
}tjt| || |ggdd}	t|	|}	t | |	S Q R X dS )z Implementation of `expand_dims`.r   rG   )r   rC   )
dtype_hintr   r   r-   )rC   T)Zstabler0   )rv   rw   N)r:   
name_scopeconvert_to_tensorint32r   rx   sizeZ
reduce_sumcastr   wheresortsplitZargsortconcatrangery   r>   Zgatherr=   )
rG   Znew_axisZnxnaZis_neg_axiskZaxis_negZaxis_posidxr>   )rC   r   r$   r&   expand_dims_r  s*    


z!expand_dims.<locals>.expand_dims_r$   )rC   r   r   r$   )rC   r   r&   r   p  s    c             C   s6   t  stdtj| fdd t j|D   dS )z0Saves structure of `tf.Variable`s to `filename`.z$Can only `save` while in eager mode.c             S   s   g | ]}|  qS r$   )r#   )rs   rm   r$   r$   r&   
<listcomp>  s    z"variables_save.<locals>.<listcomp>N)r:   r;   
ValueErrorr)   Zsavez_compressedrb   rc   )filename	variablesr$   r$   r&   r!     s    c          	   C   sn   t | Z}tj|}t|t|krBtd| t|t|tdd t	|t
| D S Q R X dS )z>Assigns values to structure of `tf.Variable`s from `filename`.zDFile "{}" has incorrect number of variables (saw: {}, expected: {}).c             S   s   g | ]\}\}}| |qS r$   )Zassign)rs   rm   _rG   r$   r$   r&   r     s    z"variables_load.<locals>.<listcomp>N)r)   loadr:   rb   rc   lenr   r   groupr   listitems)r   r   rM   Zvars_r$   r$   r&   r      s    c       	      C   s  t dd }g }|dk	r0|dd|dg d}||dd	d
d xdtj| D ]T}t	|j
}|jr||jj  |7  < |||tt|j
t|j|j qXW tdd | D }tdd | D }|d||d dddd | D  d  d|S )z%Returns a list of summarizing `str`s.c               S   s   dS )Nr   r$   r$   r$   r$   r&   r'     r(   z#variables_summary.<locals>.<lambda>N z===z2==================================================z{: >6} {:20} {:5} {:40}ZSIZEZSHAPEZTRAINNAMEc             S   s   g | ]\}}|j | qS r$   )r   )rs   r   rm   r$   r$   r&   r     s    z%variables_summary.<locals>.<listcomp>c             S   s   g | ]}|qS r$   r$   )rs   rm   r$   r$   r&   r     s    z(trainable size: {}  /  {:.3f} MiB  /  {}i   {z, c             S   s   g | ]\}}d  |j|qS )z{}: {})r   r   )rs   r   rm   r$   r$   r&   r     s   }
)rd   defaultdictappendjoinr   r:   rb   rc   r   num_elementsr>   Z	trainabler   Z
base_dtyperE   r   r   sumr   values)	r   r   Ztrainable_sizelinesfmtrm   r   bytes_Zcntr$   r$   r&   r"     s4    $kernelZbiasc	       	      C   sR   |dkrt  }|dkr"tj }tjt|| |||dtjt|||||dfS )a  Creates kernel and bias as `tf.Variable`s.

  Args:
    kernel_shape: ...
    bias_shape: ...
    kernel_initializer: ...
      Default value: `None` (i.e., `tf.initializers.glorot_uniform()`).
    bias_initializer: ...
      Default value: `None` (i.e., `tf.initializers.zeros()`).
    kernel_batch_ndims: ...
      Default value: `0`.
    bias_batch_ndims: ...
      Default value: `0`.
    dtype: ...
      Default value: `tf.float32`.
    kernel_name: ...
      Default value: `"kernel"`.
    bias_name: ...
      Default value: `"bias"`.

  Returns:
    kernel: ...
    bias: ...

  #### Recomendations:

  ```python
  #   tf.nn.relu    ==> tf.initializers.he_*
  #   tf.nn.elu     ==> tf.initializers.he_*
  #   tf.nn.selu    ==> tf.initializers.lecun_*
  #   tf.nn.tanh    ==> tf.initializers.glorot_*
  #   tf.nn.sigmoid ==> tf.initializers.glorot_*
  #   tf.nn.softmax ==> tf.initializers.glorot_*
  #   None          ==> tf.initializers.glorot_*
  # https://towardsdatascience.com/hyper-parameters-in-action-part-ii-weight-initializers-35aee1a28404
  # https://stats.stackexchange.com/a/393012/1835

  def make_uniform(size):
    s = tf.math.rsqrt(size / 3.)
    return tfd.Uniform(low=-s, high=s)

  def make_normal(size):
    # Constant is: `scipy.stats.truncnorm.var(loc=0., scale=1., a=-2., b=2.)`.
    s = tf.math.rsqrt(size) / 0.87962566103423978
    return tfd.TruncatedNormal(loc=0, scale=s, low=-2., high=2.)

  # He.  https://arxiv.org/abs/1502.01852
  he_uniform = make_uniform(fan_in / 2.)
  he_normal  = make_normal (fan_in / 2.)

  # Glorot (aka Xavier). http://proceedings.mlr.press/v9/glorot10a.html
  glorot_uniform = make_uniform((fan_in + fan_out) / 2.)
  glorot_normal  = make_normal ((fan_in + fan_out) / 2.)
  ```

  N)r   )nn_init_libglorot_uniformr:   r   zerosVariable_try_call_init_fn)	kernel_shape
bias_shapekernel_initializerbias_initializerkernel_batch_ndimsbias_batch_ndimsr   kernel_name	bias_namer$   r$   r&   r     s    D
Zprior_kernelZ
prior_biasc	       
      C   sL   t tddgdtdtjddg|ddd}	tt|	| |d	t|	||d	gS )
a_  Create prior for Variational layers with kernel and bias.

  Note: Distribution scale is inversely related to regularization strength.
  Consider a "Normal" prior; bigger scale corresponds to less L2 regularization.
  I.e.,
  ```python
  scale    = (2. * l2weight)**-0.5
  l2weight = scale**-2. / 2.
  ```
  have a similar regularizing effect.

  The std. deviation of each of the component distributions returned by this
  function is approximately `1415` (or approximately `l2weight = 25e-6`). In
  other words this prior is extremely "weak".

  Args:
    kernel_shape: ...
    bias_shape: ...
    kernel_initializer: Ignored.
      Default value: `None` (i.e., `tf.initializers.glorot_uniform()`).
    bias_initializer: Ignored.
      Default value: `None` (i.e., `tf.initializers.zeros()`).
    kernel_batch_ndims: ...
      Default value: `0`.
    bias_batch_ndims: ...
      Default value: `0`.
    dtype: ...
      Default value: `tf.float32`.
    kernel_name: ...
      Default value: `"prior_kernel"`.
    bias_name: ...
      Default value: `"prior_bias"`.

  Returns:
    kernel_and_bias_distribution: ...
  g      ?)Zprobsg        g      ?g     @@)r   )locscale)Zmixture_distributionZcomponents_distribution)r   )r   r	   r   r:   constantr   r   )
r   r   r   r   r   r   r   r   r   rJ   r$   r$   r&   r     s    .Zposterior_kernelZposterior_biasc	          	      s   |dkrt  }|dkr"tj } fdd}	 fdd}
ttt|	|| |||
| |dt	| |dtt|	|||||
||dt	||dgS )a  Create learnable posterior for Variational layers with kernel and bias.

  Args:
    kernel_shape: ...
    bias_shape: ...
    kernel_initializer: ...
      Default value: `None` (i.e., `tf.initializers.glorot_uniform()`).
    bias_initializer: ...
      Default value: `None` (i.e., `tf.initializers.zeros()`).
    kernel_batch_ndims: ...
      Default value: `0`.
    bias_batch_ndims: ...
      Default value: `0`.
    dtype: ...
      Default value: `tf.float32`.
    kernel_name: ...
      Default value: `"posterior_kernel"`.
    bias_name: ...
      Default value: `"posterior_bias"`.

  Returns:
    kernel_and_bias_distribution: ...
  Nc                s   t jt| | ||d dS )NZ_loc)r   )r:   r   r   )Zinit_fnr>   Zbatch_ndimsr   )r   r$   r&   r'   y  s   z5make_kernel_bias_posterior_mvn_diag.<locals>.<lambda>c                s4   t tj| tjd ddttdt g|d dS )NgMbP?)r   )valuegh㈵>Z_scale)r   )r   r:   fillr   r   r   r   )r>   r   )r   r$   r&   r'   ~  s   )r   r   )Zreinterpreted_batch_ndimsr   )
r   r   r:   r   r   r   r
   r   r   r   )r   r   r   r   r   r   r   r   r   Zmake_locZ
make_scaler$   )r   r&   r   T  s.    !
g        c          	   C   s   t |p
dp tj|||gt jd}t j||dd}t j||dd}t j||dd}t j| |dd} ||| d| |    S Q R X d	S )
zEInterpolates `initial` to `final` using halflife (exponential) decay.r   )r   initial)r   r   final	half_life	time_stepg      ?N)r:   r   r   Zcommon_dtypefloat32r   r   )r   r   r   r   r   r   r$   r$   r&   r     s    
c             G   s.   y| | S  t k
r(   | |dd  S X dS )z;Try to call function with first num_args else num_args - 1.Nr-   )rA   )rU   rW   r$   r$   r&   r     s    r   )NN)NNNr-   )r-   )NTTF)NTT)r1   )N)N)N)A__doc__
__future__r   r   r   rd   
contextlibr\   r   r#   r)   Z	six.movesr   Ztensorflow.compat.v2compatZv2r:   Z-tensorflow_probability.python.bijectors.chainr   Z-tensorflow_probability.python.bijectors.shiftr   Z0tensorflow_probability.python.bijectors.softplusr   Z7tensorflow_probability.python.distributions.categoricalr	   Z7tensorflow_probability.python.distributions.independentr
   ZItensorflow_probability.python.distributions.joint_distribution_sequentialr   Z?tensorflow_probability.python.distributions.mixture_same_familyr   Z2tensorflow_probability.python.distributions.normalr   Z2tensorflow_probability.python.distributions.sampler   Z-tensorflow_probability.python.experimental.nnr   r   Z&tensorflow_probability.python.internalr   r   r   Z2tensorflow_probability.python.util.deferred_tensorr   __all__r   r   r   contextmanagerrT   r   r   ri   r   r   r   r!   r    r"   r   r   r   r   r   r   r$   r$   r$   r&   <module>   s   
$   
M
   
J
7



(P39