"""Convolution layers for building neural networks."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow.compat.v2 as tf

from tensorflow_probability.python.distributions import distribution as distribution_lib
from tensorflow_probability.python.experimental.nn import layers as layers_lib
from tensorflow_probability.python.experimental.nn import util as nn_util_lib
from tensorflow_probability.python.experimental.nn import variational_base as vi_lib
from tensorflow_probability.python.internal import dtype_util
from tensorflow_probability.python.internal import prefer_static

__all__ = [
    'Convolution',
    'ConvolutionVariationalFlipout',
    'ConvolutionVariationalReparameterization',
]

tfd = distribution_lib
kl_divergence_monte_carlo = vi_lib.kl_divergence_monte_carlo
unpack_kernel_and_bias = vi_lib.unpack_kernel_and_bias


class Convolution(layers_lib.KernelBiasLayer):
  """Convolution layer.

  This layer creates a Convolution kernel that is convolved (actually
  cross-correlated) with the layer input to produce a tensor of outputs.

  This layer has two learnable parameters, `kernel` and `bias`.
  - The `kernel` (aka `filters` argument of `tf.nn.convolution`) is a
    `tf.Variable` with `rank + 2` `ndims` and shape given by
    `concat([filter_shape, [input_size, output_size]], axis=0)`. Argument
    `filter_shape` is either a length-`rank` vector or expanded as one, i.e.,
    `filter_size * tf.ones(rank)` when `filter_shape` is an `int` (which we
    denote as `filter_size`).
  - The `bias` is a `tf.Variable` with `1` `ndims` and shape `[output_size]`.

  In summary, the shape of learnable parameters is governed by the following
  arguments: `filter_shape`, `input_size`, `output_size` and possibly `rank` (if
  `filter_shape` needs expansion).
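
  For example, with hypothetical sizes `rank=2`, `filter_shape=3`,
  `input_size=8` and `output_size=16`, the parameter shapes work out as
  (using `tfn = tfp.experimental.nn`, as in the example below):

  ```python
  conv = tfn.Convolution(input_size=8, output_size=16, filter_shape=3, rank=2)
  conv.kernel.shape  # ==> [3, 3, 8, 16]; `filter_shape=3` expands to `(3, 3)`.
  conv.bias.shape    # ==> [16]
  ```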

  For more information on convolution layers, we recommend the following:
  - [Deconvolution Checkerboard](https://distill.pub/2016/deconv-checkerboard/)
  - [Convolution Animations](https://github.com/vdumoulin/conv_arithmetic)
  - [What are Deconvolutional Layers?](
    https://datascience.stackexchange.com/questions/6107/what-are-deconvolutional-layers)

  #### Examples

  ```python
  import functools
  import tensorflow as tf
  import tensorflow_probability as tfp
  tfb = tfp.bijectors
  tfd = tfp.distributions
  tfn = tfp.experimental.nn

  Convolution1D = functools.partial(tfn.Convolution, rank=1)
  Convolution2D = tfn.Convolution
  Convolution3D = functools.partial(tfn.Convolution, rank=3)
  ```
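
  The layer may then be applied to a batch of images (hypothetical shapes):

  ```python
  images = tf.zeros([16, 28, 28, 1])  # Batch of 28x28 grayscale images.
  conv = Convolution2D(1, 32, filter_shape=5, padding='SAME',
                       activation_fn=tf.nn.relu)
  conv(images).shape  # ==> [16, 28, 28, 32]
  ```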

  """

  def __init__(
      self,
      input_size,
      output_size,
      # Conv specific.
      filter_shape,
      rank=2,
      strides=1,
      padding='VALID',
      dilations=1,
      # Weights
      init_kernel_fn=None,
      init_bias_fn=None,
      make_kernel_bias_fn=nn_util_lib.make_kernel_bias,
      dtype=tf.float32,
      batch_shape=(),
      # Misc
      activation_fn=None,
      name=None):
    """Constructs layer.

    Note: `data_format` is not supported since all nn layers operate on
    the rightmost column. If your channel dimension is not rightmost, use
    `tf.transpose` before calling this layer. For example, if your channel
    dimension is second from the left, the following code will move it
    rightmost:

    ```python
    inputs = tf.transpose(inputs, tf.concat([
        [0], tf.range(2, tf.rank(inputs)), [1]], axis=0))
    ```

    Args:
      input_size: ...
        In Keras, this argument is inferred from the rightmost input shape,
        i.e., `tf.shape(inputs)[-1]`. This argument specifies the size of the
        second from the rightmost dimension of both `inputs` and `kernel`.
        Default value: `None`.
      output_size: ...
        In Keras, this argument is called `filters`. This argument specifies the
        rightmost dimension size of both `kernel` and `bias`.
      filter_shape: ...
        In Keras, this argument is called `kernel_size`. This argument specifies
        the leftmost `rank` dimensions' sizes of `kernel`.
      rank: An integer, the rank of the convolution, e.g. "2" for 2D
        convolution. This argument implies the number of `kernel` dimensions,
        i.e., `kernel.shape.rank == rank + 2`.
        In Keras, this argument has the same name and semantics.
        Default value: `2`.
      strides: An integer or tuple/list of n integers, specifying the stride
        length of the convolution.
        In Keras, this argument has the same name and semantics.
        Default value: `1`.
      padding: One of `"VALID"` or `"SAME"` (case-insensitive).
        In Keras, this argument has the same name and semantics (except we don't
        support `"CAUSAL"`).
        Default value: `'VALID'`.
      dilations: An integer or tuple/list of `rank` integers, specifying the
        dilation rate to use for dilated convolution. Currently, specifying any
        `dilations` value != 1 is incompatible with specifying any `strides`
        value != 1.
        In Keras, this argument is called `dilation_rate`.
        Default value: `1`.
      init_kernel_fn: ...
        Default value: `None` (i.e.,
        `tfp.experimental.nn.initializers.glorot_uniform()`).
      init_bias_fn: ...
        Default value: `None` (i.e., `tf.initializers.zeros()`).
      make_kernel_bias_fn: ...
        Default value: `tfp.experimental.nn.util.make_kernel_bias`.
      dtype: ...
        Default value: `tf.float32`.
      batch_shape: ...
        Default value: `()`.
      activation_fn: ...
        Default value: `None`.
      name: ...
        Default value: `None` (i.e., `'Convolution'`).
    """
    filter_shape = prepare_tuple_argument(
        filter_shape, rank, arg_name='filter_shape')
    batch_shape = (np.array([], dtype=np.int32) if batch_shape is None
                   else prefer_static.reshape(batch_shape, shape=[-1]))
    batch_ndims = prefer_static.size(batch_shape)
    if tf.get_static_value(batch_ndims) == 0:
      # In this branch, we statically know there are no batch dims.
      kernel_shape = filter_shape + (input_size, output_size)
      bias_shape = [output_size]
      apply_kernel_fn = _make_convolution_fn(rank, strides, padding, dilations)
    else:
      # In this branch, there are either static or dynamic batch dims.
      kernel_shape = prefer_static.concat(
          [batch_shape, filter_shape, [input_size, output_size]], axis=0)
      bias_shape = prefer_static.concat([batch_shape, [output_size]], axis=0)
      apply_kernel_fn = lambda x, k: convolution_batch(  # pylint: disable=g-long-lambda
          x, k,
          rank=rank,
          strides=strides,
          padding=padding,
          data_format='NHWBC',
          dilations=dilations)
    kernel, bias = make_kernel_bias_fn(
        kernel_shape, bias_shape,
        init_kernel_fn, init_bias_fn,
        batch_ndims, batch_ndims,
        dtype)
    self._make_kernel_bias_fn = make_kernel_bias_fn  # For variable tracking.
    super(Convolution, self).__init__(
        kernel=kernel,
        bias=bias,
        apply_kernel_fn=apply_kernel_fn,
        dtype=dtype,
        activation_fn=activation_fn,
        name=name)


class ConvolutionVariationalReparameterization(
    vi_lib.VariationalReparameterizationKernelBiasLayer):
  """Convolution layer class with reparameterization estimator.

  This layer implements the Bayesian variational inference analogue to
  a Convolution layer by assuming the `kernel` and/or the `bias` are
  drawn from distributions. By default, the layer implements a stochastic
  forward pass via sampling from the kernel and bias posteriors,

  ```none
  kernel, bias ~ posterior
  outputs = tf.nn.convolution(inputs, kernel) + bias
  ```

  It uses the reparameterization estimator [(Kingma and Welling, 2014)][1],
  which performs a Monte Carlo approximation of the distribution integrating
  over the `kernel` and `bias`.

  The arguments permit separate specification of the surrogate posterior
  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
  distributions.

  Upon being built, this layer adds losses (accessible via the `losses`
  property) representing the divergences of `kernel` and/or `bias` surrogate
  posteriors and their respective priors. When doing minibatch stochastic
  optimization, make sure to scale this loss such that it is applied just once
  per epoch (e.g. if `kl` is the sum of `losses` for each element of the batch,
  you should pass `kl / num_examples_per_epoch` to your optimizer).
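
  For example, if `penalty_weight` was left at its default, the normalization
  can be done in the loss instead (a sketch, using the `extra_loss` property
  demonstrated in the example below):

  ```python
  kl = bnn.extra_loss / num_examples_per_epoch
  loss = nll + kl
  ```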

  You can access the `kernel` and/or `bias` posterior and prior distributions
  after the layer is built via the `kernel_posterior`, `kernel_prior`,
  `bias_posterior` and `bias_prior` properties.
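
  For example (a sketch, assuming these properties behave as documented
  above):

  ```python
  layer = tfn.ConvolutionVariationalReparameterization(1, 32, filter_shape=5)
  q_kernel = layer.kernel_posterior  # A `tfd.Distribution` over `kernel`.
  p_kernel = layer.kernel_prior
  ```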

  #### Examples

  We illustrate a Bayesian neural network with [variational inference](
  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
  assuming a dataset of images and length-10 one-hot `targets`.

  ```python
  import functools
  import numpy as np
  import tensorflow.compat.v2 as tf
  import tensorflow_probability as tfp
  import tensorflow_datasets as tfds
  tfb = tfp.bijectors
  tfd = tfp.distributions
  tfn = tfp.experimental.nn

  # 1  Prepare Dataset

  [train_dataset, eval_dataset], datasets_info = tfds.load(
      name='mnist',
      split=['train', 'test'],
      with_info=True,
      as_supervised=True,
      shuffle_files=True)
  def _preprocess(image, label):
    # image = image < tf.random.uniform(tf.shape(image))   # Randomly binarize.
    image = tf.cast(image, tf.float32) / 255.  # Scale to unit interval.
    lo = 0.001
    image = (1. - 2. * lo) * image + lo  # Rescale to *open* unit interval.
    return image, label
  batch_size = 32
  train_size = datasets_info.splits['train'].num_examples
  train_dataset = tfn.util.tune_dataset(
      train_dataset,
      batch_shape=(batch_size,),
      shuffle_size=int(train_size / 7),
      preprocess_fn=_preprocess)
  train_iter = iter(train_dataset)
  eval_iter = iter(eval_dataset)
  x, y = next(train_iter)
  evidence_shape = x.shape[1:]
  targets_shape = y.shape[1:]

  # 2  Specify Model

  n = tf.cast(train_size, tf.float32)

  BayesConv2D = functools.partial(
      tfn.ConvolutionVariationalReparameterization,
      rank=2,
      padding='same',
      filter_shape=5,
      # Use `he_uniform` because we'll use the `relu` family.
      init_kernel_fn=tf.initializers.he_uniform(),
      penalty_weight=1. / n)

  BayesAffine = functools.partial(
      tfn.AffineVariationalReparameterization,
      penalty_weight=1. / n)

  scale = tfp.util.TransformedVariable(1., tfb.Softplus())
  bnn = tfn.Sequential([
      BayesConv2D(evidence_shape[-1], 32, filter_shape=7, strides=2,
                  activation_fn=tf.nn.leaky_relu),           # [b, 14, 14, 32]
      tfn.util.flatten_rightmost(ndims=3),                   # [b, 14 * 14 * 32]
      BayesAffine(14 * 14 * 32, np.prod(targets_shape) - 1),  # [b, 9]
      tfn.Lambda(
          eval_fn=lambda loc: tfb.SoftmaxCentered()(
              tfd.Independent(tfd.Normal(loc, scale),
                              reinterpreted_batch_ndims=1)),
          also_track=scale),                                 # [b, 10]
  ], name='bayesian_neural_network')

  print(bnn.summary())

  # 3  Train.

  def loss_fn():
    x, y = next(train_iter)
    nll = -tf.reduce_mean(bnn(x).log_prob(y), axis=-1)
    kl = bnn.extra_loss  # Already normalized via `penalty_weight` arg.
    loss = nll + kl
    return loss, (nll, kl)
  opt = tf.optimizers.Adam()
  fit_op = tfn.util.make_fit_op(loss_fn, opt, bnn.trainable_variables)
  for _ in range(200):
    loss, (nll, kl), g = fit_op()
  ```
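
  Held-out performance can be estimated the same way, reusing `eval_iter` from
  above (a sketch):

  ```python
  x, y = next(eval_iter)
  eval_nll = -tf.reduce_mean(bnn(x).log_prob(y), axis=-1)
  ```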

  This example uses reparameterization gradients to minimize the
  Kullback-Leibler divergence up to a constant, also known as the negative
  Evidence Lower Bound. It consists of the sum of two terms: the expected
  negative log-likelihood, which we approximate via Monte Carlo; and the KL
  divergence, which is added via regularizer terms which are arguments to the
  layer.

  #### References

  [1]: Diederik Kingma and Max Welling. Auto-Encoding Variational Bayes. In
       _International Conference on Learning Representations_, 2014.
       https://arxiv.org/abs/1312.6114
  """

  def __init__(
      self,
      input_size,
      output_size,
      # Conv specific.
      filter_shape,
      rank=2,
      strides=1,
      padding='VALID',
      dilations=1,
      # Weights
      init_kernel_fn=None,
      init_bias_fn=None,
      make_posterior_fn=nn_util_lib.make_kernel_bias_posterior_mvn_diag,
      make_prior_fn=nn_util_lib.make_kernel_bias_prior_spike_and_slab,
      posterior_value_fn=tfd.Distribution.sample,
      unpack_weights_fn=unpack_kernel_and_bias,
      dtype=tf.float32,
      # Penalty.
      penalty_weight=None,
      posterior_penalty_fn=kl_divergence_monte_carlo,
      # Misc
      activation_fn=None,
      seed=None,
      name=None):
    """Constructs layer.

    Note: `data_format` is not supported since all nn layers operate on
    the rightmost column. If your channel dimension is not rightmost, use
    `tf.transpose` before calling this layer. For example, if your channel
    dimension is second from the left, the following code will move it
    rightmost:

    ```python
    inputs = tf.transpose(inputs, tf.concat([
        [0], tf.range(2, tf.rank(inputs)), [1]], axis=0))
    ```

    Args:
      input_size: ...
        In Keras, this argument is inferred from the rightmost input shape,
        i.e., `tf.shape(inputs)[-1]`. This argument specifies the size of the
        second from the rightmost dimension of both `inputs` and `kernel`.
        Default value: `None`.
      output_size: ...
        In Keras, this argument is called `filters`. This argument specifies the
        rightmost dimension size of both `kernel` and `bias`.
      filter_shape: ...
        In Keras, this argument is called `kernel_size`. This argument specifies
        the leftmost `rank` dimensions' sizes of `kernel`.
      rank: An integer, the rank of the convolution, e.g. "2" for 2D
        convolution. This argument implies the number of `kernel` dimensions,
        i.e., `kernel.shape.rank == rank + 2`.
        In Keras, this argument has the same name and semantics.
        Default value: `2`.
      strides: An integer or tuple/list of n integers, specifying the stride
        length of the convolution.
        In Keras, this argument has the same name and semantics.
        Default value: `1`.
      padding: One of `"VALID"` or `"SAME"` (case-insensitive).
        In Keras, this argument has the same name and semantics (except we don't
        support `"CAUSAL"`).
        Default value: `'VALID'`.
      dilations: An integer or tuple/list of `rank` integers, specifying the
        dilation rate to use for dilated convolution. Currently, specifying any
        `dilations` value != 1 is incompatible with specifying any `strides`
        value != 1.
        In Keras, this argument is called `dilation_rate`.
        Default value: `1`.
      init_kernel_fn: ...
        Default value: `None` (i.e.,
        `tfp.experimental.nn.initializers.glorot_uniform()`).
      init_bias_fn: ...
        Default value: `None` (i.e., `tf.initializers.zeros()`).
      make_posterior_fn: ...
        Default value:
          `tfp.experimental.nn.util.make_kernel_bias_posterior_mvn_diag`.
      make_prior_fn: ...
        Default value:
          `tfp.experimental.nn.util.make_kernel_bias_prior_spike_and_slab`.
      posterior_value_fn: ...
        Default value: `tfd.Distribution.sample`.
      unpack_weights_fn: ...
        Default value: `unpack_kernel_and_bias`
      dtype: ...
        Default value: `tf.float32`.
      penalty_weight: ...
        Default value: `None` (i.e., weight is `1`).
      posterior_penalty_fn: ...
        Default value: `kl_divergence_monte_carlo`.
      activation_fn: ...
        Default value: `None`.
      seed: ...
        Default value: `None` (i.e., no seed).
      name: ...
        Default value: `None` (i.e.,
        `'ConvolutionVariationalReparameterization'`).
    """
    filter_shape = prepare_tuple_argument(
        filter_shape, rank, arg_name='filter_shape')
    kernel_shape = filter_shape + (input_size, output_size)
    self._make_posterior_fn = make_posterior_fn  # For variable tracking.
    self._make_prior_fn = make_prior_fn  # For variable tracking.
    batch_ndims = 0
    super(ConvolutionVariationalReparameterization, self).__init__(
        posterior=make_posterior_fn(
            kernel_shape, [output_size],
            init_kernel_fn, init_bias_fn,
            batch_ndims, batch_ndims,
            dtype),
        prior=make_prior_fn(
            kernel_shape, [output_size],
            init_kernel_fn, init_bias_fn,
            batch_ndims, batch_ndims,
            dtype),
        apply_kernel_fn=_make_convolution_fn(rank, strides, padding, dilations),
        posterior_value_fn=posterior_value_fn,
        unpack_weights_fn=unpack_weights_fn,
        dtype=dtype,
        penalty_weight=penalty_weight,
        posterior_penalty_fn=posterior_penalty_fn,
        activation_fn=activation_fn,
        seed=seed,
        name=name)


class ConvolutionVariationalFlipout(vi_lib.VariationalFlipoutKernelBiasLayer):
  """Convolution layer class with Flipout estimator.

  This layer implements the Bayesian variational inference analogue to
  a Convolution layer by assuming the `kernel` and/or the `bias` are
  drawn from distributions. By default, the layer implements a stochastic
  forward pass via sampling from the kernel and bias posteriors,

  ```none
  kernel, bias ~ posterior
  outputs = tf.nn.convolution(inputs, kernel) + bias
  ```

  It uses the Flipout estimator [(Wen et al., 2018)][1], which performs a Monte
  Carlo approximation of the distribution integrating over the `kernel` and
  `bias`. Flipout uses roughly twice as many floating point operations as the
  reparameterization estimator but has the advantage of significantly lower
  variance.

  The arguments permit separate specification of the surrogate posterior
  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
  distributions.

  Upon being built, this layer adds losses (accessible via the `losses`
  property) representing the divergences of `kernel` and/or `bias` surrogate
  posteriors and their respective priors. When doing minibatch stochastic
  optimization, make sure to scale this loss such that it is applied just once
  per epoch (e.g. if `kl` is the sum of `losses` for each element of the batch,
  you should pass `kl / num_examples_per_epoch` to your optimizer).

  As with `Convolution`, `data_format` is not supported; if your channel
  dimension is not rightmost, transpose it there before calling the layer:

  ```python
  inputs = tf.transpose(inputs, tf.concat([
      [0], tf.range(2, tf.rank(inputs)), [1]], axis=0))
  ```

  #### Examples

  We illustrate a Bayesian neural network with [variational inference](
  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
  assuming a dataset of images and length-10 one-hot `targets`.

  ```python
  # Using the following substitution, see:
  tfn = tfp.experimental.nn
  help(tfn.ConvolutionVariationalReparameterization)
  BayesConv2D = functools.partial(
      tfn.ConvolutionVariationalFlipout,
      penalty_weight=1. / n)
  ```
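
  The resulting layer is a drop-in replacement in the model above, e.g. (a
  sketch, assuming the rest of that example's setup is in scope):

  ```python
  layer = BayesConv2D(evidence_shape[-1], 32, filter_shape=5, rank=2,
                      padding='same', strides=2,
                      activation_fn=tf.nn.leaky_relu)
  ```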

  This example uses reparameterization gradients to minimize the
  Kullback-Leibler divergence up to a constant, also known as the negative
  Evidence Lower Bound. It consists of the sum of two terms: the expected
  negative log-likelihood, which we approximate via Monte Carlo; and the KL
  divergence, which is added via regularizer terms which are arguments to the
  layer.

  #### References

  [1]: Yeming Wen, Paul Vicol, Jimmy Ba, Dustin Tran, and Roger Grosse. Flipout:
       Efficient Pseudo-Independent Weight Perturbations on Mini-Batches. In
       _International Conference on Learning Representations_, 2018.
       https://arxiv.org/abs/1803.04386
  """

  def __init__(
      self,
      input_size,
      output_size,
      # Conv specific.
      filter_shape,
      rank=2,
      strides=1,
      padding='VALID',
      dilations=1,
      # Weights
      init_kernel_fn=None,
      init_bias_fn=None,
      make_posterior_fn=nn_util_lib.make_kernel_bias_posterior_mvn_diag,
      make_prior_fn=nn_util_lib.make_kernel_bias_prior_spike_and_slab,
      posterior_value_fn=tfd.Distribution.sample,
      unpack_weights_fn=unpack_kernel_and_bias,
      dtype=tf.float32,
      # Penalty.
      penalty_weight=None,
      posterior_penalty_fn=kl_divergence_monte_carlo,
      # Misc
      activation_fn=None,
      seed=None,
      name=None):
    """Constructs layer.

    Note: `data_format` is not supported since all nn layers operate on
    the rightmost column. If your channel dimension is not rightmost, use
    `tf.transpose` before calling this layer. For example, if your channel
    dimension is second from the left, the following code will move it
    rightmost:

    ```python
    inputs = tf.transpose(inputs, tf.concat([
        [0], tf.range(2, tf.rank(inputs)), [1]], axis=0))
    ```

    Args:
      input_size: ...
        In Keras, this argument is inferred from the rightmost input shape,
        i.e., `tf.shape(inputs)[-1]`. This argument specifies the size of the
        second from the rightmost dimension of both `inputs` and `kernel`.
        Default value: `None`.
      output_size: ...
        In Keras, this argument is called `filters`. This argument specifies the
        rightmost dimension size of both `kernel` and `bias`.
      filter_shape: ...
        In Keras, this argument is called `kernel_size`. This argument specifies
        the leftmost `rank` dimensions' sizes of `kernel`.
      rank: An integer, the rank of the convolution, e.g. "2" for 2D
        convolution. This argument implies the number of `kernel` dimensions,
        i.e., `kernel.shape.rank == rank + 2`.
        In Keras, this argument has the same name and semantics.
        Default value: `2`.
      strides: An integer or tuple/list of n integers, specifying the stride
        length of the convolution.
        In Keras, this argument has the same name and semantics.
        Default value: `1`.
      padding: One of `"VALID"` or `"SAME"` (case-insensitive).
        In Keras, this argument has the same name and semantics (except we don't
        support `"CAUSAL"`).
        Default value: `'VALID'`.
      dilations: An integer or tuple/list of `rank` integers, specifying the
        dilation rate to use for dilated convolution. Currently, specifying any
        `dilations` value != 1 is incompatible with specifying any `strides`
        value != 1.
        In Keras, this argument is called `dilation_rate`.
        Default value: `1`.
      init_kernel_fn: ...
        Default value: `None` (i.e.,
        `tfp.experimental.nn.initializers.glorot_uniform()`).
      init_bias_fn: ...
        Default value: `None` (i.e., `tf.initializers.zeros()`).
      make_posterior_fn: ...
        Default value:
          `tfp.experimental.nn.util.make_kernel_bias_posterior_mvn_diag`.
      make_prior_fn: ...
        Default value:
          `tfp.experimental.nn.util.make_kernel_bias_prior_spike_and_slab`.
      posterior_value_fn: ...
        Default value: `tfd.Distribution.sample`.
      unpack_weights_fn: ...
        Default value: `unpack_kernel_and_bias`
      dtype: ...
        Default value: `tf.float32`.
      penalty_weight: ...
        Default value: `None` (i.e., weight is `1`).
      posterior_penalty_fn: ...
        Default value: `kl_divergence_monte_carlo`.
      activation_fn: ...
        Default value: `None`.
      seed: ...
        Default value: `None` (i.e., no seed).
      name: ...
        Default value: `None` (i.e.,
        `'ConvolutionVariationalFlipout'`).
    """
    filter_shape = prepare_tuple_argument(
        filter_shape, rank, arg_name='filter_shape')
    kernel_shape = filter_shape + (input_size, output_size)
    self._make_posterior_fn = make_posterior_fn  # For variable tracking.
    self._make_prior_fn = make_prior_fn  # For variable tracking.
    batch_ndims = 0
    super(ConvolutionVariationalFlipout, self).__init__(
        posterior=make_posterior_fn(
            kernel_shape, [output_size],
            init_kernel_fn, init_bias_fn,
            batch_ndims, batch_ndims,
            dtype),
        prior=make_prior_fn(
            kernel_shape, [output_size],
            init_kernel_fn, init_bias_fn,
            batch_ndims, batch_ndims,
            dtype),
        apply_kernel_fn=_make_convolution_fn(rank, strides, padding, dilations),
        posterior_value_fn=posterior_value_fn,
        unpack_weights_fn=unpack_weights_fn,
        dtype=dtype,
        penalty_weight=penalty_weight,
        posterior_penalty_fn=posterior_penalty_fn,
        activation_fn=activation_fn,
        seed=seed,
        name=name)


def _make_convolution_fn(rank, strides, padding, dilations):
  """Helper to create tf convolution op."""
  [rank, strides, padding, dilations, data_format] = prepare_conv_args(
      rank, strides, padding, dilations)
  def op(x, kernel):
    dtype = dtype_util.common_dtype([x, kernel], dtype_hint=tf.float32)
    x = tf.convert_to_tensor(x, dtype=dtype, name='x')
    kernel = tf.convert_to_tensor(kernel, dtype=dtype, name='kernel')
    return tf.nn.convolution(
        x, kernel,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilations=dilations)
  return lambda x, kernel: batchify_op(op, rank + 1, x, kernel)


def batchify_op(op, op_min_input_ndims, x, *other_op_args):
  """Reshape `op` input `x` to be a vec of `op_min_input_ndims`-rank tensors."""
  if x.shape.rank == op_min_input_ndims + 1:
    # Input is already a vector of `op_min_input_ndims`-rank tensors.
    return op(x, *other_op_args)
  batch_shape, op_shape = prefer_static.split(
      prefer_static.shape(x),
      num_or_size_splits=[-1, op_min_input_ndims])
  flat_shape = prefer_static.pad(
      op_shape, paddings=[[1, 0]], constant_values=-1)
  y = tf.reshape(x, shape=flat_shape)
  y = op(y, *other_op_args)
  unflat_shape = prefer_static.concat(
      [batch_shape, prefer_static.shape(y)[1:]], axis=0)
  return tf.reshape(y, shape=unflat_shape)


def prepare_conv_args(rank, strides, padding, dilations):
  """Sanitizes user-provided input."""
  try:
    rank = int(tf.get_static_value(rank))
  except TypeError:
    raise TypeError('Argument `rank` must be statically known `int`.')
  valid_rank = {1, 2, 3}
  if rank not in valid_rank:
    raise ValueError('Argument `rank` must be in {}.'.format(valid_rank))
  strides = prepare_tuple_argument(strides, rank, arg_name='strides')
  padding = _prepare_padding_argument(padding)
  dilations = prepare_tuple_argument(dilations, rank, arg_name='dilations')
  data_format = {1: 'NWC', 2: 'NHWC', 3: 'NDHWC'}.get(rank)
  return rank, strides, padding, dilations, data_format


def prepare_tuple_argument(x, n, arg_name):
  """Helper which puts tuples in standard form."""
  if isinstance(x, int):
    return (x,) * n
  try:
    x = tuple(x)
  except TypeError:
    raise ValueError(
        'Argument {} must be convertible to tuple.'.format(arg_name))
  if n != len(x):
    raise ValueError(
        'Argument {} has invalid length; expected:{}, saw:{}.'.format(
            arg_name, n, len(x)))
  for x_ in x:
    try:
      int(x_)
    except (ValueError, TypeError):
      raise ValueError(
          'Argument {} contains non-integer input; saw: {}.'.format(
              arg_name, x_))
  return x


def _prepare_padding_argument(x):
  """Helper which processes the padding argument."""
  if not hasattr(x, 'upper'):
    return tuple(x)
  padding = x.upper()
  if padding in {'CAUSAL', 'FULL'}:
    raise NotImplementedError(
        'Argument `padding` value "{}" currently not supported. If you '
        'require this feature, please create an issue on '
        '`https://github.com/tensorflow/probability` or email '
        '`tfprobability@tensorflow.org`.'.format(padding))
  valid_values = {'VALID', 'SAME'}
  if padding not in valid_values:
    raise ValueError(
        'Argument `padding` must be convertible to a tuple or one of {}; '
        'saw: "{}".'.format(valid_values, padding))
  return padding

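# Example behavior of the argument helpers above (hypothetical values, shown
# as comments so that importing this module stays side-effect free):
#
#   prepare_tuple_argument(3, 2, arg_name='strides')       # ==> (3, 3)
#   prepare_tuple_argument((3, 5), 2, arg_name='strides')  # ==> (3, 5)
#   _prepare_padding_argument('same')                      # ==> 'SAME'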


def convolution_batch(x, kernel, rank, strides, padding, data_format=None,
                      dilations=None, name=None):
  """Like `tf.nn.conv2d` except applies batch of kernels to batch of `x`."""
  if rank != 2:
    raise NotImplementedError(
        'Argument `rank` currently only supports `2`; saw "{}".'.format(rank))
  if data_format is not None and data_format.upper() != 'NHWBC':
    raise ValueError(
        'Argument `data_format` currently only supports "NHWBC"; '
        'saw "{}".'.format(data_format))
  with tf.name_scope(name or 'conv2d_nhwbc'):
    # Prepare arguments.
    [rank, _, padding, dilations, data_format] = prepare_conv_args(
        rank, strides, padding, dilations)
    strides = prepare_tuple_argument(strides, rank + 2, arg_name='strides')

    dtype = dtype_util.common_dtype([x, kernel], dtype_hint=tf.float32)
    x = tf.convert_to_tensor(x, dtype=dtype, name='x')
    kernel = tf.convert_to_tensor(kernel, dtype=dtype, name='kernel')

    # Step 1: Transpose and double flatten kernel.
    # kernel.shape = B + F + [c, c']; eg: [b, fh, fw, c, c'].
    kernel_shape = prefer_static.shape(kernel)
    kernel_batch_shape, kernel_event_shape = prefer_static.split(
        kernel_shape, num_or_size_splits=[-1, rank + 2])
    kernel_batch_size = prefer_static.reduce_prod(kernel_batch_shape)
    kernel_ndims = prefer_static.rank(kernel)
    kernel_batch_ndims = kernel_ndims - rank - 2
    perm = prefer_static.concat([
        prefer_static.range(kernel_batch_ndims, kernel_batch_ndims + rank),
        prefer_static.range(0, kernel_batch_ndims),
        prefer_static.range(kernel_batch_ndims + rank, kernel_ndims),
    ], axis=0)  # Eg, [1, 2, 0, 3, 4]
    kernel = tf.transpose(kernel, perm=perm)  # F + B + [c, c']
    kernel = tf.reshape(
        kernel,
        shape=prefer_static.concat([
            kernel_event_shape[:rank],
            [kernel_batch_size * kernel_event_shape[-2],
             kernel_event_shape[-1]],
        ], axis=0))  # F + [bc, c']

    # Step 2: Double flatten x.
    # x.shape = N + D + B + [c]
    x_shape = prefer_static.shape(x)
    [x_sample_shape, x_rank_shape, x_batch_shape,
     x_channel_shape] = prefer_static.split(
         x_shape, num_or_size_splits=[-1, rank, kernel_batch_ndims, 1])
    x = tf.reshape(
        x,  # N + D + B + [c]
        shape=prefer_static.concat([
            [prefer_static.reduce_prod(x_sample_shape)],
            x_rank_shape,
            [prefer_static.reduce_prod(x_batch_shape) *
             prefer_static.reduce_prod(x_channel_shape)],
        ], axis=0))  # [n] + D + [bc]

    # Step 3: Apply convolution.
    y = tf.nn.depthwise_conv2d(
        x, kernel,
        strides=strides,
        padding=padding,
        data_format='NHWC',
        dilations=dilations)

    # Step 4: Reshape and reduce over the kernel's input-channel dimension.
    y_shape = prefer_static.shape(y)
    y = tf.reshape(
        y,
        shape=prefer_static.concat([
            x_sample_shape,
            y_shape[1:-1],
            kernel_batch_shape,
            kernel_event_shape[-2:],
        ], axis=0))  # N + D' + B + [c, c']
    return tf.reduce_sum(y, axis=-2)  # N + D' + B + [c']