"""Implementation of Loss operations for use in neural networks."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.python.eager import context
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import confusion_matrix
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import weights_broadcast_ops
from tensorflow.python.ops.losses import util
from tensorflow.python.util import dispatch
from tensorflow.python.util.deprecation import deprecated_args
from tensorflow.python.util.deprecation import deprecated_argument_lookup
from tensorflow.python.util.tf_export import tf_export


@tf_export(v1=["losses.Reduction"])
class Reduction(object):
  """Types of loss reduction.

  Contains the following values:

  * `NONE`: Un-reduced weighted losses with the same shape as input.
  * `SUM`: Scalar sum of weighted losses.
  * `MEAN`: Scalar `SUM` divided by sum of weights. DEPRECATED.
  * `SUM_OVER_BATCH_SIZE`: Scalar `SUM` divided by number of elements in losses.
  * `SUM_OVER_NONZERO_WEIGHTS`: Scalar `SUM` divided by number of non-zero
     weights. DEPRECATED.
  * `SUM_BY_NONZERO_WEIGHTS`: Same as `SUM_OVER_NONZERO_WEIGHTS`. DEPRECATED.
  noneZweighted_sumZweighted_sum_over_batch_sizeZweighted_meanZweighted_sum_by_nonzero_weightsc             C   s   | j | j| j| j| j| jfS )N)NONESUMMEANSUM_OVER_BATCH_SIZESUM_OVER_NONZERO_WEIGHTSSUM_BY_NONZERO_WEIGHTS)cls r   W/home/dcms/DCMS/lib/python3.7/site-packages/tensorflow/python/ops/losses/losses_impl.pyall<   s    zReduction.allc             C   s   ||   krtd| d S )NzInvalid Reduction Key %s.)r   
ValueError)r   keyr   r   r   validateF   s    zReduction.validateN)__name__
__module____qualname____doc__r   r   r   r   r   r   classmethodr   r"   r   r   r   r   r   &   s   
r   c             C   s   t | }t j||ddS )a,  Computes a safe mean of the losses.

  Args:
    losses: `Tensor` whose elements contain individual loss measurements.
    num_present: The number of measurable elements in `losses`.

  Returns:
    A scalar representing the mean of `losses`. If `num_present` is zero,
      then zero is returned.
  value)name)r   
reduce_sum
div_no_nan)lossesnum_presentZ
total_lossr   r   r   
_safe_meanL   s    
r.   Fc          	   C   s   t |tr|dks2t r:| dkr:t|ds:t| S t	dd| |fr}tj
|tjd}tt|dt|t|}t|| }|rtj|tdt|d|dS tj||d	S Q R X dS )
a  Computes the number of elements in the loss function induced by `weights`.

  A given weights tensor induces different numbers of usable elements in the
  `losses` tensor. The `weights` tensor is broadcast across `losses` for all
  possible dimensions. For example, if `losses` is a tensor of dimension
  `[4, 5, 6, 3]` and `weights` is a tensor of shape `[4, 5]`, then `weights` is,
  in effect, tiled to match the shape of `losses`. Following this effective
  tile, the total number of present elements is the number of non-zero weights.

  Args:
    losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
    weights: `Tensor` of shape `[]`, `[batch_size]` or
      `[batch_size, d1, ... dK]`, where K < N.
    per_batch: Whether to return the number of elements per batch or as a sum
      total.

  Returns:
    The number of present (non-zero) elements in the losses tensor. If
      `per_batch` is `True`, the value is returned as a tensor of size
      `[batch_size]`. Otherwise, a single scalar tensor is returned.
  """
  if ((isinstance(weights, float) and weights != 0.0) or
      (context.executing_eagerly() and weights._rank() == 0  # pylint: disable=protected-access
       and not math_ops.equal(weights, 0.0))):
    return _num_elements(losses)
  with ops.name_scope(None, "num_present", (losses, weights)) as scope:
    weights = math_ops.cast(weights, dtype=dtypes.float32)
    present = array_ops.where(
        math_ops.equal(weights, 0.0),
        array_ops.zeros_like(weights),
        array_ops.ones_like(weights))
    present = weights_broadcast_ops.broadcast_weights(present, losses)
    if per_batch:
      return math_ops.reduce_sum(
          present,
          axis=math_ops.range(1, array_ops.rank(present)),
          keepdims=True,
          name=scope)
    return math_ops.reduce_sum(present, name=scope)


def _num_elements(losses):
  """Computes the number of elements in `losses` tensor."""
  with ops.name_scope(None, "num_elements", values=[losses]) as scope:
    return math_ops.cast(array_ops.size(losses, name=scope), dtype=losses.dtype)


@tf_export(v1=["losses.compute_weighted_loss"])
@dispatch.add_dispatch_support
def compute_weighted_loss(
    losses, weights=1.0, scope=None, loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Computes the weighted loss.

  Args:
    losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `losses`, and must be broadcastable to `losses` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    scope: the scope for the operations performed in computing the loss.
    loss_collection: the loss will be added to these collections.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `losses`. If `reduction` is
    `NONE`, this has the same shape as `losses`; otherwise, it is scalar.

  Raises:
    ValueError: If `weights` is `None` or the shape is not compatible with
      `losses`, or if the number of dimensions (rank) of either `losses` or
      `weights` is missing.

  Note:
    When calculating the gradient of a weighted loss, contributions from
    both `losses` and `weights` are considered. If your `weights` depend
    on some model parameters but you do not want this to affect the loss
    gradient, you need to apply `tf.stop_gradient` to `weights` before
    passing them to `compute_weighted_loss`.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
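
  Example (a minimal sketch; the `tf.compat.v1.losses` endpoint is assumed and
  the values in the comments are illustrative):

  ```python
  import tensorflow as tf

  losses = tf.constant([1.0, 2.0, 3.0, 4.0])
  weights = tf.constant([1.0, 1.0, 0.0, 0.0])
  # Default SUM_BY_NONZERO_WEIGHTS reduction: (1 + 2) / 2 non-zero weights = 1.5.
  loss = tf.compat.v1.losses.compute_weighted_loss(losses, weights)
  # SUM_OVER_BATCH_SIZE divides by the number of elements instead: 3 / 4 = 0.75.
  loss = tf.compat.v1.losses.compute_weighted_loss(
      losses, weights,
      reduction=tf.compat.v1.losses.Reduction.SUM_OVER_BATCH_SIZE)
  ```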
  Zweighted_loss)r/   N)r   r"   r   r7   Zget_default_graphZ_last_loss_reductioncontrol_dependenciesr   assert_broadcastableconvert_to_tensorr/   r   r8   r   r9   multiplyr   r*   r   r.   r   r<   r   r   rB   r   r6   r   add_loss)r,   r?   rA   loss_collection	reductionZinput_dtypeweighted_losseslossr   r   r   compute_weighted_loss   s0    %








rN   zlosses.absolute_differencec          	   C   s   | dkrt d|dkr t dt|d|| |fX}tj|tjd}tj| tjd} | |   t	t
|| }t|||||dS Q R X dS )aE  Adds an Absolute Difference loss to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a `Tensor` of
  shape `[batch_size]`, then the total loss for each sample of the batch is
  rescaled by the corresponding element in the `weights` vector. If the shape of
  `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which this loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of
      `labels` or if the shape of `weights` is invalid or if `labels`
      or `predictions` is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  Nzlabels must not be None.zpredictions must not be None.absolute_difference)r/   )rK   )r    r   r7   r   r8   r   r9   	get_shapeassert_is_compatible_withabssubtractrN   )labelspredictionsr?   rA   rJ   rK   r,   r   r   r   rO      s    (rO   zlosses.cosine_distancez#dim is deprecated, use axis insteaddimc       
   	   C   s   t d|d|}|dkrtd| dkr.td|dkr>tdt|d|| |fh}tj|tjd}tj| tjd} | 	|   t
|| }d	tj||fd
d }	t|	||||dS Q R X dS )a  Adds a cosine-distance loss to the training procedure.

  Note that the function assumes that `predictions` and `labels` are already
  unit-normalized.

  Args:
    labels: `Tensor` whose shape matches 'predictions'
    predictions: An arbitrary matrix.
    axis: The dimension along which the cosine distance is computed.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which this loss will be added.
    reduction: Type of reduction to apply to loss.
    dim: The old (deprecated) name for `axis`.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If `predictions` shape doesn't match `labels` shape, or
      `axis`, `labels`, `predictions` or `weights` is `None`.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
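
  Example (a minimal sketch assuming the `tf.compat.v1.losses` endpoint; the
  inputs are already unit-normalized as required):

  ```python
  import tensorflow as tf

  labels = tf.constant([[1.0, 0.0]])       # unit vector
  predictions = tf.constant([[0.0, 1.0]])  # orthogonal unit vector
  # Orthogonal unit vectors yield a distance of 1.0; identical vectors yield 0.0.
  loss = tf.compat.v1.losses.cosine_distance(labels, predictions, axis=1)
  ```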
  """
  axis = deprecated_argument_lookup("axis", axis, "dim", dim)
  if axis is None:
    raise ValueError("You must specify 'axis'.")
  if labels is None:
    raise ValueError("labels must not be None.")
  if predictions is None:
    raise ValueError("predictions must not be None.")
  with ops.name_scope(scope, "cosine_distance_loss",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())

    radial_diffs = math_ops.multiply(predictions, labels)
    losses = 1 - math_ops.reduce_sum(radial_diffs, axis=(axis,), keepdims=True)
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


@tf_export(v1=["losses.hinge_loss"])
@dispatch.add_dispatch_support
def hinge_loss(labels, logits, weights=1.0, scope=None,
               loss_collection=ops.GraphKeys.LOSSES,
               reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds a hinge loss to the training procedure.

  Args:
    labels: The ground truth output tensor. Its shape should match the shape of
      logits. The values of the tensor are expected to be 0.0 or 1.0. Internally
      the {0,1} labels are converted to {-1,1} when calculating the hinge loss.
    logits: The logits, a float tensor. Note that logits are assumed to be
      unbounded and 0-centered. A value > 0 (resp. < 0) is considered a positive
      (resp. negative) binary prediction.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shapes of `logits` and `labels` don't match or
      if `labels` or `logits` is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
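
  Example (a minimal sketch assuming the `tf.compat.v1.losses` endpoint; the
  numbers in the comments are illustrative):

  ```python
  import tensorflow as tf

  labels = tf.constant([[0.0, 1.0]])   # {0, 1} targets, mapped to {-1, 1} internally
  logits = tf.constant([[0.5, -0.3]])  # unbounded, 0-centered scores
  # Per-element hinge: max(0, 1 - (-1)*0.5) = 1.5 and max(0, 1 - 1*(-0.3)) = 1.3,
  # so the default reduction gives (1.5 + 1.3) / 2 = 1.4.
  loss = tf.compat.v1.losses.hinge_loss(labels, logits)
  ```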
  Nzlabels must not be None.zlogits must not be None.
hinge_loss)r/      )rK   )r    r   r7   r   r8   r   r9   rP   rQ   r   r<   rS   r   ZrelurH   rN   )rT   logitsr?   rA   rJ   rK   all_onesr,   r   r   r   rX   ?  s    "
rX   zlosses.huber_lossc          
   C   s   | dkrt d|dkr t dt|d|| |f}tj|tjd}tj| tjd} | |   t	|| }t
|}t||}	t	||	}
tttjd|	jdt|	|	t||
}t|||||dS Q R X dS )a  Adds a [Huber Loss](https://en.wikipedia.org/wiki/Huber_loss) term to the training procedure.

  For each value x in `error=labels-predictions`, the following is calculated:

  ```
    0.5 * x^2                  if |x| <= d
    0.5 * d^2 + d * (|x| - d)  if |x| > d
  ```

  where d is `delta`.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of size
  `[batch_size]`, then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weights` vector. If the shape of
  `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    delta: `float`, the point where the huber loss function changes from a
      quadratic to linear.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.  Also if `labels` or
      `predictions` is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
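
  Example (a minimal sketch assuming the `tf.compat.v1.losses` endpoint; the
  numbers in the comments are illustrative):

  ```python
  import tensorflow as tf

  labels = tf.constant([0.0, 0.0])
  predictions = tf.constant([0.5, 3.0])
  # With delta=1.0: |0.5| <= 1 gives 0.5 * 0.5**2 = 0.125, while |3.0| > 1 gives
  # 0.5 * 1**2 + 1 * (3 - 1) = 2.5, so the mean loss is roughly 1.3125.
  loss = tf.compat.v1.losses.huber_loss(labels, predictions, delta=1.0)
  ```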
  Nzlabels must not be None.zpredictions must not be None.
huber_loss)r/   g      ?)rK   )r    r   r7   r   r8   r   r9   rP   rQ   rS   rR   ZminimumaddrH   rG   r/   rN   )rT   rU   r?   deltarA   rJ   rK   errorZ	abs_errorZ	quadraticZlinearr,   r   r   r   r\   r  s(    2
r\   zlosses.log_lossgHz>c          	   C   s   | dkrt d|dkr t dt|d|| |f|}tj|tjd}tj| tjd} | |   t	| t
||  t	d|  t
d| |  }t|||||dS Q R X dS )a~  Adds a Log Loss term to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of size
  `[batch_size]`, then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weights` vector. If the shape of
  `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    epsilon: A small increment to add to avoid taking a log of zero.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.  Also if `labels` or `predictions`
      is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
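
  Example (a minimal sketch assuming the `tf.compat.v1.losses` endpoint; the
  numbers in the comments are approximate):

  ```python
  import tensorflow as tf

  labels = tf.constant([1.0, 0.0])
  predictions = tf.constant([0.9, 0.2])
  # Per-element loss is -y*log(p + eps) - (1 - y)*log(1 - p + eps), roughly
  # [0.105, 0.223] here, so the reduced loss is about 0.164.
  loss = tf.compat.v1.losses.log_loss(labels, predictions)
  ```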
  Nzlabels must not be None.zpredictions must not be None.log_loss)r/   r0   )rK   )r    r   r7   r   r8   r   r9   rP   rQ   rH   logrN   )rT   rU   r?   epsilonrA   rJ   rK   r,   r   r   r   r`     s    (r`   z"losses.mean_pairwise_squared_errorc             C   s  | dkrt d|dkr t dt|d|| |fR}tj|tjd}tj| tjd} tt	|| f tj|tjd}|
 | 
  t|| }tdt|}tjt||dd}t||dd	}d
tj|t|d ddd }	tj||dd}
d
tjt|
tt||d ddd }t|	| |}t|}tjt|dk|t|dd}t|| |S Q R X W dQ R X dS )a  Adds a pairwise-errors-squared loss to the training procedure.

  Unlike `mean_squared_error`, which is a measure of the differences between
  corresponding elements of `predictions` and `labels`,
  `mean_pairwise_squared_error` is a measure of the differences between pairs of
  corresponding elements of `predictions` and `labels`.

  For example, if `labels`=[a, b, c] and `predictions`=[x, y, z], there are
  three pairs of differences that are summed to compute the loss:
    loss = [ ((a-b) - (x-y)).^2 + ((a-c) - (x-z)).^2 + ((b-c) - (y-z)).^2 ] / 3

  Note that since the inputs are of shape `[batch_size, d0, ... dN]`, the
  corresponding pairs are computed within each batch sample but not across
  samples within a batch. For example, if `predictions` represents a batch of
  16 grayscale images of dimension [batch_size, 100, 200], then the set of pairs
  is drawn from each image, but not across images.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of size
  `[batch_size]`, then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weights` vector.

  Args:
    labels: The ground truth output tensor, whose shape must match the shape of
      `predictions`.
    predictions: The predicted outputs, a tensor of size
      `[batch_size, d0, .. dN]` where N+1 is the total number of dimensions in
      `predictions`.
    weights: Coefficients for the loss, either a scalar, a tensor of shape
      `[batch_size]` or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.

  Returns:
    A scalar `Tensor` that returns the weighted loss.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.  Also if `labels` or `predictions`
      is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
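
  Example (a minimal sketch assuming the `tf.compat.v1.losses` endpoint; the
  arithmetic in the comments follows the formula above):

  ```python
  import tensorflow as tf

  labels = tf.constant([[4.0, 8.0, 12.0]])
  predictions = tf.constant([[1.0, 2.0, 3.0]])
  # Pairwise terms: ((4-8)-(1-2))^2 + ((4-12)-(1-3))^2 + ((8-12)-(2-3))^2
  # = 9 + 36 + 9 = 54, and dividing by the 3 pairs gives 18.0.
  loss = tf.compat.v1.losses.mean_pairwise_squared_error(labels, predictions)
  ```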
  Nzlabels must not be None.zpredictions must not be None.mean_pairwise_squared_error)r/   r0   T)r1   r2   )r@   g       @r   r(   )r)   )r    r   r7   r   r8   r   r9   rE   r   rF   rP   rQ   rS   r=   r   r>   r*   ZsquarerB   r+   maximumrH   r:   r;   r   rI   )rT   rU   r?   rA   rJ   Zdiffsr1   Zsum_squares_diff_per_batchZnum_present_per_batchZterm1Zsum_diffZterm2rL   rM   Z	mean_lossr   r   r   rc     sJ    3


rc   zlosses.mean_squared_errorc          	   C   s   | dkrt d|dkr t dt|d|| |fR}tj|tjd}tj| tjd} | |   t	|| }t
|||||dS Q R X dS )a?  Adds a Sum-of-Squares loss to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of size
  `[batch_size]`, then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weights` vector. If the shape of
  `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.  Also if `labels` or `predictions`
      is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  Nzlabels must not be None.zpredictions must not be None.mean_squared_error)r/   )rK   )r    r   r7   r   r8   r   r9   rP   rQ   Zsquared_differencerN   )rT   rU   r?   rA   rJ   rK   r,   r   r   r   re   Z  s    (re   zlosses.sigmoid_cross_entropyc          	   C   s   | dkrt d|dkr t dt|d|| |fj}t|}t| |j} | |   |dkr|| d|  d|  } t	j
| |dd	}t|||||d
S Q R X dS )a  Creates a cross-entropy loss using tf.nn.sigmoid_cross_entropy_with_logits.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of shape `[batch_size]`, then the loss weights apply to each
  corresponding sample.

  If `label_smoothing` is nonzero, smooth the labels towards 1/2:

      new_multiclass_labels = multiclass_labels * (1 - label_smoothing)
                              + 0.5 * label_smoothing

  Args:
    multi_class_labels: `[batch_size, num_classes]` target integer labels in
      `{0, 1}`.
    logits: Float `[batch_size, num_classes]` logits outputs of the network.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    label_smoothing: If greater than `0` then smooth the labels.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `logits`. If `reduction` is
    `NONE`, this has the same shape as `logits`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `logits` doesn't match that of
      `multi_class_labels` or if the shape of `weights` is invalid, or if
      `weights` is None.  Also if `multi_class_labels` or `logits` is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
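
  Example (a minimal sketch assuming the `tf.compat.v1.losses` endpoint):

  ```python
  import tensorflow as tf

  multi_class_labels = tf.constant([[1.0, 0.0, 1.0]])  # independent {0, 1} targets
  logits = tf.constant([[2.0, -1.0, 0.5]])
  # label_smoothing=0.2 moves the targets towards 1/2: 1 -> 0.9 and 0 -> 0.1.
  loss = tf.compat.v1.losses.sigmoid_cross_entropy(
      multi_class_labels, logits, label_smoothing=0.2)
  ```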
  """
  if multi_class_labels is None:
    raise ValueError("multi_class_labels must not be None.")
  if logits is None:
    raise ValueError("logits must not be None.")
  with ops.name_scope(scope, "sigmoid_cross_entropy_loss",
                      (logits, multi_class_labels, weights)) as scope:
    logits = ops.convert_to_tensor(logits)
    multi_class_labels = math_ops.cast(multi_class_labels, logits.dtype)
    logits.get_shape().assert_is_compatible_with(multi_class_labels.get_shape())

    if label_smoothing > 0:
      multi_class_labels = (multi_class_labels * (1 - label_smoothing) +
                            0.5 * label_smoothing)

    losses = nn.sigmoid_cross_entropy_with_logits(
        labels=multi_class_labels, logits=logits, name="xentropy")
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


@tf_export(v1=["losses.softmax_cross_entropy"])
@dispatch.add_dispatch_support
def softmax_cross_entropy(
    onehot_labels, logits, weights=1.0, label_smoothing=0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Creates a cross-entropy loss using tf.nn.softmax_cross_entropy_with_logits_v2.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of shape `[batch_size]`, then the loss weights apply to each
  corresponding sample.

  If `label_smoothing` is nonzero, smooth the labels towards 1/num_classes:
      new_onehot_labels = onehot_labels * (1 - label_smoothing)
                          + label_smoothing / num_classes

  Note that `onehot_labels` and `logits` must have the same shape,
  e.g. `[batch_size, num_classes]`. The shape of `weights` must be
  broadcastable to loss, whose shape is decided by the shape of `logits`.
  In case the shape of `logits` is `[batch_size, num_classes]`, loss is
  a `Tensor` of shape `[batch_size]`.

  Args:
    onehot_labels: One-hot-encoded labels.
    logits: Logits outputs of the network.
    weights: Optional `Tensor` that is broadcastable to loss.
    label_smoothing: If greater than 0 then smooth the labels.
    scope: the scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `logits`. If `reduction` is
    `NONE`, this has shape `[batch_size]`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `logits` doesn't match that of `onehot_labels`
      or if the shape of `weights` is invalid or if `weights` is None.  Also if
      `onehot_labels` or `logits` is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
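
  Example (a minimal sketch assuming the `tf.compat.v1.losses` endpoint):

  ```python
  import tensorflow as tf

  onehot_labels = tf.constant([[0.0, 1.0, 0.0]])
  logits = tf.constant([[1.0, 2.0, 0.5]])
  # With 3 classes, label_smoothing=0.1 rescales the targets to
  # 0.9 * onehot_labels + 0.1 / 3 before the cross-entropy is computed.
  loss = tf.compat.v1.losses.softmax_cross_entropy(
      onehot_labels, logits, label_smoothing=0.1)
  ```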
  Nzonehot_labels must not be None.zlogits must not be None.Zsoftmax_cross_entropy_lossr   g      ?Zlabels_stop_gradient)r)   rf   )rT   rZ   r)   )rK   )r    r   r7   rG   r   r8   r/   rP   rQ   r   shapeZstop_gradientr   Z$softmax_cross_entropy_with_logits_v2rN   )Zonehot_labelsrZ   r?   rg   rA   rJ   rK   Znum_classesZsmooth_positivesZsmooth_negativesr,   r   r   r   softmax_cross_entropy  s*    .

rk   c                s   t j| ||d\} } dk	rt  |  j}  }|j}|dk	rv|dk	rv|| }|dkrlt dg | | fS t t|  }|dks|dkr|j	d 
drttd| fdd fdd | | fS )	aX  Internal version of _remove_squeezable_dimensions which handles weights.

  Squeezes `predictions` and `labels` if their ranks differ from expected by
  exactly 1.
  Squeezes `weights` if its rank is 1 more than the new rank of `predictions`

  This will use static shape if available. Otherwise, it will add graph
  operations, which could result in a performance hit.

  Args:
    labels: Label values, a `Tensor` whose dimensions match `predictions`.
    predictions: Predicted values, a `Tensor` of arbitrary dimensions.
    weights: Optional weight `Tensor`. It will be squeezed if it's not scalar,
      and its rank is 1 more than the new rank of `labels`.
    expected_rank_diff: Expected result of `rank(predictions) - rank(labels)`.

  Returns:
    Tuple of `predictions`, `labels` and `weights`, possibly with the last
    dimension squeezed.
  )expected_rank_diffNr0   ri   r   c                  s   t  dgS )Nri   )r   squeezer   )r?   r   r   <lambda>F      z/_remove_squeezable_dimensions.<locals>.<lambda>c                  s    S )Nr   r   )r?   r   r   rn   G  ro   )r	   Zremove_squeezable_dimensionsr   rG   rP   Zndimsr   rm   r>   ZdimsZis_compatible_withr
   Zcondr   r5   )rT   rU   r?   rl   Zlabels_rankZweights_shapeZweights_rankZ	rank_diffr   )r?   r   _remove_squeezable_dimensions  s(    




rp   z#losses.sparse_softmax_cross_entropyc          	   C   sx   | dkrt d|dkr t dt|d|| |f:}t| ||dd\} }}tj| |dd}t|||||d	S Q R X dS )
aw  Cross-entropy loss using `tf.nn.sparse_softmax_cross_entropy_with_logits`.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of shape `[batch_size]`, then the loss weights apply to each
  corresponding sample.

  Args:
    labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-1}]` (where `r` is rank of
      `labels` and result) and dtype `int32` or `int64`. Each entry in `labels`
      must be an index in `[0, num_classes)`. Other values will raise an
      exception when this op is run on CPU, and return `NaN` for corresponding
      loss and gradient rows on GPU.
    logits: Unscaled log probabilities of shape
      `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float16`, `float32` or
      `float64`.
    weights: Coefficients for the loss. This must be scalar or broadcastable to
      `labels` (i.e. same rank and each dimension is either 1 or the same).
    scope: the scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `logits`. If `reduction` is
    `NONE`, this has the same shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shapes of `logits`, `labels`, and `weights` are
      incompatible, or if any of them are None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
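
  Example (a minimal sketch assuming the `tf.compat.v1.losses` endpoint):

  ```python
  import tensorflow as tf

  labels = tf.constant([1, 2])             # class IDs, shape [batch_size]
  logits = tf.constant([[1.0, 2.0, 0.5],
                        [0.3, 0.1, 4.0]])  # shape [batch_size, num_classes]
  # Equivalent to softmax_cross_entropy with one-hot labels, but takes class IDs.
  loss = tf.compat.v1.losses.sparse_softmax_cross_entropy(labels, logits)
  ```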
  Nzlabels must not be None.zlogits must not be None.Z!sparse_softmax_cross_entropy_lossr0   )rl   rf   )rT   rZ   r)   )rK   )r    r   r7   rp   r   Z(sparse_softmax_cross_entropy_with_logitsrN   )rT   rZ   r?   rA   rJ   rK   r,   r   r   r   sparse_softmax_cross_entropyL  s    )rq   )F)Nr   )0r&   