contrib/LaneNet/loss.py

# coding: utf8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle.fluid as fluid
import numpy as np
from utils.config import cfg


def unsorted_segment_sum(data, segment_ids, unique_labels, feature_dims):
    unique_labels_shape = fluid.layers.shape(unique_labels)
    zeros = fluid.layers.fill_constant(
        shape=[unique_labels_shape[0], feature_dims], dtype='float32', value=0)
    segment_ids = fluid.layers.unsqueeze(segment_ids, axes=[1])
    segment_ids.stop_gradient = True
    segment_sum = fluid.layers.scatter_nd_add(zeros, segment_ids, data)
    zeros.stop_gradient = True

    return segment_sum


def norm(x, axis=-1):
    distance = fluid.layers.reduce_sum(
        fluid.layers.abs(x), dim=axis, keep_dim=True)
    return distance


def discriminative_loss_single(prediction, correct_label, feature_dim,
                               label_shape, delta_v, delta_d, param_var,
                               param_dist, param_reg):

    correct_label = fluid.layers.reshape(correct_label,
                                         [label_shape[1] * label_shape[0]])
    prediction = fluid.layers.transpose(prediction, [1, 2, 0])
    reshaped_pred = fluid.layers.reshape(
        prediction, [label_shape[1] * label_shape[0], feature_dim])

    unique_labels, unique_id, counts = fluid.layers.unique_with_counts(
        correct_label)
    correct_label.stop_gradient = True
    counts = fluid.layers.cast(counts, 'float32')
    num_instances = fluid.layers.shape(unique_labels)

    segmented_sum = unsorted_segment_sum(
        reshaped_pred, unique_id, unique_labels, feature_dims=feature_dim)

    counts_rsp = fluid.layers.reshape(counts, (-1, 1))
    mu = fluid.layers.elementwise_div(segmented_sum, counts_rsp)
    counts_rsp.stop_gradient = True
    mu_expand = fluid.layers.gather(mu, unique_id)
    tmp = fluid.layers.elementwise_sub(mu_expand, reshaped_pred)

    distance = norm(tmp)
    distance = distance - delta_v

    distance_pos = fluid.layers.greater_equal(distance,
                                              fluid.layers.zeros_like(distance))
    distance_pos = fluid.layers.cast(distance_pos, 'float32')
    distance = distance * distance_pos

    distance = fluid.layers.square(distance)

    l_var = unsorted_segment_sum(
        distance, unique_id, unique_labels, feature_dims=1)
    l_var = fluid.layers.elementwise_div(l_var, counts_rsp)
    l_var = fluid.layers.reduce_sum(l_var)
    l_var = l_var / fluid.layers.cast(num_instances * (num_instances - 1),
                                      'float32')

    mu_interleaved_rep = fluid.layers.expand(mu, [num_instances, 1])
    mu_band_rep = fluid.layers.expand(mu, [1, num_instances])
    mu_band_rep = fluid.layers.reshape(
        mu_band_rep, (num_instances * num_instances, feature_dim))

    mu_diff = fluid.layers.elementwise_sub(mu_band_rep, mu_interleaved_rep)

    intermediate_tensor = fluid.layers.reduce_sum(
        fluid.layers.abs(mu_diff), dim=1)
    intermediate_tensor.stop_gradient = True
    zero_vector = fluid.layers.zeros([1], 'float32')
    bool_mask = fluid.layers.not_equal(intermediate_tensor, zero_vector)
    temp = fluid.layers.where(bool_mask)
    mu_diff_bool = fluid.layers.gather(mu_diff, temp)

    mu_norm = norm(mu_diff_bool)
    mu_norm = 2. * delta_d - mu_norm
    mu_norm_pos = fluid.layers.greater_equal(mu_norm,
                                             fluid.layers.zeros_like(mu_norm))
    mu_norm_pos = fluid.layers.cast(mu_norm_pos, 'float32')
    mu_norm = mu_norm * mu_norm_pos
    mu_norm_pos.stop_gradient = True

    mu_norm = fluid.layers.square(mu_norm)

    l_dist = fluid.layers.reduce_mean(mu_norm)

    l_reg = fluid.layers.reduce_mean(norm(mu, axis=1))

    l_var = param_var * l_var
    l_dist = param_dist * l_dist
    l_reg = param_reg * l_reg
    loss = l_var + l_dist + l_reg
    return loss, l_var, l_dist, l_reg


def discriminative_loss(prediction, correct_label, feature_dim, image_shape,
                        delta_v, delta_d, param_var, param_dist, param_reg):
    batch_size = int(cfg.BATCH_SIZE_PER_DEV)
    output_ta_loss = 0.
    output_ta_var = 0.
    output_ta_dist = 0.
    output_ta_reg = 0.
    for i in range(batch_size):
        disc_loss_single, l_var_single, l_dist_single, l_reg_single = discriminative_loss_single(
            prediction[i], correct_label[i], feature_dim, image_shape, delta_v,
            delta_d, param_var, param_dist, param_reg)
        output_ta_loss += disc_loss_single
        output_ta_var += l_var_single
        output_ta_dist += l_dist_single
        output_ta_reg += l_reg_single

    disc_loss = output_ta_loss / batch_size
    l_var = output_ta_var / batch_size
    l_dist = output_ta_dist / batch_size
    l_reg = output_ta_reg / batch_size
    return disc_loss, l_var, l_dist, l_reg