gpt4 book ai didi

python - 如何在 mxnet 中进行加权 softmax 输出自定义操作?

转载 作者:太空宇宙 更新时间:2023-11-03 10:52:31 25 4
gpt4 key购买 nike

我想用加权版本替换 mx.symbol.SoftmaxOutput(根据整个数据集中标签的频率分配不同的权重)

原始函数运行良好,如下所示:

# Stock, unweighted softmax output layer that the custom operator is meant
# to replace; entries whose label equals ignore_label (-1) are skipped.
cls_prob = mx.symbol.SoftmaxOutput(
    data=data,
    label=label,
    multi_output=True,
    normalization='valid',
    use_ignore=True,
    ignore_label=-1,
    name='cls_prob',
)

我目前写的代码如下。代码可以无错运行,但是 loss 很快爆炸到 nan。我正在处理检测问题,当我将我的代码用作 CustomOp 时,RCNN L1 损失很快变成 nan。另一件事是我必须忽略标签 -1,但我不确定如何正确地做到这一点。任何帮助将不胜感激。

import mxnet as mx
import numpy as np

class WeightedSoftmaxCrossEntropyLoss(mx.operator.CustomOp):
    """Softmax output layer whose backward pass is re-weighted per class.

    The forward pass returns the softmax probabilities produced by
    ``mx.nd.SoftmaxOutput``.  The backward pass computes the usual
    ``softmax - one_hot`` gradient, scales every sample by the weight of its
    ground-truth class and emits a zero gradient for samples labelled
    ``IGNORE_LABEL``.

    Args:
        num_class: Number of classes.  Converted with ``int()`` because
            custom-op keyword arguments may arrive as strings.  Must not
            exceed ``len(CLS_WEIGHT)``, since labels index into that table.
    """

    # Per-class weights, indexed by class id.  Built once here instead of on
    # every backward call.
    CLS_WEIGHT = np.array([
        0.002852781814876101,
        0.30715984513157385,
        1.0932468996115976,
        1.1598757152765971,
        0.20739109264009636,
        1.1984256112776808,
        0.18746186040248036,
        2.9009928470737023,
        0.92140970338602113,
        1.200317380251021,
    ])

    # Constant divisor carried over unchanged from the original code.
    # NOTE(review): its origin is undocumented -- presumably a batch-size
    # normaliser; confirm before changing.
    GRAD_DIVISOR = 4

    # Label value marking samples that must not contribute to the gradient.
    IGNORE_LABEL = -1

    def __init__(self, num_class):
        super(WeightedSoftmaxCrossEntropyLoss, self).__init__()
        self.num_class = int(num_class)

    def forward(self, is_train, req, in_data, out_data, aux):
        """Write the softmax of ``in_data[0]`` into ``out_data[0]``."""
        data = in_data[0]
        label = in_data[1]
        pred = mx.nd.SoftmaxOutput(data, label, multi_output=True,
                                   normalization='valid', use_ignore=True,
                                   ignore_label=-1,
                                   name='rcnn_cls_prob')
        self.assign(out_data[0], req[0], pred)

    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        """Write the class-weighted softmax gradient into ``in_grad[0]``.

        For a sample with label ``y`` and probabilities ``p`` the gradient
        is ``CLS_WEIGHT[y] * (p - one_hot(y)) / GRAD_DIVISOR``; samples
        labelled ``IGNORE_LABEL`` receive an all-zero gradient.
        """
        label = in_data[1].asnumpy().astype('int32').reshape((-1))
        pred = out_data[0].asnumpy()
        data_shape = pred.shape
        num_rows = label.shape[0]

        # Move the class axis (axis 1) last and flatten everything else so
        # that each row holds the class probabilities of one labelled sample.
        pred = pred.reshape((data_shape[0], data_shape[1], -1))
        pred = pred.transpose((0, 2, 1)).reshape((num_rows, -1))

        # Only valid rows may index the one-hot matrix / weight table: using
        # -1 as an index would silently select the *last* class.
        valid_rows = np.where(label != self.IGNORE_LABEL)[0]
        valid_labels = label[valid_rows]

        one_hot = np.zeros((num_rows, self.num_class))
        one_hot[valid_rows, valid_labels] = 1

        # One weight per sample, chosen by its label.  Ignored rows keep a
        # weight of zero, which also zeroes their gradient.
        row_weight = np.zeros((num_rows, 1))
        row_weight[valid_rows, 0] = self.CLS_WEIGHT[valid_labels]

        weighted_dx = row_weight * (pred - one_hot) / self.GRAD_DIVISOR

        # Undo the flattening so the gradient matches the input data layout.
        weighted_dx = weighted_dx.reshape((data_shape[0], -1, data_shape[1]))
        weighted_dx = weighted_dx.transpose((0, 2, 1)).reshape(data_shape)

        # Hand an NDArray (not a raw numpy array) to assign() so that the
        # 'add' request type works as well as 'write'.
        grad = mx.nd.array(weighted_dx, ctx=in_grad[0].context,
                           dtype=in_grad[0].dtype)
        self.assign(in_grad[0], req[0], grad)

@mx.operator.register("weighted_softmax_ce_loss")
class WeightedSoftmaxCrossEntropyLossProp(mx.operator.CustomOpProp):
    """Operator properties for the ``weighted_softmax_ce_loss`` custom op.

    Args:
        num_class: Number of classes, forwarded unchanged to the operator.
    """

    def __init__(self, num_class):
        # need_top_grad=False: this is a loss layer, so its backward pass
        # does not consume a gradient from the layer above.
        super(WeightedSoftmaxCrossEntropyLossProp, self).__init__(
            need_top_grad=False)
        self.num_class = num_class

    def list_arguments(self):
        """Return the names of the operator inputs."""
        return ['data', 'label']

    def list_outputs(self):
        """Return the names of the operator outputs."""
        return ['output']

    def infer_shape(self, in_shapes):
        """Derive label and output shapes from the data shape.

        The label carries one entry per sample: the data shape with the
        class axis (axis 1) removed.  For 2-D ``(batch, num_class)`` data
        this is ``(batch,)``, exactly as before; data with trailing axes gets
        ``(batch, d2, ...)`` instead of an inconsistent ``(batch,)``.
        """
        data_shape = in_shapes[0]
        label_shape = (data_shape[0],) + tuple(data_shape[2:])
        output_shape = data_shape
        return [data_shape, label_shape], [output_shape], []

    def create_operator(self, ctx, in_shapes, in_dtypes):
        """Create and return the CustomOp instance."""
        return WeightedSoftmaxCrossEntropyLoss(self.num_class)

最佳答案

我不确定在这里使用 CustomOp 是否最好,因为它可能很慢。因为 SoftmaxOutput 在向后传递中计算梯度,所以不方便按你想的那样乘以损失。但是,使用符号 API 并不太复杂。我附上了一个玩具示例,希望对您有所帮助。

import mxnet as mx
import numpy as np
import logging

# Toy example: learn the floor function from random numbers drawn from
# [-1, -1 + num_classes).  Samples whose label is -1 are masked out of the
# loss, and every other sample is weighted by a per-class weight.
n = 10000
batch_size = 128
num_classes = 10
x = (np.random.random((n,)) * num_classes) - 1
y = np.floor(x)
print(x[:2])
print(y[:2])

# define graph
data = mx.symbol.Variable('data')
label = mx.symbol.Variable('label')
class_weights = mx.symbol.Variable('class_weights')
fc = mx.sym.FullyConnected(data=data, num_hidden=num_classes)
fc = mx.sym.Activation(data=fc, act_type='relu')
logits = mx.sym.FullyConnected(data=fc, num_hidden=num_classes)
proba = mx.sym.softmax(logits)
# log_softmax avoids taking log() of a probability that underflowed to 0
log_proba = mx.sym.log_softmax(logits)

# multiply the cross entropy loss (-log p[label]) by the weight of the label
cross_entropy = -mx.sym.pick(log_proba, label) * mx.sym.pick(class_weights, label)

# mask the loss to zero when label is -1
mask = mx.sym.broadcast_not_equal(label, mx.sym.ones_like(label) * -1)
cross_entropy = cross_entropy * mask

# fit module
# One row of weights (1 .. num_classes) per sample, fed in as an extra data
# input.  A separate name keeps the `class_weights` symbol above intact.
class_weight_values = np.array([np.arange(1, 1 + num_classes)] * n)
data_iter = mx.io.NDArrayIter(
    data={'data': x, 'class_weights': class_weight_values},
    label={'label': y},
    batch_size=batch_size)
mod = mx.mod.Module(
    # The loss drives training; proba is exported for prediction only, so
    # its gradient is blocked.
    mx.sym.Group([mx.sym.MakeLoss(cross_entropy, name='ce_loss'),
                  mx.sym.BlockGrad(proba)]),
    data_names=[v.name for v in data_iter.provide_data],
    label_names=[v.name for v in data_iter.provide_label]
)
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
mod.bind(data_shapes=data_iter.provide_data,
         label_shapes=data_iter.provide_label)
mod.init_params()
mod.fit(
    data_iter,
    num_epoch=200,
    optimizer=mx.optimizer.Adam(learning_rate=0.01,
                                rescale_grad=1.0 / batch_size),
    batch_end_callback=mx.callback.Speedometer(batch_size, 200),
    eval_metric=mx.metric.Loss(name="loss", output_names=["ce_loss_output"]))

# show result, -1 are not predicted correctly as we did not compute their loss
probas = mod.predict(data_iter)[1].asnumpy()
# list(...) so the pairs are printed on Python 3, where zip() is lazy
print(list(zip(x, np.argmax(probas, axis=1))))

关于python - 如何在 mxnet 中进行加权 softmax 输出自定义操作?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/47427500/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com