
python - Simple example of mxnet model parallelism


The simple examples in the Gluon tutorials for mxnet are very helpful to those of us who are just getting started with mxnet. So far there is no simple example of model parallelism. I see the model parallelism example code for LSTM, but since I am new to mxnet it would help me (and perhaps others) to have a more streamlined example. So I created a model parallelism example by working from the regression example in the gluon tutorials and mixing in some code from mxnet.gluon.Trainer.

However, I am clearly getting something wrong: the gradients do not seem to be updated. Can anyone help identify the problem? The goal here is to create a linear regression model with three layers, each held on a different GPU. The model itself is not useful, except as an example showing how initialization and training can be done for model parallelism when using custom blocks and imperative programming.

As I understand it, Trainer() is written for data parallelism. It will not work for model parallelism, because it requires all parameters to be initialized on all GPUs.
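For reference, the data-parallel pattern I mean looks roughly like this (illustrative only; the Dense layers and the three-GPU context list are placeholders, not part of my model-parallel code below):

# Standard data-parallel recipe, for contrast only.
import mxnet as mx
from mxnet import gluon

ctx_list = [mx.gpu(i) for i in range(3)]
dp_net = gluon.nn.Sequential()
with dp_net.name_scope():
    dp_net.add(gluon.nn.Dense(2))
    dp_net.add(gluon.nn.Dense(2))
    dp_net.add(gluon.nn.Dense(1))

# Trainer expects every parameter to be replicated on every context ...
dp_net.collect_params().initialize(ctx=ctx_list)
trainer = gluon.Trainer(dp_net.collect_params(), 'adam', {'learning_rate': 0.001})
# ... and trainer.step(batch_size) then aggregates the per-GPU gradients
# across the replicas, which is exactly what a model-parallel layout does not want.

My model-parallel attempt is below.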

import os
import numpy as np
import mxnet as mx
from mxnet import nd, autograd, gluon
from mxnet.gluon import Block

# make some data
num_inputs = 2
num_outputs = 1
num_examples = 10000

def real_fn(X):
    return 2 * X[:, 0] - 3.4 * X[:, 1] + 4.2

X = np.random.normal(0,1, (num_examples, num_inputs))
noise = 0.001 * np.random.normal(0,1, (num_examples))
y = real_fn(X) + noise
y = y.reshape(-1,1)

# configuration
hidden_layers = 2
num_gpus = hidden_layers + 1
ctxList = [mx.gpu(i) for i in range(num_gpus)]
#ctxList = [mx.gpu() for i in range(num_gpus)]

#os.environ["MXNET_ENGINE_TYPE"] = "NaiveEngine"
print("\n")

# ======================================================================
class myDenseBlock(Block):
    """
    A custom layer
    """
    def __init__(self, layer_number, size_input, size_output, **kwargs):
        super(myDenseBlock, self).__init__(**kwargs)

        self.layer_number = layer_number
        self.size_input = size_input
        self.size_output = size_output

        with self.name_scope():
            # add parameters to the Block's ParameterDict.
            self.w = self.params.get(
                'weight',
                init=mx.init.Xavier(magnitude=2.24),
                shape=(size_input, size_output),
                grad_req='write')

            self.b = self.params.get(
                'bias',
                init=mx.init.Constant(0.5),
                shape=(size_output,),
                grad_req='write')

    def forward(self, x):
        x = x.as_in_context(ctxList[self.layer_number])
        with x.context:
            linear = nd.dot(x, self.w.data()) + self.b.data()
            return linear

# ======================================================================

# create net
net = gluon.nn.Sequential()
with net.name_scope():
    # initial layer, with X as input
    net.add(myDenseBlock(0,
                         size_input=2,
                         size_output=2))

    for ii in range(hidden_layers - 1):
        net.add(myDenseBlock(ii + 1,
                             size_input=2,
                             size_output=2))

    # final block, Y is nx1
    net.add(myDenseBlock(ii + 2,
                         size_input=2,
                         size_output=1))


# initialize parameters for different blocks (layers) on different gpus.
params = net.collect_params()

"""
The parameters are:
sequential0_mydenseblock0_weight
sequential0_mydenseblock0_bias
sequential0_mydenseblock1_weight
sequential0_mydenseblock1_bias
sequential0_mydenseblock2_weight
sequential0_mydenseblock2_bias
"""

print("\ninitializing:")
for i, param in enumerate(params):
    if 'mydenseblock0' in param:
        params[param].initialize(ctx=ctxList[0])
    elif 'mydenseblock1' in param:
        params[param].initialize(ctx=ctxList[1])
    elif 'mydenseblock2' in param:
        params[param].initialize(ctx=ctxList[2])
    print(" ", i, param, " ", params[param].list_data()[0].context)
print("\n")

def square_loss(yhat, y):
    return nd.mean((yhat - y) ** 2)

def mytrainer(updaters, params, ignore_stale_grad=False):
    #print("\n")
    for i, param in enumerate(params):
        #print(i, param, " ", len(params[param].list_data()), params[param].list_data()[0].context)
        if params[param].grad_req == 'null':
            continue
        if not ignore_stale_grad:
            for data in params[param].list_data():
                if not data._fresh_grad:
                    print(
                        "`%s` on context %s has not been updated" %
                        (params[param].name, str(data.context)))
                    assert False

        for upd, arr, grad in zip(updaters, params[param].list_data(), params[param].list_grad()):
            if not ignore_stale_grad or arr._fresh_grad:
                upd(i, grad, arr)
                arr._fresh_grad = False
                #print ("grad= ", grad)


batch_size = 100
epochs = 100000
iteration = -1

opt = mx.optimizer.create('adam', learning_rate=0.001, rescale_grad = 1 / batch_size)
updaters = [mx.optimizer.get_updater(opt)]

# the following definition for updaters does not work either
#updaters = [mx.optimizer.get_updater(opt) for _ in ctxList]

results = []
for e in range(epochs):
    train_groups = np.array_split(np.arange(X.shape[0]), X.shape[0] / batch_size)
    for ii, idx in enumerate(train_groups):
        iteration += 1
        xtrain, ytrain = X[idx, :], y[idx]

        xtrain = nd.array(xtrain)
        xtrain = xtrain.as_in_context(ctxList[0])

        ytrain = nd.array(ytrain).reshape((-1, 1))
        ytrain = ytrain.as_in_context(ctxList[0])

        with autograd.record():
            yhat = net(xtrain)
            error = square_loss(yhat, ytrain.as_in_context(ctxList[-1]))

        # Question: does the call to error.backward() go under the indent
        # for autograd.record() or outside the indent? The gluon examples have
        # it both ways

        error.backward()

        mytrainer(updaters, net.collect_params())

        if iteration % 10 == 0:
            results.append([iteration, error.asnumpy().item()])
            print(("epoch= {:5,d}, iter= {:6,d}, error= {:6.3E}").format(
                e, iteration, error.asnumpy().item()))

The code fails at the "if not data._fresh_grad" test in mytrainer(). The output is:

initializing:
0 sequential0_mydenseblock0_weight gpu(0)
1 sequential0_mydenseblock0_bias gpu(0)
2 sequential0_mydenseblock1_weight gpu(1)
3 sequential0_mydenseblock1_bias gpu(1)
4 sequential0_mydenseblock2_weight gpu(2)
5 sequential0_mydenseblock2_bias gpu(2)

`sequential0_mydenseblock0_weight` on context gpu(0) has not been updated

I can verify, using mx.autograd.get_symbol(error).tojson(), that the computational graph only extends to the parameters on gpu(2) and does not reach the other GPUs.
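For reference, that check looks roughly like this when run right after error is computed inside the training loop (the list_arguments() call is just an extra convenience for printing):

# Inspect which parameters the recorded graph actually reaches
# (assumes `error` from the loop above is still in scope).
sym = mx.autograd.get_symbol(error)
print(sym.list_arguments())   # per the observation above, only the gpu(2) block's
                              # weight and bias (plus the data input) show up
#print(sym.tojson())          # full JSON dump of the traced graph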

Best Answer

Yes, as per @sergei's comment, moving to v1.0.0 solves this.
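(Not part of the original answer, but a quick way to confirm which build is in use:)

import mxnet as mx
print(mx.__version__)   # should report 1.0.0 or newer for the fix described above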

Regarding "python - Simple example of mxnet model parallelism", we found a similar question on Stack Overflow: https://stackoverflow.com/questions/47029809/
