python - Neural network backpropagation algorithm not working in Python


I am writing a neural network in Python, following the example here. It seems that the backpropagation algorithm isn't working, given that the network fails to produce the right value (within a margin of error) after being trained 10,000 times. Specifically, I am training it to compute the sine function in the following example:

import numpy as np

class Neuralnet:
    def __init__(self, neurons):
        self.weights = []
        self.inputs = []
        self.outputs = []
        self.errors = []
        self.rate = .1
        for layer in range(len(neurons)):
            self.inputs.append(np.empty(neurons[layer]))
            self.outputs.append(np.empty(neurons[layer]))
            self.errors.append(np.empty(neurons[layer]))
        for layer in range(len(neurons)-1):
            self.weights.append(
                np.random.normal(
                    scale=1/np.sqrt(neurons[layer]),
                    size=[neurons[layer], neurons[layer + 1]]
                    )
                )

    def feedforward(self, inputs):
        self.inputs[0] = inputs
        for layer in range(len(self.weights)):
            self.outputs[layer] = np.tanh(self.inputs[layer])
            self.inputs[layer + 1] = np.dot(self.weights[layer].T, self.outputs[layer])
        self.outputs[-1] = np.tanh(self.inputs[-1])

    def backpropagate(self, targets):
        gradient = 1 - self.outputs[-1] * self.outputs[-1]
        self.errors[-1] = gradient * (self.outputs[-1] - targets)
        for layer in reversed(range(len(self.errors) - 1)):
            gradient = 1 - self.outputs[layer] * self.outputs[layer]
            self.errors[layer] = gradient * np.dot(self.weights[layer], self.errors[layer + 1])
        for layer in range(len(self.weights)):
            self.weights[layer] -= self.rate * np.outer(self.outputs[layer], self.errors[layer + 1])

def xor_example():
    net = Neuralnet([2, 2, 1])
    for step in range(100000):
        net.feedforward([0, 0])
        net.backpropagate([-1])
        net.feedforward([0, 1])
        net.backpropagate([1])
        net.feedforward([1, 0])
        net.backpropagate([1])
        net.feedforward([1, 1])
        net.backpropagate([-1])
    net.feedforward([1, 1])
    print(net.outputs[-1])

def identity_example():
    net = Neuralnet([1, 3, 1])
    for step in range(100000):
        x = np.random.normal()
        net.feedforward([x])
        net.backpropagate([np.tanh(x)])
    net.feedforward([-2])
    print(net.outputs[-1])

def sine_example():
    net = Neuralnet([1, 6, 1])
    for step in range(100000):
        x = np.random.normal()
        net.feedforward([x])
        net.backpropagate([np.tanh(np.sin(x))])
    net.feedforward([3])
    print(net.outputs[-1])

sine_example()

The output fails to get anywhere close to tanh(sin(3)) = 0.140190616. I suspected a mistake involving wrong indices or alignment, but NumPy isn't raising any errors like that. Any tips on where I went wrong?

EDIT: I forgot to add the bias neurons. Here is the updated code:

import numpy as np

class Neuralnet:
    def __init__(self, neurons):
        self.weights = []
        self.outputs = []
        self.inputs = []
        self.errors = []
        self.offsets = []
        self.rate = .01
        for layer in range(len(neurons)-1):
            self.weights.append(
                np.random.normal(
                    scale=1/np.sqrt(neurons[layer]),
                    size=[neurons[layer], neurons[layer + 1]]
                    )
                )
            self.outputs.append(np.empty(neurons[layer]))
            self.inputs.append(np.empty(neurons[layer]))
            self.errors.append(np.empty(neurons[layer]))
            self.offsets.append(np.random.normal(scale=1/np.sqrt(neurons[layer]), size=neurons[layer + 1]))
        self.inputs.append(np.empty(neurons[-1]))
        self.errors.append(np.empty(neurons[-1]))

    def feedforward(self, inputs):
        self.inputs[0] = inputs
        for layer in range(len(self.weights)):
            self.outputs[layer] = np.tanh(self.inputs[layer])
            self.inputs[layer + 1] = self.offsets[layer] + np.dot(self.weights[layer].T, self.outputs[layer])

    def backpropagate(self, targets):
        self.errors[-1] = self.inputs[-1] - targets
        for layer in reversed(range(len(self.errors) - 1)):
            gradient = 1 - self.outputs[layer] * self.outputs[layer]
            self.errors[layer] = gradient * np.dot(self.weights[layer], self.errors[layer + 1])
        for layer in range(len(self.weights)):
            self.weights[layer] -= self.rate * np.outer(self.outputs[layer], self.errors[layer + 1])
            self.offsets[layer] -= self.rate * self.errors[layer + 1]

def sine_example():
    net = Neuralnet([1, 5, 1])
    for step in range(10000):
        x = np.random.uniform(-5, 5)
        net.feedforward([x])
        net.backpropagate([np.sin(x)])
    net.feedforward([np.pi])
    print(net.inputs[-1])

def xor_example():
    net = Neuralnet([2, 2, 1])
    for step in range(10000):
        net.feedforward([0, 0])
        net.backpropagate([-1])
        net.feedforward([0, 1])
        net.backpropagate([1])
        net.feedforward([1, 0])
        net.backpropagate([1])
        net.feedforward([1, 1])
        net.backpropagate([-1])
    net.feedforward([1, 1])
    print(net.outputs[-1])

def identity_example():
    net = Neuralnet([1, 3, 1])
    for step in range(10000):
        x = np.random.normal()
        net.feedforward([x])
        net.backpropagate([x])
    net.feedforward([-2])
    print(net.outputs[-1])

identity_example()

Best Answer

I think you are training the NN in the wrong way. You have a loop over 10,000 iterations and feed a new sample in each cycle. The NN will never get trained in this case.

(This statement is wrong! See the update!)

What you need to do is to generate a large set of true samples Y = sin(X), feed it to your network ONCE, and then iterate over that training set forwards and backwards in order to minimize the cost function. To check the algorithm, you may want to plot the cost function against the iteration number and make sure it goes down.
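
For illustration, here is a minimal sketch of that offline scheme driving the Neuralnet class from the question (the training-set size, input range and epoch count below are arbitrary choices, not values from this answer):

import numpy as np
import matplotlib.pyplot as plt

# assumes the Neuralnet class from the question is already defined
net = Neuralnet([1, 6, 1])

# generate the training set once
m = 200
X = np.random.uniform(-5, 5, m)   # inputs
T = np.sin(X)                     # targets, already in [-1, 1]

cost_history = []
for epoch in range(1000):
    cost = 0.0
    for x, t in zip(X, T):        # iterate over the same fixed set every epoch
        net.feedforward([x])
        net.backpropagate([t])
        cost += 0.5 * (net.outputs[-1][0] - t) ** 2
    cost_history.append(cost / m)

# the cost should go down with the number of epochs
plt.plot(cost_history)
plt.show()

Note that this sketch still updates the weights after every sample; the code further below instead accumulates the gradient over the whole batch before each update.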

Another important point is the initialization of the weights. Your numbers are pretty large, so the network takes a lot of time to converge, especially when using low rates. It's good practice to generate the initial weights uniformly in some small range [-eps .. eps].
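
As a sketch, applied to the weight-initialization loop in the question's __init__ that would look roughly like this (eps = 0.12 is just an example value):

        eps = 0.12  # small range for the uniform initialization
        for layer in range(len(neurons) - 1):
            self.weights.append(
                np.random.uniform(-eps, eps, size=[neurons[layer], neurons[layer + 1]])
            )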

In my code I implemented two different activation functions: sigmoid() and tanh(). You need to scale your inputs depending on the selected function: into [0 .. 1] for sigmoid and into [-1 .. 1] for tanh.
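
For example, a hypothetical helper (not part of the code below) that maps a target y already lying in [-1, 1] to the range each activation expects could look like this:

def scale_target(y, activation):
    # assumes y is already in [-1, 1]
    if activation == 'sigmoid':
        return (y + 1.0) / 2.0   # map [-1, 1] to [0, 1]
    elif activation == 'tanh':
        return y                 # already in the right range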

Here are some images that show the cost function and the resulting predictions for the sigmoid() and tanh() activation functions:

[Image: sigmoid activation]

[Image: tanh activation]

As you can see, the sigmoid() activation gives somewhat better results than tanh().

Also, I got much better predictions with a [1, 6, 1] network than with a bigger 4-layer network [1, 6, 4, 1], so the size of the NN is not always the crucial factor. Here is the prediction for the 4-layer network mentioned above:

[Image: sigmoid activation for the bigger network]

Here is my code with some comments. I tried to use your notation where possible.

import numpy as np
import math
import matplotlib.pyplot as plt

class Neuralnet:
    def __init__(self, neurons, activation):
        self.weights = []
        self.inputs = []
        self.outputs = []
        self.errors = []
        self.rate = 0.5
        self.activation = activation  #sigmoid or tanh

        self.neurons = neurons
        self.L = len(self.neurons)  #number of layers

        eps = 0.12;  # range for uniform distribution -eps..+eps
        for layer in range(len(neurons)-1):
            self.weights.append(np.random.uniform(-eps, eps, size=(neurons[layer+1], neurons[layer]+1)))


    ###################################################################################################
    def train(self, X, Y, iter_count):

        m = X.shape[0];

        for layer in range(self.L):
            self.inputs.append(np.empty([m, self.neurons[layer]]))
            self.errors.append(np.empty([m, self.neurons[layer]]))

            if (layer < self.L - 1):
                self.outputs.append(np.empty([m, self.neurons[layer]+1]))
            else:
                self.outputs.append(np.empty([m, self.neurons[layer]]))

        #accumulate the cost function
        J_history = np.zeros([iter_count, 1])


        for i in range(iter_count):

            self.feedforward(X)

            J = self.cost(Y, self.outputs[self.L-1])
            J_history[i, 0] = J

            self.backpropagate(Y)


        #plot the cost function to check the descent
        plt.plot(J_history)
        plt.show()


    ###################################################################################################
    def cost(self, Y, H):
        J = np.sum(np.sum(np.power((Y - H), 2), axis=0))/(2*m)
        return J

    ###################################################################################################
    def feedforward(self, X):

        m = X.shape[0];

        self.outputs[0] = np.concatenate( (np.ones([m, 1]), X), axis=1)

        for i in range(1, self.L):
            self.inputs[i] = np.dot( self.outputs[i-1], self.weights[i-1].T )

            if (self.activation == 'sigmoid'):
                output_temp = self.sigmoid(self.inputs[i])
            elif (self.activation == 'tanh'):
                output_temp = np.tanh(self.inputs[i])


            if (i < self.L - 1):
                self.outputs[i] = np.concatenate( (np.ones([m, 1]), output_temp), axis=1)
            else:
                self.outputs[i] = output_temp

    ###################################################################################################
    def backpropagate(self, Y):

        self.errors[self.L-1] = self.outputs[self.L-1] - Y

        for i in range(self.L - 2, 0, -1):

            if (self.activation == 'sigmoid'):
                self.errors[i] = np.dot( self.errors[i+1], self.weights[i][:, 1:] ) * self.sigmoid_prime(self.inputs[i])
            elif (self.activation == 'tanh'):
                self.errors[i] = np.dot( self.errors[i+1], self.weights[i][:, 1:] ) * (1 - self.outputs[i][:, 1:]*self.outputs[i][:, 1:])

        for i in range(0, self.L-1):
            grad = np.dot(self.errors[i+1].T, self.outputs[i]) / m
            self.weights[i] = self.weights[i] - self.rate*grad

    ###################################################################################################
    def sigmoid(self, z):
        s = 1.0/(1.0 + np.exp(-z))
        return s

    ###################################################################################################
    def sigmoid_prime(self, z):
        s = self.sigmoid(z)*(1 - self.sigmoid(z))
        return s

    ###################################################################################################
    def predict(self, X, weights):

        m = X.shape[0];

        self.inputs = []
        self.outputs = []
        self.weights = weights

        for layer in range(self.L):
            self.inputs.append(np.empty([m, self.neurons[layer]]))

            if (layer < self.L - 1):
                self.outputs.append(np.empty([m, self.neurons[layer]+1]))
            else:
                self.outputs.append(np.empty([m, self.neurons[layer]]))

        self.feedforward(X)

        return self.outputs[self.L-1]


###################################################################################################
# MAIN PART

activation1 = 'sigmoid'  # the input should be scaled into [ 0..1]
activation2 = 'tanh'     # the input should be scaled into [-1..1]

activation = activation1

net = Neuralnet([1, 6, 1], activation)  # structure of the NN and its activation function


##########################################################################################
# TRAINING

m = 1000  #size of the training set
X = np.linspace(0, 4*math.pi, num = m).reshape(m, 1);  # input training set


Y = np.sin(X)  # target

kx = 0.1  # noise parameter
noise = (2.0*np.random.uniform(0, kx, m) - kx).reshape(m, 1)
Y = Y + noise  # noisy target

# scaling of the target depending on the activation function
if (activation == 'sigmoid'):
    Y_scaled = (Y/(1+kx) + 1)/2.0
elif (activation == 'tanh'):
    Y_scaled = Y/(1+kx)


# number of the iteration for the training stage
iter_count = 20000
net.train(X, Y_scaled, iter_count)  #training

# gained weights
trained_weights = net.weights

##########################################################################################
# PREDICTION

m_new = 40  #size of the prediction set
X_new = np.linspace(0, 4*math.pi, num = m_new).reshape(m_new, 1);

Y_new = net.predict(X_new, trained_weights)  # prediction

#rescaling of the result
if (activation == 'sigmoid'):
    Y_new = (2.0*Y_new - 1.0) * (1+kx)
elif (activation == 'tanh'):
    Y_new = Y_new * (1+kx)

# visualization
plt.plot(X, Y)
plt.plot(X_new, Y_new, 'ro')
plt.show()

raw_input('press any key to exit')

UPDATE

I would like to take back my statement about the training method used in your code. The network can indeed be trained using only one sample per iteration. I got interesting results with online training using both the sigmoid and tanh activation functions:

Online training using sigmoid (cost function and prediction):

[Image: sigmoid online training]

Online training using tanh (cost function and prediction):

[Image: tanh online training]

As can be seen, choosing sigmoid as the activation function gives better performance. The cost function doesn't look as good as during offline training, but at least it tends to go down.

I also plotted the cost function for your implementation, and it looks rather jittery as well:

[Image: cost function of the original implementation]
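
For reference, here is a minimal sketch of how such a curve can be recorded for the updated code from the question (the window size and ranges are arbitrary choices of mine):

import numpy as np
import matplotlib.pyplot as plt

# assumes the updated Neuralnet class from the question is already defined
net = Neuralnet([1, 5, 1])
errors = []
for step in range(10000):
    x = np.random.uniform(-5, 5)
    net.feedforward([x])
    errors.append(0.5 * (net.inputs[-1][0] - np.sin(x)) ** 2)  # squared error before the update
    net.backpropagate([np.sin(x)])

# smooth the noisy per-sample errors with a moving average before plotting
window = 200
smoothed = np.convolve(errors, np.ones(window) / window, mode='valid')
plt.plot(smoothed)
plt.show()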

It may be a good idea to try your code with the sigmoid or even the ReLU function.
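
As a rough, untested sketch of that suggestion, the two methods of the updated Neuralnet from the question could be switched to ReLU hidden units (keeping the linear output layer from the edit) along these lines; a smaller learning rate may be needed:

    def feedforward(self, inputs):
        self.inputs[0] = inputs
        for layer in range(len(self.weights)):
            if layer == 0:
                self.outputs[layer] = np.asarray(self.inputs[layer], dtype=float)  # input layer: no activation
            else:
                self.outputs[layer] = np.maximum(self.inputs[layer], 0)            # ReLU on hidden layers
            self.inputs[layer + 1] = self.offsets[layer] + np.dot(self.weights[layer].T, self.outputs[layer])

    def backpropagate(self, targets):
        self.errors[-1] = self.inputs[-1] - targets                                # linear output, squared error
        for layer in reversed(range(1, len(self.errors) - 1)):                     # the input layer's error is never used
            gradient = (self.inputs[layer] > 0).astype(float)                      # ReLU derivative
            self.errors[layer] = gradient * np.dot(self.weights[layer], self.errors[layer + 1])
        for layer in range(len(self.weights)):
            self.weights[layer] -= self.rate * np.outer(self.outputs[layer], self.errors[layer + 1])
            self.offsets[layer] -= self.rate * self.errors[layer + 1]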

Here is the updated source code. To switch between the online and offline training modes, just change the method variable.

import numpy as np
import math
import matplotlib.pyplot as plt

class Neuralnet:
    def __init__(self, neurons, activation):
        self.weights = []
        self.inputs = []
        self.outputs = []
        self.errors = []
        self.rate = 0.2
        self.activation = activation  #sigmoid or tanh

        self.neurons = neurons
        self.L = len(self.neurons)  #number of layers

        eps = 0.12;  #range for uniform distribution -eps..+eps
        for layer in range(len(neurons)-1):
            self.weights.append(np.random.uniform(-eps, eps, size=(neurons[layer+1], neurons[layer]+1)))


    ###################################################################################################
    def train(self, X, Y, iter_count):

        m = X.shape[0];

        for layer in range(self.L):
            self.inputs.append(np.empty([m, self.neurons[layer]]))
            self.errors.append(np.empty([m, self.neurons[layer]]))

            if (layer < self.L - 1):
                self.outputs.append(np.empty([m, self.neurons[layer]+1]))
            else:
                self.outputs.append(np.empty([m, self.neurons[layer]]))

        #accumulate the cost function
        J_history = np.zeros([iter_count, 1])


        for i in range(iter_count):

            self.feedforward(X)

            J = self.cost(Y, self.outputs[self.L-1])
            J_history[i, 0] = J

            self.backpropagate(Y)


        #plot the cost function to check the descent
        #plt.plot(J_history)
        #plt.show()


    ###################################################################################################
    def cost(self, Y, H):
        J = np.sum(np.sum(np.power((Y - H), 2), axis=0))/(2*m)
        return J


    ###################################################################################################
    def cost_online(self, min_x, max_x, iter_number):
        h_arr = np.zeros([iter_number, 1])
        y_arr = np.zeros([iter_number, 1])

        for step in range(iter_number):
            x = np.random.uniform(min_x, max_x, 1).reshape(1, 1)

            self.feedforward(x)
            h_arr[step, 0] = self.outputs[-1]
            y_arr[step, 0] = np.sin(x)


        J = np.sum(np.sum(np.power((y_arr - h_arr), 2), axis=0))/(2*iter_number)
        return J

    ###################################################################################################
    def feedforward(self, X):

        m = X.shape[0];

        self.outputs[0] = np.concatenate( (np.ones([m, 1]), X), axis=1)

        for i in range(1, self.L):
            self.inputs[i] = np.dot( self.outputs[i-1], self.weights[i-1].T )

            if (self.activation == 'sigmoid'):
                output_temp = self.sigmoid(self.inputs[i])
            elif (self.activation == 'tanh'):
                output_temp = np.tanh(self.inputs[i])


            if (i < self.L - 1):
                self.outputs[i] = np.concatenate( (np.ones([m, 1]), output_temp), axis=1)
            else:
                self.outputs[i] = output_temp

    ###################################################################################################
    def backpropagate(self, Y):

        self.errors[self.L-1] = self.outputs[self.L-1] - Y

        for i in range(self.L - 2, 0, -1):

            if (self.activation == 'sigmoid'):
                self.errors[i] = np.dot( self.errors[i+1], self.weights[i][:, 1:] ) * self.sigmoid_prime(self.inputs[i])
            elif (self.activation == 'tanh'):
                self.errors[i] = np.dot( self.errors[i+1], self.weights[i][:, 1:] ) * (1 - self.outputs[i][:, 1:]*self.outputs[i][:, 1:])

        for i in range(0, self.L-1):
            grad = np.dot(self.errors[i+1].T, self.outputs[i]) / m
            self.weights[i] = self.weights[i] - self.rate*grad


    ###################################################################################################
    def sigmoid(self, z):
        s = 1.0/(1.0 + np.exp(-z))
        return s

    ###################################################################################################
    def sigmoid_prime(self, z):
        s = self.sigmoid(z)*(1 - self.sigmoid(z))
        return s

    ###################################################################################################
    def predict(self, X, weights):

        m = X.shape[0];

        self.inputs = []
        self.outputs = []
        self.weights = weights

        for layer in range(self.L):
            self.inputs.append(np.empty([m, self.neurons[layer]]))

            if (layer < self.L - 1):
                self.outputs.append(np.empty([m, self.neurons[layer]+1]))
            else:
                self.outputs.append(np.empty([m, self.neurons[layer]]))

        self.feedforward(X)

        return self.outputs[self.L-1]


###################################################################################################
# MAIN PART

activation1 = 'sigmoid'  #the input should be scaled into [0..1]
activation2 = 'tanh'     #the input should be scaled into [-1..1]

activation = activation1

net = Neuralnet([1, 6, 1], activation)  # structure of the NN and its activation function


method1 = 'online'
method2 = 'offline'

method = method1

kx = 0.1  #noise parameter

###################################################################################################
# TRAINING

if (method == 'offline'):

    m = 1000  #size of the training set
    X = np.linspace(0, 4*math.pi, num = m).reshape(m, 1);  #input training set


    Y = np.sin(X)  #target


    noise = (2.0*np.random.uniform(0, kx, m) - kx).reshape(m, 1)
    Y = Y + noise  #noisy target

    #scaling of the target depending on the activation function
    if (activation == 'sigmoid'):
        Y_scaled = (Y/(1+kx) + 1)/2.0
    elif (activation == 'tanh'):
        Y_scaled = Y/(1+kx)


    #number of the iteration for the training stage
    iter_count = 20000
    net.train(X, Y_scaled, iter_count)  #training

elif (method == 'online'):

    sampling_count = 100000  # number of samplings during the training stage


    m = 1  #batch size

    iter_count = sampling_count/m

    for layer in range(net.L):
        net.inputs.append(np.empty([m, net.neurons[layer]]))
        net.errors.append(np.empty([m, net.neurons[layer]]))

        if (layer < net.L - 1):
            net.outputs.append(np.empty([m, net.neurons[layer]+1]))
        else:
            net.outputs.append(np.empty([m, net.neurons[layer]]))

    J_history = []
    step_history = []

    for i in range(iter_count):
        X = np.random.uniform(0, 4*math.pi, m).reshape(m, 1)

        Y = np.sin(X)  #target
        noise = (2.0*np.random.uniform(0, kx, m) - kx).reshape(m, 1)
        Y = Y + noise  #noisy target

        #scaling of the target depending on the activation function
        if (activation == 'sigmoid'):
            Y_scaled = (Y/(1+kx) + 1)/2.0
        elif (activation == 'tanh'):
            Y_scaled = Y/(1+kx)

        net.feedforward(X)
        net.backpropagate(Y_scaled)


        if (np.remainder(i, 1000) == 0):
            J = net.cost_online(0, 4*math.pi, 1000)
            J_history.append(J)
            step_history.append(i)

    plt.plot(step_history, J_history)
    plt.title('Batch size ' + str(m) + ', rate ' + str(net.rate) + ', samples ' + str(sampling_count))
    #plt.ylim([0, 0.1])

    plt.show()

#gained weights
trained_weights = net.weights

##########################################################################################
# PREDICTION

m_new = 40  #size of the prediction set
X_new = np.linspace(0, 4*math.pi, num = m_new).reshape(m_new, 1);

Y_new = net.predict(X_new, trained_weights)  #prediction

#rescaling of the result
if (activation == 'sigmoid'):
    Y_new = (2.0*Y_new - 1.0) * (1+kx)
elif (activation == 'tanh'):
    Y_new = Y_new * (1+kx)

#visualization

#fake sine curve to show the ideal signal
if (method == 'online'):
    X = np.linspace(0, 4*math.pi, num = 100)
    Y = np.sin(X)

plt.plot(X, Y)

plt.plot(X_new, Y_new, 'ro')
if (method == 'online'):
    plt.title('Batch size ' + str(m) + ', rate ' + str(net.rate) + ', samples ' + str(sampling_count))
plt.ylim([-1.5, 1.5])
plt.show()

raw_input('press any key to exit')

Now a few remarks on your current code:

Your sine function looks like this:

def sine_example():
    net = Neuralnet([1, 6, 1])
    for step in range(100000):
        x = np.random.normal()
        net.feedforward([x])
        net.backpropagate([np.tanh(np.sin(x))])
    net.feedforward([3])
    print(net.outputs[-1])

I don't know why you use tanh in your target. If you really want the tanh of sine as the target, you need to scale it to [-1 .. 1], because tanh(sin(x)) only returns values in the range [-0.76 .. 0.76].
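
If the tanh-of-sine target is really wanted, one way to rescale it looks like this (dividing by np.tanh(1) ≈ 0.76 is my suggestion, assuming net is the Neuralnet from the question):

x = np.random.normal()
raw_target = np.tanh(np.sin(x))           # lies roughly in [-0.76, 0.76]
scaled_target = raw_target / np.tanh(1)   # stretched back to [-1, 1]
net.feedforward([x])
net.backpropagate([scaled_target])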

The next point is the range of your training set. You use x = np.random.normal() to generate the samples. Here is the distribution of such an input:

[Image: histogram of np.random.normal() samples]

After that, you want your network to predict the sine of 3, but the network has almost never seen this number during the training stage. I would instead use a uniform distribution over a wider range to generate the samples.
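
Putting both remarks together, here is a sketch of a modified sine_example (the input range is an arbitrary choice of mine):

def sine_example():
    net = Neuralnet([1, 6, 1])
    for step in range(100000):
        x = np.random.uniform(-2 * np.pi, 2 * np.pi)          # uniform samples over a wider range
        net.feedforward([x])
        net.backpropagate([np.tanh(np.sin(x)) / np.tanh(1)])  # rescaled target in [-1, 1]
    net.feedforward([3])
    print(net.outputs[-1])

The printed value then approximates tanh(sin(3)) / tanh(1), so it has to be multiplied by np.tanh(1) again before comparing it with tanh(sin(3)).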

For more on python - Neural network backpropagation algorithm not working in Python, see the original question on Stack Overflow: https://stackoverflow.com/questions/34649152/
