
python - Backpropagation problem; total cost keeps increasing until it reaches infinity


I built a fully connected neural network with numpy following the Welch Labs videos, but when I try to train it I seem to get exploding gradients right from the start, which is strange. I will include the whole code; it can be tested in Python 3+. Only costFunctionPrime seems to break the gradient descent, but I have no idea what is going on. Can someone smarter than me help?

Edit: trng_input and trng_output are not the data I actually used; I trained on a large dataset.

import numpy as np
import random

trng_input = [[random.random() for _ in range(7)] for _ in range(100)]
trng_output = [[random.random() for _ in range(2)] for _ in range(100)]

def relu(x):
    return x * (x > 0)

def reluprime(x):
    return (x>0).astype(x.dtype)


class Neural_Net():
    def __init__(self, data_input, data_output):
        self.data_input = data_input
        self.trng_output = trng_output
        self.bias = 0
        self.nodes = np.array([7, 2])
        self.LR = 0.01
        self.weightinit()
        self.training(1000, self.LR)

    def randomweight(self, n):
        output = []
        for i in range(n):
            output.append(random.uniform(-1,1))
        return output

    def weightinit(self):
        self.weights = []
        for n in range(len(self.nodes)-1):
            temp = []
            for _ in range(self.nodes[n]+self.bias):
                temp.append(self.randomweight(self.nodes[n+1]))
            self.weights.append(temp)
        self.weights = [np.array(tuple(self.weights[i])) for i in range(len(self.weights))]


    def forward(self, data):
        self.Z = []
        self.A = [np.array(data)]

        for layer in range(len(self.weights)):
            self.Z.append(np.dot(self.A[layer], self.weights[layer]))
            self.A.append(relu(self.Z[layer]))

        self.output = self.A[-1]
        return self.output

    def costFunction(self):
        self.totalcost = 0.5*sum((self.trng_output-self.output)**2)
        return self.totalcost

    def costFunctionPrime(self):
        self.forward(self.data_input)
        self.delta = [[] for x in range(len(self.weights))]
        self.DcostDw = [[] for x in range(len(self.weights))]

        for layer in reversed(range(len(self.weights))):
            Zprime = reluprime(self.Z[layer])
            if layer == len(self.weights)-1:
                self.delta[layer] = np.multiply(-(self.trng_output-self.output), Zprime)
            else:
                self.delta[layer] = np.dot(self.delta[layer+1], self.weights[layer+1].T) * Zprime
            self.DcostDw[layer] = np.dot(self.A[layer].T, self.delta[layer])

        return self.DcostDw

    def backprop(self, LR):
        self.DcostDw = (np.array(self.DcostDw)*LR).tolist()
        self.weights = (np.array(self.weights) - np.array(self.DcostDw)).tolist()

    def training(self, iteration, LR):
        for i in range(iteration):
            self.costFunctionPrime()
            self.backprop(LR)
            if (i/1000.0) == (i/1000):
                print(self.costFunction())
                print(sum(self.costFunction())/len(self.costFunction()))

NN = Neural_Net(trng_input, trng_output)

As requested, here is the expected result (what I get when using the sigmoid activation function):

As you can see, the numbers are decreasing, so the network is training.

Here is the result when using the relu activation function:

Here the network gets stuck and does not train; it never trains with the relu activation function, and I would like to understand why.

Best Answer

If your cost does not decrease with ReLU activation, it seems that your network is stuck in the region where the ReLU input is negative, so its output is a constant zero and no gradient flows back - the neurons are dead.
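For example, a minimal sketch (reusing the relu and reluprime definitions from the question; the sample pre-activations are made up for illustration) of what happens to a unit whose ReLU input stays negative:

import numpy as np

def relu(x):
    return x * (x > 0)

def reluprime(x):
    return (x > 0).astype(x.dtype)

z = np.array([-3.0, -0.5, -1.2])   # pre-activations that ended up negative
print(relu(z))                     # all zeros: the unit outputs a constant zero
print(reluprime(z))                # all zeros: no gradient flows back,
                                   # so the incoming weights are never updated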

You can fix this by using a leaky ReLU instead of a plain ReLU. You should also start training the biases. With ReLU, it is recommended to initialize biases with small positive values to avoid this dead-neuron problem.
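A minimal sketch of that bias initialization (the layer sizes and the 0.01 constant are illustrative choices, not values from the question):

import numpy as np

nodes = [7, 10, 2]  # illustrative layer sizes: input, hidden, output
# one bias vector per layer after the input; small positive values keep the
# initial ReLU pre-activations from all being negative
biases = [np.full(nodes[i + 1], 0.01) for i in range(len(nodes) - 1)]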

For some problems, it also helps to decrease the learning rate and make the network deeper. You may also want to make the learning rate adjustable, e.g. multiply LR by 0.5 whenever the cost fails to decrease.
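A hedged sketch of that learning-rate schedule, written against the Neural_Net class shown below (the training_with_lr_decay name is made up for illustration):

def training_with_lr_decay(model, iterations, LR):
    # Halve the learning rate whenever the total cost stops decreasing.
    prev_cost = float("inf")
    for i in range(iterations):
        model.costFunctionPrime()                # compute gradients on the training data
        model.backprop(LR)                       # apply the gradient step
        cost = float(model.costFunction().sum())
        if cost >= prev_cost:
            LR *= 0.5                            # no improvement: shrink the step size
        prev_cost = cost
    return LR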

With a leaky ReLU, trainable biases, and some refactoring, your model could look like this:

import numpy as np
trng_input = np.random.uniform(size=(1000, 7))
trng_output = np.column_stack([np.sin(trng_input).sum(axis=1), np.cos(trng_input).sum(axis=1)])

LEAK = 0.0001

def relu(x):
    return x * (x > 0) + LEAK * x * (x < 0)

def reluprime(x):
    return (x>0).astype(x.dtype) + LEAK * (x<0).astype(x.dtype)


class Neural_Net():
    def __init__(self, data_input, data_output):
        self.data_input = data_input
        self.trng_output = trng_output
        self.nodes = np.array([7, 10, 2])
        self.LR = 0.00001
        self.weightinit()
        self.training(2000, self.LR)

    def weightinit(self):
        self.weights = [np.random.uniform(-1, 1, size=self.nodes[i:(i+2)]) for i in range(len(self.nodes) - 1)]
        self.biases = [np.random.uniform(0, 1, size=self.nodes[i+1]) for i in range(len(self.nodes) - 1)]

    def forward(self, data):
        self.Z = []
        self.A = [np.array(data)]
        for layer in range(len(self.weights)):
            self.Z.append(np.dot(self.A[layer], self.weights[layer]) + self.biases[layer])
            self.A.append(relu(self.Z[layer]))
        self.output = self.A[-1]
        return self.output

    def costFunction(self):
        self.totalcost = 0.5*np.sum((self.trng_output-self.output)**2, axis=0)
        return self.totalcost

    def costFunctionPrime(self):
        self.forward(self.data_input)
        self.delta = [[] for x in range(len(self.weights))]
        self.DcostDw = [[] for x in range(len(self.weights))]
        self.DcostDb = [[] for x in range(len(self.weights))]
        for layer in reversed(range(len(self.weights))):
            Zprime = reluprime(self.Z[layer])
            if layer == len(self.weights)-1:
                self.delta[layer] = np.multiply(-(self.trng_output-self.output), Zprime)
            else:
                self.delta[layer] = np.dot(self.delta[layer+1], self.weights[layer+1].T) * Zprime
            self.DcostDw[layer] = np.dot(self.A[layer].T, self.delta[layer])
            self.DcostDb[layer] = np.sum(self.delta[layer], axis=0)

    def backprop(self, LR):
        for layer in range(len(self.weights)):
            self.weights[layer] -= self.DcostDw[layer] * LR
            self.biases[layer] -= self.DcostDb[layer] * LR

    def training(self, iteration, LR):
        for i in range(iteration):
            self.costFunctionPrime()
            self.backprop(LR)
            if (i/100.0) == (i/100):
                print(self.costFunction())
                print(sum(self.costFunction())/len(self.costFunction()))

NN = Neural_Net(trng_input, trng_output)

Regarding "python - Backpropagation problem; total cost keeps increasing until it reaches infinity", a similar question can be found on Stack Overflow: https://stackoverflow.com/questions/50534429/
