作者热门文章
- html - 出于某种原因,IE8 对我的 Sass 文件中继承的 html5 CSS 不友好?
- JMeter 在响应断言中使用 span 标签的问题
- html - 在 :hover and :active? 上具有不同效果的 CSS 动画
- html - 相对于居中的 html 内容固定的 CSS 重复背景?
我正在使用“moons”数据集遵循本指南:https://vincentblog.xyz/posts/neural-networks-from-scratch-in-python 。我想再添加一个隐藏层(也是 4 个神经元),那么我该如何扩展它呢?如果我再添加一个隐藏层,我会特别对前馈和反向传播部分感到困惑。下面的代码仅针对一个隐藏层
def forward_propagation(X, W1, b1, W2, b2):
forward_params = {}
Z1 = np.dot(W1, X.T) + b1
A1 = relu(Z1)
Z2 = np.dot(W2, A1) + b2
A2 = sigmoid(Z2)
forward_params = {
"Z1": Z1,
"A1": A1,
"Z2": Z2,
"A2": A2,
}
return forward_params
def backward_propagation(forward_params, X, Y):
A2 = forward_params["A2"]
Z2 = forward_params["Z2"]
A1 = forward_params["A1"]
Z1 = forward_params["Z1"]
data_size = Y.shape[1]
dZ2 = A2 - Y
dW2 = np.dot(dZ2, A1.T) / data_size
db2 = np.sum(dZ2, axis=1) / data_size
dZ1 = np.dot(dW2.T, dZ2) * prime_relu(Z1)
dW1 = np.dot(dZ1, X) / data_size
db1 = np.sum(dZ1, axis=1) / data_size
db1 = np.reshape(db1, (db1.shape[0], 1))
grads = {
"dZ2": dZ2,
"dW2": dW2,
"db2": db2,
"dZ1": dZ1,
"dW1": dW1,
"db1": db1,
}
return grads
并且还要修改main函数:
def one_hidden_layer_model(X, y, epochs=1000, learning_rate=0.003):
np.random.seed(0)
input_size = X_train.shape[1]
output_size = 1
hidden_layer_nodes = 4
W1 = np.random.randn(hidden_layer_nodes, input_size) / np.sqrt(input_size)
b1 = np.zeros((hidden_layer_nodes, 1))
W2 = np.random.randn(output_size, hidden_layer_nodes) / np.sqrt(hidden_layer_nodes)
b2 = np.zeros((output_size, 1))
loss_history = []
for i in range(epochs):
forward_params = forward_propagation(X, W1, b1, W2, b2)
A2 = forward_params["A2"]
loss = loss_function(A2, y)
grads = backward_propagation(forward_params, X, y)
W1 -= learning_rate * grads["dW1"]
b1 -= learning_rate * grads["db1"]
W2 -= learning_rate * grads["dW2"]
b2 -= learning_rate * grads["db2"]
if i % 1000 == 0:
loss_history.append(loss)
print ("Costo e iteracion %i: %f" % (i, loss))
return W1, b1, W2, b2
根据 C. Leconte 的回答,它可以工作,但我得到的准确度值非常低。这是代码部分:
def predict(W1, b1, W2, b2, W3, b3, X):
data_size = X.shape[0]
forward_params = forward_propagation(X, W1, b1, W2, b2, W3, b3)
y_prediction = np.zeros((1, data_size))
A3 = forward_params["A3"]
for i in range(A3.shape[1]):
y_prediction[0, i] = 1 if A3[0, i] > 0.5 else 0
return y_prediction
train_predictions = predict(W1, b1, W2, b2, W3, b3, X_train)
validation_predictions = predict(W1, b1, W2, b2, W3, b3, X_val)
print("train accuracy: {} %".format(100 - np.mean(np.abs(train_predictions - y_train)) * 100))
print("test accuracy: {} %".format(100 - np.mean(np.abs(validation_predictions - y_val)) * 100))
我尝试了不同的学习率,但最多获得 50++% 的准确率。
最佳答案
def forward_propagation(X, W1, b1, W2, b2, W3, b3):
forward_params = {}
Z1 = np.dot(W1, X.T) + b1
A1 = relu(Z1)
Z2 = np.dot(W2, A1) + b2
A2 = relu(Z2)
Z3 = np.dot(W3, A2) + b3
A3 = sigmoid(Z3)
forward_params = {
"Z1": Z1,
"A1": A1,
"Z2": Z2,
"A2": A2,
"Z3": Z3,
"A3": A3
}
return forward_params
def backward_propagation(forward_params, X, Y):
A3 = forward_params["A3"]
Z3 = forward_params["Z3"]
A2 = forward_params["A2"]
Z2 = forward_params["Z2"]
A1 = forward_params["A1"]
Z1 = forward_params["Z1"]
data_size = Y.shape[1]
dZ3 = A3 - Y
dW3 = np.dot(dZ3, A2.T) / data_size
db3 = np.sum(dZ3, axis=1) / data_size
dZ2 = np.dot(dW3.T, dZ3) * prime_relu(Z2)
dW2 = np.dot(dZ2, A1.T) / data_size
db2 = np.sum(dZ2, axis=1) / data_size
db2 = np.reshape(db2, (db2.shape[0], 1))
dZ1 = np.dot(dW2.T, dZ2) * prime_relu(Z1)
dW1 = np.dot(dZ1, X) / data_size
db1 = np.sum(dZ1, axis=1) / data_size
db1 = np.reshape(db1, (db1.shape[0], 1))
grads = {
"dZ3": dZ3,
"dW3": dW3,
"db3": db3,
"dZ2": dZ2,
"dW2": dW2,
"db2": db2,
"dZ1": dZ1,
"dW1": dW1,
"db1": db1,
}
return grads
编辑:
类似这样的事情。请注意,我在函数backward_propagation() 的开头添加了两行。我忘记放A3和Z3了。
def one_hidden_layer_model(X, y, epochs=1000, learning_rate=0.003):
np.random.seed(0)
input_size = X_train.shape[1]
output_size = 1
hidden_layer_nodes = 4
hidden_layer_nodes2 = 4
W1 = np.random.randn(hidden_layer_nodes, input_size) / np.sqrt(input_size)
b1 = np.zeros((hidden_layer_nodes, 1))
W2 = np.random.randn(hidden_layer_nodes2, hidden_layer_nodes) / np.sqrt(hidden_layer_nodes)
b2 = np.zeros((hidden_layer_nodes2, 1))
W3 = np.random.randn(output_size, hidden_layer_nodes2) / np.sqrt(hidden_layer_nodes2)
b3 = np.zeros((output_size, 1))
loss_history = []
for i in range(epochs):
forward_params = forward_propagation(X, W1, b1, W2, b2, W3, b3)
A3 = forward_params["A3"]
loss = loss_function(A3, y)
grads = backward_propagation(forward_params, X, y)
W1 -= learning_rate * grads["dW1"]
b1 -= learning_rate * grads["db1"]
W2 -= learning_rate * grads["dW2"]
b2 -= learning_rate * grads["db2"]
W3 -= learning_rate * grads["dW3"]
b3 -= learning_rate * grads["db3"]
if i % 1000 == 0:
loss_history.append(loss)
print ("Costo e iteracion %i: %f" % (i, loss))
return W1, b1, W2, b2, W3, b3
关于python - 如何在ANN中再添加一层隐藏层?并且精度低,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/59528332/
我是一名优秀的程序员,十分优秀!