gpt4 book ai didi

java - 在 Java 中实现神经网络 : Training and Backpropagation issues

转载 作者:搜寻专家 更新时间:2023-11-01 01:28:27 25 4
gpt4 key购买 nike

我正在尝试用 Java 实现前馈神经网络。我创建了三个类 NNeuron、NLayer 和 NNetwork。 “简单”的计算似乎很好(我得到了正确的总和/激活/输出),但在训练过程中,我似乎没有得到正确的结果。谁能告诉我我做错了什么?NNetwork 类的整个代码很长,所以我发布了导致问题的部分:[编辑]:这实际上是几乎所有的 NNetwork 类

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class NNetwork
{
public static final double defaultLearningRate = 0.4;
public static final double defaultMomentum = 0.8;

private NLayer inputLayer;
private ArrayList<NLayer> hiddenLayers;
private NLayer outputLayer;

private ArrayList<NLayer> layers;

private double momentum = NNetwork1.defaultMomentum; // alpha: momentum, default! 0.3

private ArrayList<Double> learningRates;

public NNetwork (int nInputs, int nOutputs, Integer... neuronsPerHiddenLayer)
{
this(nInputs, nOutputs, Arrays.asList(neuronsPerHiddenLayer));
}

public NNetwork (int nInputs, int nOutputs, List<Integer> neuronsPerHiddenLayer)
{
// the number of neurons on the last layer build so far (i.e. the number of inputs for each neuron of the next layer)
int prvOuts = 1;

this.layers = new ArrayList<>();

// input layer
this.inputLayer = new NLayer(nInputs, prvOuts, this);
this.inputLayer.setAllWeightsTo(1.0);
this.inputLayer.setAllBiasesTo(0.0);
this.inputLayer.useSigmaForOutput(false);
prvOuts = nInputs;
this.layers.add(this.inputLayer);

// hidden layers
this.hiddenLayers = new ArrayList<>();
for (int i=0 ; i<neuronsPerHiddenLayer.size() ; i++)
{
this.hiddenLayers.add(new NLayer(neuronsPerHiddenLayer.get(i), prvOuts, this));
prvOuts = neuronsPerHiddenLayer.get(i);
}
this.layers.addAll(this.hiddenLayers);

// output layer
this.outputLayer = new NLayer(nOutputs, prvOuts, this);
this.layers.add(this.outputLayer);

this.initCoeffs();
}

private void initCoeffs ()
{
this.learningRates = new ArrayList<>();
// learning rates of the hidden layers
for (int i=0 ; i<this.hiddenLayers.size(); i++)
this.learningRates.add(NNetwork1.defaultLearningRate);

// learning rate of the output layer
this.learningRates.add(NNetwork1.defaultLearningRate);
}

public double getLearningRate (int layerIndex)
{
if (layerIndex > 0 && layerIndex <= this.hiddenLayers.size()+1)
{
return this.learningRates.get(layerIndex-1);
}
else
{
return 0;
}
}

public ArrayList<Double> getLearningRates ()
{
return this.learningRates;
}

public void setLearningRate (int layerIndex, double newLearningRate)
{
if (layerIndex > 0 && layerIndex <= this.hiddenLayers.size()+1)
{
this.learningRates.set(
layerIndex-1,
newLearningRate);
}
}

public void setLearningRates (Double... newLearningRates)
{
this.setLearningRates(Arrays.asList(newLearningRates));
}

public void setLearningRates (List<Double> newLearningRates)
{
int len = (this.learningRates.size() <= newLearningRates.size())
? this.learningRates.size()
: newLearningRates.size();

for (int i=0; i<len; i++)
this.learningRates
.set(i,
newLearningRates.get(i));
}

public double getMomentum ()
{
return this.momentum;
}

public void setMomentum (double momentum)
{
this.momentum = momentum;
}

public NNeuron getNeuron (int layerIndex, int neuronIndex)
{
if (layerIndex == 0)
return this.inputLayer.getNeurons().get(neuronIndex);
else if (layerIndex == this.hiddenLayers.size()+1)
return this.outputLayer.getNeurons().get(neuronIndex);
else
return this.hiddenLayers.get(layerIndex-1).getNeurons().get(neuronIndex);
}

public ArrayList<Double> getOutput (ArrayList<Double> inputs)
{
ArrayList<Double> lastOuts = inputs; // the last computed outputs of the last 'called' layer so far

// input layer
//lastOuts = this.inputLayer.getOutput(lastOuts);
lastOuts = this.getInputLayerOutputs(lastOuts);

// hidden layers
for (NLayer layer : this.hiddenLayers)
lastOuts = layer.getOutput(lastOuts);

// output layer
lastOuts = this.outputLayer.getOutput(lastOuts);

return lastOuts;
}

public ArrayList<ArrayList<Double>> getAllOutputs (ArrayList<Double> inputs)
{
ArrayList<ArrayList<Double>> outs = new ArrayList<>();

// input layer
outs.add(this.getInputLayerOutputs(inputs));

// hidden layers
for (NLayer layer : this.hiddenLayers)
outs.add(layer.getOutput(outs.get(outs.size()-1)));

// output layer
outs.add(this.outputLayer.getOutput(outs.get(outs.size()-1)));

return outs;
}

public ArrayList<ArrayList<Double>> getAllSums (ArrayList<Double> inputs)
{
//*
ArrayList<ArrayList<Double>> sums = new ArrayList<>();
ArrayList<Double> lastOut;

// input layer
sums.add(inputs);
lastOut = this.getInputLayerOutputs(inputs);

// hidden nodes
for (NLayer layer : this.hiddenLayers)
{
sums.add(layer.getSums(lastOut));

lastOut = layer.getOutput(lastOut);
}

// output layer
sums.add(this.outputLayer.getSums(lastOut));

return sums;
}

public ArrayList<Double> getInputLayerOutputs (ArrayList<Double> inputs)
{
ArrayList<Double> outs = new ArrayList<>();
for (int i=0 ; i<this.inputLayer.getNeurons().size() ; i++)
outs.add(this
.inputLayer
.getNeuron(i)
.getOutput(inputs.get(i)));
return outs;
}

public void changeWeights (
ArrayList<ArrayList<Double>> deltaW,
ArrayList<ArrayList<Double>> inputSet,
ArrayList<ArrayList<Double>> targetSet,
boolean checkError)
{
for (int i=0 ; i<deltaW.size()-1 ; i++)
this.hiddenLayers.get(i).changeWeights(deltaW.get(i), inputSet, targetSet, checkError);

this.outputLayer.changeWeights(deltaW.get(deltaW.size()-1), inputSet, targetSet, checkError);

}

public int train2 (
ArrayList<ArrayList<Double>> inputSet,
ArrayList<ArrayList<Double>> targetSet,
double maxError,
int maxIterations)
{
ArrayList<Double>
input,
target;

ArrayList<ArrayList<ArrayList<Double>>> prvNetworkDeltaW = null;

double error;

int i = 0, j = 0, traininSetLength = inputSet.size();
do // during each itreration...
{
error = 0.0;
for (j = 0; j < traininSetLength; j++) // ... for each training element...
{
input = inputSet.get(j);
target = targetSet.get(j);
prvNetworkDeltaW = this.train2_bp(input, target, prvNetworkDeltaW); // ... do backpropagation, and return the new weight deltas

error += this.getInputMeanSquareError(input, target);
}

i++;
} while (error > maxError && i < maxIterations); // iterate as much as necessary/possible

return i;
}

public ArrayList<ArrayList<ArrayList<Double>>> train2_bp (
ArrayList<Double> input,
ArrayList<Double> target,
ArrayList<ArrayList<ArrayList<Double>>> prvNetworkDeltaW)
{
ArrayList<ArrayList<Double>> layerSums = this.getAllSums(input); // the sums for each layer
ArrayList<ArrayList<Double>> layerOutputs = this.getAllOutputs(input); // the outputs of each layer

// get the layer deltas (inc the input layer that is null)
ArrayList<ArrayList<Double>> layerDeltas = this.train2_getLayerDeltas(layerSums, layerOutputs, target);

// get the weight deltas
ArrayList<ArrayList<ArrayList<Double>>> networkDeltaW = this.train2_getWeightDeltas(layerOutputs, layerDeltas, prvNetworkDeltaW);

// change the weights
this.train2_updateWeights(networkDeltaW);

return networkDeltaW;
}

public void train2_updateWeights (ArrayList<ArrayList<ArrayList<Double>>> networkDeltaW)
{
for (int i=1; i<this.layers.size(); i++)
this.layers.get(i).train2_updateWeights(networkDeltaW.get(i));
}

public ArrayList<ArrayList<ArrayList<Double>>> train2_getWeightDeltas (
ArrayList<ArrayList<Double>> layerOutputs,
ArrayList<ArrayList<Double>> layerDeltas,
ArrayList<ArrayList<ArrayList<Double>>> prvNetworkDeltaW)
{
ArrayList<ArrayList<ArrayList<Double>>> networkDeltaW = new ArrayList<>(this.layers.size());
ArrayList<ArrayList<Double>> layerDeltaW;
ArrayList<Double> neuronDeltaW;

for (int i=0; i<this.layers.size(); i++)
networkDeltaW.add(new ArrayList<ArrayList<Double>>());

double
deltaW, x, learningRate, prvDeltaW, d;

int i, j, k;
for (i=this.layers.size()-1; i>0; i--) // for each layer
{
learningRate = this.getLearningRate(i);

layerDeltaW = new ArrayList<>();
networkDeltaW.set(i, layerDeltaW);

for (j=0; j<this.layers.get(i).getNeurons().size(); j++) // for each neuron of this layer
{
neuronDeltaW = new ArrayList<>();
layerDeltaW.add(neuronDeltaW);

for (k=0; k<this.layers.get(i-1).getNeurons().size(); k++) // for each weight (i.e. each neuron of the previous layer)
{
d = layerDeltas.get(i).get(j);
x = layerOutputs.get(i-1).get(k);
prvDeltaW = (prvNetworkDeltaW != null)
? prvNetworkDeltaW.get(i).get(j).get(k)
: 0.0;

deltaW = -learningRate * d * x + this.momentum * prvDeltaW;

neuronDeltaW.add(deltaW);
}

// the bias !!
d = layerDeltas.get(i).get(j);
x = 1;
prvDeltaW = (prvNetworkDeltaW != null)
? prvNetworkDeltaW.get(i).get(j).get(prvNetworkDeltaW.get(i).get(j).size()-1)
: 0.0;

deltaW = -learningRate * d * x + this.momentum * prvDeltaW;

neuronDeltaW.add(deltaW);
}
}

return networkDeltaW;
}

ArrayList<ArrayList<Double>> train2_getLayerDeltas (
ArrayList<ArrayList<Double>> layerSums,
ArrayList<ArrayList<Double>> layerOutputs,
ArrayList<Double> target)
{
// get ouput deltas
ArrayList<Double> outputDeltas = new ArrayList<>(); // the output layer deltas
double
oErr, // output error given a target
s, // sum
o, // output
d; // delta
int
nOutputs = target.size(), // @TODO ?== this.outputLayer.size()
nLayers = this.hiddenLayers.size()+2; // @TODO ?== layerOutputs.size()

for (int i=0; i<nOutputs; i++) // for each neuron...
{
s = layerSums.get(nLayers-1).get(i);
o = layerOutputs.get(nLayers-1).get(i);
oErr = (target.get(i) - o);
d = -oErr * this.getNeuron(nLayers-1, i).sigmaPrime(s); // @TODO "s" or "o" ??

outputDeltas.add(d);
}

// get hidden deltas
ArrayList<ArrayList<Double>> hiddenDeltas = new ArrayList<>();
for (int i=0; i<this.hiddenLayers.size(); i++)
hiddenDeltas.add(new ArrayList<Double>());

NLayer nextLayer = this.outputLayer;
ArrayList<Double> nextDeltas = outputDeltas;

int
h, k,
nHidden = this.hiddenLayers.size(),
nNeurons = this.hiddenLayers.get(nHidden-1).getNeurons().size();
double
wdSum = 0.0;
for (int i=nHidden-1; i>=0; i--) // for each hidden layer
{
hiddenDeltas.set(i, new ArrayList<Double>());
for (h=0; h<nNeurons; h++)
{
wdSum = 0.0;
for (k=0; k<nextLayer.getNeurons().size(); k++)
{
wdSum += nextLayer.getNeuron(k).getWeight(h) * nextDeltas.get(k);
}

s = layerSums.get(i+1).get(h);
d = this.getNeuron(i+1, h).sigmaPrime(s) * wdSum;

hiddenDeltas.get(i).add(d);
}

nextLayer = this.hiddenLayers.get(i);
nextDeltas = hiddenDeltas.get(i);
}

ArrayList<ArrayList<Double>> deltas = new ArrayList<>();

// input layer deltas: void
deltas.add(null);

// hidden layers deltas
deltas.addAll(hiddenDeltas);

// output layer deltas
deltas.add(outputDeltas);

return deltas;
}

public double getInputMeanSquareError (ArrayList<Double> input, ArrayList<Double> target)
{
double diff, mse=0.0;
ArrayList<Double> output = this.getOutput(input);
for (int i=0; i<target.size(); i++)
{
diff = target.get(i) - output.get(i);
mse += (diff * diff);
}

mse /= 2.0;

return mse;
}

}

有些方法的名称(及其返回值/类型)非常不言自明,例如“this.getAllSums”返回每一层的总和(每个神经元的总和(x_i*w_i)),“this.getAllOutputs "返回每一层的输出(每个神经元的 sigmoid(sum)),"this.getNeuron(i,j)"返回第 i 层的第 j 个神经元。

预先感谢您的帮助:)

最佳答案

这是一个非常简单的 java 实现,在 main 方法中进行了测试:

import java.util.Arrays;
import java.util.Random;

public class MLP {

public static class MLPLayer {

float[] output;
float[] input;
float[] weights;
float[] dweights;
boolean isSigmoid = true;

public MLPLayer(int inputSize, int outputSize, Random r) {
output = new float[outputSize];
input = new float[inputSize + 1];
weights = new float[(1 + inputSize) * outputSize];
dweights = new float[weights.length];
initWeights(r);
}

public void setIsSigmoid(boolean isSigmoid) {
this.isSigmoid = isSigmoid;
}

public void initWeights(Random r) {
for (int i = 0; i < weights.length; i++) {
weights[i] = (r.nextFloat() - 0.5f) * 4f;
}
}

public float[] run(float[] in) {
System.arraycopy(in, 0, input, 0, in.length);
input[input.length - 1] = 1;
int offs = 0;
Arrays.fill(output, 0);
for (int i = 0; i < output.length; i++) {
for (int j = 0; j < input.length; j++) {
output[i] += weights[offs + j] * input[j];
}
if (isSigmoid) {
output[i] = (float) (1 / (1 + Math.exp(-output[i])));
}
offs += input.length;
}
return Arrays.copyOf(output, output.length);
}

public float[] train(float[] error, float learningRate, float momentum) {
int offs = 0;
float[] nextError = new float[input.length];
for (int i = 0; i < output.length; i++) {
float d = error[i];
if (isSigmoid) {
d *= output[i] * (1 - output[i]);
}
for (int j = 0; j < input.length; j++) {
int idx = offs + j;
nextError[j] += weights[idx] * d;
float dw = input[j] * d * learningRate;
weights[idx] += dweights[idx] * momentum + dw;
dweights[idx] = dw;
}
offs += input.length;
}
return nextError;
}
}
MLPLayer[] layers;

public MLP(int inputSize, int[] layersSize) {
layers = new MLPLayer[layersSize.length];
Random r = new Random(1234);
for (int i = 0; i < layersSize.length; i++) {
int inSize = i == 0 ? inputSize : layersSize[i - 1];
layers[i] = new MLPLayer(inSize, layersSize[i], r);
}
}

public MLPLayer getLayer(int idx) {
return layers[idx];
}

public float[] run(float[] input) {
float[] actIn = input;
for (int i = 0; i < layers.length; i++) {
actIn = layers[i].run(actIn);
}
return actIn;
}

public void train(float[] input, float[] targetOutput, float learningRate, float momentum) {
float[] calcOut = run(input);
float[] error = new float[calcOut.length];
for (int i = 0; i < error.length; i++) {
error[i] = targetOutput[i] - calcOut[i]; // negative error
}
for (int i = layers.length - 1; i >= 0; i--) {
error = layers[i].train(error, learningRate, momentum);
}
}

public static void main(String[] args) throws Exception {
float[][] train = new float[][]{new float[]{0, 0}, new float[]{0, 1}, new float[]{1, 0}, new float[]{1, 1}};
float[][] res = new float[][]{new float[]{0}, new float[]{1}, new float[]{1}, new float[]{0}};
MLP mlp = new MLP(2, new int[]{2, 1});
mlp.getLayer(1).setIsSigmoid(false);
Random r = new Random();
int en = 500;
for (int e = 0; e < en; e++) {

for (int i = 0; i < res.length; i++) {
int idx = r.nextInt(res.length);
mlp.train(train[idx], res[idx], 0.3f, 0.6f);
}

if ((e + 1) % 100 == 0) {
System.out.println();
for (int i = 0; i < res.length; i++) {
float[] t = train[i];
System.out.printf("%d epoch\n", e + 1);
System.out.printf("%.1f, %.1f --> %.3f\n", t[0], t[1], mlp.run(t)[0]);
}
}
}
}
}

关于java - 在 Java 中实现神经网络 : Training and Backpropagation issues,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/9951487/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com