machine-learning - ANN 回归、线性函数逼近-6ren

machine-learning - ANN 回归、线性函数逼近

转载作者：行者123 更新时间：2023-11-30 08:24:53

我已经构建了一个常规的 ANN-BP 设置：输入层和输出层各有一个单元，隐藏层有 4 个使用 sigmoid 激活的节点。我给它一个简单的任务：逼近线性函数 f(n) = n，其中 n 在 0-100 范围内。

问题:无论隐藏层的数量、每层的单元数是多少，也无论我是否在节点值中使用偏差，它学到的都是近似 f(n) = Average(dataset)，如下所示:

代码是用 JavaScript 编写的，作为概念证明。我定义了三个类:Net、Layer 和 Connection，其中 Layer 是输入、偏差和输出值的数组，Connection 是权重和增量权重的 2D 数组。下面是 Layer 的代码，所有重要的计算都发生在这里:

/**
 * One layer of the network. Holds per-node inputs, outputs, gradients and
 * biases, and implements the forward pass plus the backpropagation updates
 * for the weights and biases feeding into this layer.
 *
 * Fields assigned by this constructor: id, length, inputs, outputs,
 * gradients, biases.
 * Fields that are read by the methods but never assigned here (they must be
 * wired up by whoever builds the network): next, previous, inConnection,
 * outConnection.
 *
 * @param nId         Stored as this.id.
 * @param oNet        Owning network; learningRate and learningMomentum are read from it.
 * @param oConfig     Layer configuration; `nodes` is the node count and
 *                    `activation` is the key looked up in Ann.Activation
 *                    and Ann.Activation.Derivative.
 * @param bUseBias    When truthy, biases are randomly initialised and trained;
 *                    otherwise they stay at 0.
 * @param aInitBiases Elements [0] and [1] are passed to Ann.random for each
 *                    initial bias (presumably a min/max range - confirm
 *                    against Ann.random).
 */
Ann.Layer = function(nId, oNet, oConfig, bUseBias, aInitBiases) {

    /****************** PUBLIC ******************/

    /** A layer with no `previous` layer attached is the input layer. */
    this.isInput = function() {
        return !this.previous;
    };

    /** A layer with no `next` layer attached is the output layer. */
    this.isOutput = function() {
        return !this.next;
    };

    /**
     * Fills this.gradients with the error signal of every node.
     *
     * Output layer: (output - target) * f'(output).
     * Other layers: (sum over next-layer nodes of outgoing weight * next
     * gradient) * f'(output).
     *
     * Note that the derivative is evaluated on the node's OUTPUT value.
     *
     * @param aTarget Target values, indexed per node; only read when this is
     *                the output layer.
     */
    this.calculateGradients = function(aTarget) {
        var fSlope  = Ann.Activation.Derivative[oConfig.activation];
        var bIsLast = this.isOutput();
        var nNode, nNext, nError;

        for (nNode = 0; nNode < oConfig.nodes; nNode++) {
            if (bIsLast) {
                nError = this.outputs[nNode] - aTarget[nNode];
            } else {
                // Collect the error flowing back from the next layer.
                nError = 0.0;
                for (nNext = 0; nNext < this.outConnection.weights[nNode].length; nNext++) {
                    nError += this.outConnection.weights[nNode][nNext] * this.next.gradients[nNext];
                }
            }
            this.gradients[nNode] = nError * fSlope(this.outputs[nNode]);
        }
    };

    /**
     * Applies one gradient-descent step (with momentum) to every weight of
     * the incoming connection. Does nothing on the input layer.
     */
    this.updateInputWeights = function() {
        if (this.isInput()) {
            return;
        }

        var nFrom, nTo, nPreviousStep, nStep;

        for (nFrom = 0; nFrom < this.previous.length; nFrom++) {
            for (nTo = 0; nTo < this.length; nTo++) {
                nPreviousStep = this.inConnection.deltaWeights[nFrom][nTo];

                // Descent term plus a momentum fraction of the previous step.
                nStep = -oNet.learningRate * this.previous.outputs[nFrom] * this.gradients[nTo]
                      + oNet.learningMomentum * nPreviousStep;

                // A step that collapses to exactly zero while the previous
                // one was non-zero is reported on the console.
                if (nStep == 0 && nPreviousStep != 0) {
                    console.log('Double overflow');
                }

                this.inConnection.deltaWeights[nFrom][nTo] = nStep;
                this.inConnection.weights[nFrom][nTo]     += nStep;
            }
        }
    };

    /**
     * Applies one gradient-descent step to every bias (no momentum term).
     * Does nothing when biases are disabled or on the input layer.
     */
    this.updateInputBiases = function() {
        if (!bUseBias || this.isInput()) {
            return;
        }

        var nNode, nBiasStep;

        for (nNode = 0; nNode < this.length; nNode++) {
            nBiasStep = -oNet.learningRate * this.gradients[nNode];
            this.biases[nNode] += nBiasStep;
        }
    };

    /**
     * Forward pass for this layer. Stores `a` as this.inputs, computes
     * this.outputs, and hands the outputs to the outgoing connection unless
     * this is the output layer.
     *
     * The input layer passes its inputs through untouched (outputs becomes
     * the very same array object); every other layer applies
     * activation(input + bias) per node.
     *
     * @param a Array of per-node input values.
     */
    this.feedForward = function(a) {
        var fSquash = Ann.Activation[oConfig.activation];
        var nNode;

        this.inputs = a;

        if (this.isInput()) {
            this.outputs = this.inputs;
        } else {
            for (nNode = 0; nNode < a.length; nNode++) {
                this.outputs[nNode] = fSquash(a[nNode] + this.biases[nNode]);
            }
        }

        if (this.isOutput()) {
            return;
        }
        this.outConnection.feedForward(this.outputs);
    };

    /****************** INITIALISATION ******************/

    this.id        = nId;
    this.length    = oConfig.nodes;
    this.outputs   = new Array(oConfig.nodes).fill(0);
    this.inputs    = new Array(oConfig.nodes).fill(0);
    this.gradients = new Array(oConfig.nodes);   // left unfilled until calculateGradients runs
    this.biases    = new Array(oConfig.nodes).fill(0);

    if (bUseBias) {
        for (var nBias = 0; nBias < oConfig.nodes; nBias++) {
            this.biases[nBias] = Ann.random(aInitBiases[0], aInitBiases[1]);
        }
    }
};

主要的 feedForward 和 backProp 函数定义如下:

// Runs a forward pass: hands the input array `a` to the first layer
// (this.layers[0]) and afterwards resets this.netError to 0.
this.feedForward = function(a) {
    this.layers[0].feedForward(a);
    this.netError = 0;
}

/**
 * Performs one training step on a single example.
 *
 * Sequence: remember the target, validate the array sizes, run the forward
 * pass, compute the net error via _calculateNetError, compute gradients from
 * the last layer back to layer 1, then update weights and biases in the same
 * back-to-front order. Layer 0 is never touched by either pass.
 *
 * @param aExample Input values; length must match this.getInputCount().
 * @param aTarget  Expected outputs; length must match this.getOutputCount().
 * @throws a string message when either length does not match.
 */
this.backPropagate = function(aExample, aTarget) {
    this.target = aTarget;

    if (aExample.length != this.getInputCount()) {
        throw "Wrong input count in training data";
    }
    if (aTarget.length != this.getOutputCount()) {
        throw "Wrong output count in training data";
    }

    this.feedForward(aExample);
    _calculateNetError(aTarget);

    var nOutputIndex = this.layers.length - 1;
    var nIndex;

    // Only the last layer is given the target; the layers before it derive
    // their gradients from the layer after them.
    if (nOutputIndex > 0) {
        this.layers[nOutputIndex].calculateGradients(aTarget);
    }
    for (nIndex = nOutputIndex - 1; nIndex > 0; nIndex--) {
        this.layers[nIndex].calculateGradients();
    }

    // Updates run only after every gradient is known, so the gradients are
    // all computed against the pre-update weights.
    nIndex = nOutputIndex;
    while (nIndex > 0) {
        this.layers[nIndex].updateInputWeights();
        this.layers[nIndex].updateInputBiases();
        nIndex--;
    }
}

连接代码相当简单:

/**
 * Weighted connection between two layers of the network.
 *
 * `weights` and `deltaWeights` are 2D arrays indexed [fromNode][toNode].
 * Weights start at values returned by Ann.random; deltaWeights start at 0.
 *
 * @param oNet         Network whose `layers` array is indexed by the config.
 * @param oConfig      `from` and `to` are indices into oNet.layers.
 * @param aInitWeights Elements [0] and [1] are passed to Ann.random for each
 *                     initial weight (presumably a min/max range - confirm
 *                     against Ann.random).
 */
Ann.Connection = function(oNet, oConfig, aInitWeights) {

    /****************** PUBLIC ******************/

    /**
     * Computes the weighted sum arriving at every node of the target layer
     * and passes the resulting array to that layer's feedForward.
     *
     * @param a Output values of the source layer, indexed per source node.
     */
    this.feedForward = function(a) {
        var aSums = new Array(this.to.length);
        var nTo, nFrom, nSum;

        for (nTo = 0; nTo < this.to.length; nTo++) {
            nSum = 0;
            for (nFrom = 0; nFrom < this.from.length; nFrom++) {
                nSum += a[nFrom] * this.weights[nFrom][nTo];
            }
            aSums[nTo] = nSum;
        }

        this.to.feedForward(aSums);
    };

    /****************** INITIALISATION ******************/

    this.from = oNet.layers[oConfig.from];
    this.to   = oNet.layers[oConfig.to];

    var nSourceCount = this.from.length;
    var nTargetCount = this.to.length;
    var nRow, nCol, aWeightRow;

    this.weights      = new Array(nSourceCount);
    this.deltaWeights = new Array(nSourceCount);

    for (nRow = 0; nRow < nSourceCount; nRow++) {
        aWeightRow = new Array(nTargetCount);
        for (nCol = 0; nCol < nTargetCount; nCol++) {
            aWeightRow[nCol] = Ann.random(aInitWeights[0], aInitWeights[1]);
        }

        this.weights[nRow]      = aWeightRow;
        this.deltaWeights[nRow] = new Array(nTargetCount).fill(0);
    }
};

我的激活函数和导数的定义如下:

/**
 * Activation functions, looked up by the `activation` name of a layer
 * config. Each maps a node's summed input to its output.
 */
Ann.Activation = {
    // Identity: the output equals the input.
    linear : function(nInput) {
        return nInput;
    },

    // Logistic sigmoid, 1 / (1 + e^-x).
    sigma : function(nInput) {
        var nDecay = Math.exp(-nInput);
        return 1.0 / (1.0 + nDecay);
    },

    // Hyperbolic tangent.
    tanh : function(nInput) {
        return Math.tanh(nInput);
    }
};

/**
 * Derivatives of the activation functions, keyed by the same names.
 *
 * The sigma and tanh formulas below are the derivatives written in terms of
 * the activation's OUTPUT value y (y * (1 - y) and 1 - y^2), not in terms of
 * the pre-activation input, so callers must pass the node output.
 */
Ann.Activation.Derivative = {
    // Slope of the identity is constant.
    linear : function(nOutput) {
        return 1.0;
    },

    // y * (1 - y) for y = sigma(x).
    sigma : function(nOutput) {
        var nComplement = 1.0 - nOutput;
        return nOutput * nComplement;
    },

    // 1 - y^2 for y = tanh(x).
    tanh : function(nOutput) {
        var nSquared = nOutput * nOutput;
        return 1.0 - nSquared;
    }
};

网络的配置 JSON 如下:

// Network configuration: training hyper-parameters, the layer list and the
// connections (by layer index) between them.
var Config = {
    id: "Config1",

    // Training hyper-parameters.
    learning_rate: 0.01,
    learning_momentum: 0,

    // Two-element value pairs used for the initial weights and biases.
    init_weight: [-1, 1],
    init_bias: [-1, 1],
    use_bias: false,

    // Layer 0 declares no activation; layers 1 and 2 name theirs.
    layers: [
        { nodes: 1 },
        { nodes: 4, activation: "sigma" },
        { nodes: 1, activation: "linear" }
    ],

    // `from` / `to` are indices into `layers`.
    connections: [
        { from: 0, to: 1 },
        { from: 1, to: 2 }
    ]
};

或许，你有经验的眼睛可以发现我的计算的问题？

See example in JSFiddle

最佳答案

我没有仔细查看全部代码(因为要查看的代码很多，需要以后花更多时间，而且我对 javascript 也不是 100% 熟悉)。不管怎样，我相信 Stephen 在权重的计算方式上引入了一些变化，并且他的代码似乎给出了正确的结果，所以我建议查看它。

以下几点虽然不一定与计算的正确性有关，但仍可能有所帮助:

您向网络展示了多少个训练示例？您是否多次展示相同的输入？您应该把您拥有的每个(输入)示例都多次展示给网络；每个示例只展示一次不足以让基于梯度下降的算法完成学习，因为它们每次只向正确的方向移动一点点。您的所有代码可能都是正确的，只是需要给它多一点时间来训练。
像 Stephen 那样引入更多隐藏层可能有助于加快训练速度，也可能有害。这通常需要您针对自己的具体情况进行试验。但对于这个简单的问题来说，这绝对不是必要的。我怀疑您的配置和 Stephen 的配置之间更重要的区别可能是隐藏层中使用的激活函数。您使用了 sigmoid，这意味着所有输入值在隐藏层中都被压缩到 1.0 以下，然后您需要非常大的权重才能将这些数字转换回所需的输出(最高可达 100)。Stephen 对所有层都使用了线性激活函数，在这种特定情况下，这可能会使训练变得更加容易，因为您实际上是在尝试学习线性函数。但在许多其他情况下，引入非线性是可取的。
将输入和所需输出转换(标准化)为 [0, 1] 而不是 [0, 100] 可能会有所帮助。这将使您的 sigmoid 层更有可能产生良好的结果(尽管我仍然不确定这是否足够，因为在您打算学习线性函数的情况下，您仍然引入了非线性，并且您可能需要更多隐藏节点来纠正这一点)。在“现实世界”的情况下，如果您有多个不同的输入变量，通常也会这样做，因为它确保所有输入变量最初都被视为同等重要。您始终可以执行预处理步骤，将输入标准化为 [0, 1]，将其作为网络的输入，训练网络产生 [0, 1] 范围内的输出，然后添加一个后处理步骤，将输出转换回原来的范围。

关于machine-learning - ANN 回归、线性函数逼近，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/40491798/

文章推荐： javascript - Puppeteer 中的 XPath 表达式而不是 CSS 选择器？

文章推荐： javascript - 在js中访问子对象

文章推荐： java - 如何调整 JOptionPane.showConfirmDialog 的大小

virtual-machine - "process virtual machine"与 "system virtual machine"的区别
进程虚拟机和系统虚拟机有什么区别？我的猜测是，进程 VM 没有为该操作系统的整个应用程序提供一种操作系统，而是为某些特定应用程序提供环境。系统虚拟机为操作系统提供了一个安装环境，就像 Virtua
C# :Does Client machine need SQL Server installed on it while connecting to other machine having SQL Server installed on it (the Server machine)
我写了一个 C# windows 应用程序表单，它在客户端机器上运行并连接到另一台机器上的 SQL 服务器。在 C# 中建立连接时，我使用了像这样的 dll 1)microsoft.sqlserver
machine-learning - 线性回归标准化的影响: Machine Learning
作为我作业的一部分，我正在处理几个数据集，并通过线性回归查找它们的训练错误。我想知道标准化是否对训练误差有影响？对于标准化前后的数据集，我的相关性和 RMSE 是相等的。谢谢最佳答案很容易证明，
docker-machine - 无法使用 docker-machine 添加主机
我在公司数据中心的 linux VM 上安装了 docker-engine。我在 Windows 上安装了 docker-machine。我想通过我的 Windows 机器管理这个 docker-en
SAS 服务器 : How to get machine name of client machine?
我在我的 PC 上运行 SAS Enterprise Guide 以连接到位于我们网络内的服务器上的 SAS 实例。我正在编写一个将在服务器上运行的 SAS 程序，该程序将使用 ODS 将 HTML
machine.config - ASP.Net Machine.Config 转换
我正在创建一个包含 ASP.Net HttpModule 和 HttpHandler 的强签名类库。我已经为我的库创建了一个 visual studio 安装项目，该项目在 GAC 中安装了该库，但
docker-machine - 如何将现有的 Docker 服务器导入到 Docker Machine？
我试过 docker-machine create -d none --url tcp://:2376 remote并复制 {ca,key,cert}.pem (客户端证书)到机器目录。然后我做了 e
LLVM 代码生成器 : is Machine code representation machine-agnostic?
请注意 : 这个问题不是关于 LLVM IR , 但 LLVM 的 MIR ，一种低于前一种的内部中间表示。本文档关于 LLVM Machine code description classes ，
turing-machines - 有没有解决 "Construct a Turing machine ..."问题的简单方法？
我理解图灵机的逻辑。当给出图灵机时，我可以理解它是如何工作的以及它是如何停止的。但是当它被要求构造图灵机，难度更大。有什么简单的方法可以找到问题的答案，例如: Construct a Turing
math - "finite state machine"和 "state machine"之间有区别吗？
我不确定我是否理解有限状态机和状态机之间是否有区别？我是不是想得太难了？最佳答案 I'm not sure I understand if there is a difference between
docker-machine - 无法成功创建 docker 机器 : Error creating machine
我遵循 docker 入门教程并到达第 4 部分，您需要使用 virtualbox ( https://docs.docker.com/get-started/part4/#create-a-clus
virtual-machine - 如何在 Virtual Machine Manager 中启用 QEMU-Monitor 控制台？
我使用 Virtual Machine Manager 通过 QEMU-KVM 运行多个客户操作系统。我在某处读到，通过输入 ctrl+alt+2 应该会弹出监视器控制台。它不工作或禁用。有什么办法可
c - LNK1112 : module machine type 'IA64' conflicts with target machine type 'X86'
当我尝试在项目中包含 libc.lib 时，会出现此错误，即使我的 Windows 是 32 位，也会出现此错误。不知道我是否必须从某个地方下载它或什么。最佳答案您正在尝试链接为 IA64 架构编
machine-learning - 短语 "a machine learning algorithm learn a probability distribution"是什么意思？这里究竟发生了什么
生成模型和判别模型似乎可以学习条件 P(x|y) 和联合 P(x,y) 概率分布。但从根本上讲，我无法说服自己“学习概率分布”意味着什么。最佳答案这意味着您的模型要么充当训练样本的分布估计器，要么
opencv - 'LNK1112 : module machine type 'x64' conflicts with target machine type 'X86'
我正在使用 visual studio 2012.我得到了错误 LNK1112: module machine type 'x64' conflicts with target machine typ
macos - 如何修复 "error in run: Failed to get machine "boot2docker-vm": machine does not exist"?
使用 start|info|stop|delete 参数运行 boot2docker导致错误消息: snowch$ boot2docker start error in run: Failed to
azure - Vagrant-Azure : Guest machine can't connect to host machine (Unable to copy SMB files)
到目前为止，我一直只在本地使用 Vagrant，现在我想使用 Azure 作为提供程序来创建 VM，但不幸的是，我遇到了错误，可以在通过链接访问的图像上看到该错误。我明白它说的是什么，但我完全不知道如
c++ - 错误 LNK1112 : module machine type 'x64' conflicts with target machine type 'X86'
这个问题在这里已经有了答案: 关闭 10 年前。 Possible Duplicate: linking problem: fatal error LNK1112: module machine t
Node.js DGRAM 模块 : Cannot send UDP message to remote machine but can to local machine
我正在使用 Nodejs 的 dgram 模块运行一个简单的 UDP 服务器。相关代码很简单: server = dgram.createSocket('udp4'); serve
wix - 错误 LGHT0204 : ICE57: Component has both per-user and per-machine data with a per-machine KeyPath
嗨，我收到以下错误，导致构建失败，但在 bin 中创建了 Wix 安装程序 MSI。我怎样才能避免这些错误或抑制？错误 LGHT0204:ICE57:组件 'cmp52CD5A4CB5D668097

行者123

个人简介

我是一名优秀的程序员,十分优秀！

作者热门文章

滴滴打车优惠券免费领取

全站热门文章

首页

博学

6Ren·AI

商城

machine-learning - ANN 回归、线性函数逼近