gpt4 book ai didi

c++ - 在 Release 中获取 std::bad_alloc 但在 Debug 中不获取

转载 作者:行者123 更新时间:2023-11-28 04:55:05 25 4
gpt4 key购买 nike

我只是不知道该怎么办...

这些函数在调试中运行良好,但在发布中运行不佳。

我正在尝试学习人工神经网络和 C++ vector 。

这是我用 C++ 编写的代码(在 Python 2.7 中):

http://neuralnetworksanddeeplearning.com/chap1.html#exercise_852508

(只需滚动一点即可到达)

我正在使用来自 MSYS2 (C++11) 的 MinGW 7.2.0。

在反向传播方法中有一些“teste”打印,这就是问题的来源(我猜)。我还重载了运算符 +、- 和 * 以简化操作。

我知道有一些像 Armadillo 这样的库可以让事情变得更容易,但我真的想利用这个问题来更好地学习。

这是文件:

神经网络.h

(我把所有东西都公开了,方便大家看)

#define MIN_NUMBER_TOLERANCE 1e-8

namespace nn
{
class neuralnetwork
{
//private:
public:
//total number of weights. useful to reserve memory
int numWeights;
//total number of biases. useful to reserve memory
int numBiases;
//total number of layers: 1 for input, n hidden layers and 1 for output
int numLayers;
//a vector to store the number of neurons in each layer: 0 index is about the input layer, last index is about the output layer
std::vector<int> sizes;
//stores all biases: num of neurons of layer 1 + ... + num of neurons of layer (numLayers - 1) (input layer has no bias)
std::vector<std::vector<double>> biases;
//stores all weights: (num of neurons of layer 1) x (num of neurons of layer ) + ... + ( num of neurons of layer (numLayers - 1) ) x ( num of neurons of layer (numLayers - 2) ) (input layer has no bias)
std::vector<std::vector<std::vector<double>>> weights;
//stores the output of each neuron of each layer
std::vector<std::vector<double>> layersOutput;

std::vector<std::vector<std::vector<double>>> derivativeWeights;
std::vector<std::vector<double>> derivativeBiases;

std::default_random_engine generator;
std::normal_distribution<double> distribution;

double randomNormalNumber(void);

double costDerivatives(const double&, const double&);

std::vector<double> costDerivatives(const std::vector<double> &, const std::vector<double> &);

void backPropagation(const std::vector<double>& neuralNetworkInputs, const std::vector<double>& expectedOutputs, // inputs
std::vector<std::vector<std::vector<double>>>& derivativeWeights, std::vector<std::vector<double>>& derivativeBiases); // outputs

void update_mini_batch( const std::vector<std::pair<std::vector<double>,std::vector<double>>> & mini_batch, double eta);

//public:

neuralnetwork(const std::vector<int>& sizes);

std::vector<double> feedforward(const std::vector<double>&);
};


std::vector<double> sigmoid(const std::vector<double> &);
double sigmoid(double);
std::vector<double> sigmoid_prime(const std::vector<double> &);
//double sigmoid_prime(double);


}

神经网络.cpp

#include "neuralnetwork.h"
#include <iostream>
#include <assert.h>
#include <algorithm>

namespace nn
{
int counter = 0;

neuralnetwork::neuralnetwork(const std::vector<int> &sizes)
{
this->distribution = std::normal_distribution<double>( 0.0 , 1.0 );

this->numLayers = sizes.size();
this->sizes = sizes;

this->numWeights = 0;
this->numBiases = 0;

for ( int i = 1 ; i < this->numLayers ; i++ )
{
numWeights += this->sizes[ i ] * this->sizes[ i - 1 ];
numBiases += this->sizes[ i ];
}

this->weights.reserve( numWeights );
this->biases.reserve( numBiases );

this->derivativeWeights.reserve( numWeights );
this->derivativeBiases.reserve( numBiases );

this->layersOutput.reserve( this->sizes[ 0 ] + numBiases );

std::vector<double> auxVectorWeights;
std::vector<std::vector<double> > auxMatrixWeights;

std::vector<double> auxVectorBiases;

#ifdef DEBUG_BUILD
std::cout << "debugging!\n";
#endif

//just to accommodate the input layer with null biases and inputs (makes things easier to iterate and reading :D).
this->layersOutput.push_back( std::vector<double>( this->sizes[ 0 ] ) );
std::vector<std::vector<double>> matrixNothing( 0 );
this->weights.push_back( matrixNothing );
this->biases.push_back( std::vector<double>( 0 ) );

//since the second layer (index 1) because there is no weights (nor biases) for the neurons of the first layer
for ( int layer = 1 ; layer < this->numLayers ; layer++ )
{
//preallocate memory for the output of each layer.
layersOutput.push_back( std::vector<double>( this->sizes[ layer ] ) );

//-----------weights begin--------------
//auxMatrixWeights will store the weights connections between one layer (number of columns) and its subsequent layer (number of rows)
//auxMatrixWeights = new std::vector(this->sizes[layer], std::vector<double>( this->sizes[layer - 1] )); // it is not working...

//size[layer] stores the number of neurons on the layer
for ( int i = 0 ; i < this->sizes[ layer ] ; i++ )
{
//auxVectorWeights will have the size of the amount of wights necessary to connect the neuron i (from this layer) to neuron j (from next layer)
auxVectorWeights = std::vector<double>( this->sizes[ layer - 1 ] );


for ( int j = 0 ; j < auxVectorWeights.size() ; j++ )
{
auxVectorWeights[ j ] = this->randomNormalNumber();
}

auxMatrixWeights.push_back( auxVectorWeights );
}

this->weights.push_back( auxMatrixWeights );

auxMatrixWeights.clear();

//-----------weights end----------------


//-----------biases begin---------------
auxVectorBiases = std::vector<double>( this->sizes[ layer ] );

for ( int i = 0 ; i < auxVectorBiases.size() ; i++ )
{
auxVectorBiases[ i ] = this->randomNormalNumber();
}

this->biases.push_back( auxVectorBiases );
//-----------biases end-----------------
}

#ifdef _DEBUG
for ( int i = 0 ; i < this->weights.size() ; i++ )
{
std::cout << "layer " << i << "\n";
for ( int j = 0 ; j < this->weights[ i ].size() ; j++ )
{
std::cout << "neuron" << j << std::endl;
for ( const auto k : this->weights[ i ][ j ] )
{
std::cout << '\t' << k << ' ';
}
std::cout << std::endl;
}
}
#endif
}

template <class T>
inline int lastIndex(std::vector<T> vector , int tail)
{
return (vector.size() - tail);
}

double neuralnetwork::randomNormalNumber(void)
{
return this->distribution( this->generator );
}

double sigmoid(double z)
{
return 1.0 / ( 1.0 + exp( -z ) );
}

std::vector<double> sigmoid(const std::vector<double> & z)
{
int max = z.size();
std::vector<double> output;
output.reserve(max);

for(int i=0;i<max;i++)
{
output.push_back(0);
output[i] = 1.0 / ( 1.0 + exp( -z[i] ) );
}

return output;
}

/*double sigmoid_prime(double z)
{
return sigmoid( z ) * ( 1 - sigmoid( z ) );
}*/

std::vector<double> sigmoid_prime(const std::vector<double>& z)
{
int max = z.size();
std::vector<double> output;
output.reserve(max);

for(int i=0;i<max;i++)
{
output.push_back(sigmoid( z[i] ) * ( 1 - sigmoid( z[i] ) ) );
}

return output;
}

//scalar times vector
std::vector<double> operator* (double a , const std::vector<double> & b)
{
int size = b.size();

std::vector<double> result(size);

for ( int i = 0 ; i < size ; i++ )
{
result[i] = a * b[ i ];
}

return result;
}

// inner product
std::vector<double> operator* (const std::vector<double> & a , const std::vector<double> & b)
{

#ifdef _DEBUG
assert(a.size() == b.size());
#endif

int size = a.size(); // or b.size(). they should have the same size.

std::vector<double> result;
result.reserve(size); // or b.size(). they should have the same size.


for ( int i = 0 ; i < size ; i++ )
{
result.push_back( a[ i ] * b[ i ] );
}

return result;
}

//matrix times columns vector
std::vector<double> operator* (const std::vector<std::vector<double>> & a , const std::vector<double> & b)
{
#ifdef _DEBUG
assert(a[0].size() == b.size());

for(int i = 0 ; i < ( lastIndex( a , 1 )) ; i++)
{
assert(a[i].size() == a[i+1].size());
}
#endif

int lines = a.size();
int columns = a[0].size();

std::vector<double> result;
result.reserve(lines);

int j = 0;

for ( int i = 0 ; i < lines ; i++ )
{
result.push_back(0);
for(j = 0 ; j < columns ; j++)
{
result[i] += a[ i ][ j ] * b[ j ];
}
}

return result;
}

//scalar times matrix (calls scalar times vector)
std::vector<std::vector<double>> operator* (double a , const std::vector<std::vector<double>> & b)
{
#ifdef _DEBUG
for(int i = 0 ; i < b.size()-1 ; i++)
{
assert(b[i].size() == b[i+1].size());
}
#endif

int lines = b.size();
int columns = b[0].size();

std::vector<std::vector<double>> result;

int j = 0;

for ( int i = 0 ; i < lines ; i++ )
{
result.push_back(a * b[ j ]);
}

return result;
}

std::vector<double> operator+(const std::vector<double>& a, const std::vector<double>& b)
{
assert(a.size() == b.size());

int size = a.size();

std::vector<double> result;
result.reserve(size);

for(int i = 0 ; i < size ; i++)
{
result.push_back(0);
result[i] = a[i] + b[i];
}

return result;
}

//sum of matrices
std::vector<std::vector<double>> operator+(const std::vector<std::vector<double>>& a, const std::vector<std::vector<double>>& b)
{
#ifdef _DEBUG
assert(a.size() == b.size());
#endif

int size = a.size();

#ifdef _DEBUG
for(int i = 0 ; i < size ; i++)
{
assert(a[i].size() == b[i].size());
}
#endif

std::vector<std::vector<double>> result;
result.resize(size);

for(int i = 0 ; i < size ; i++)
{
result.push_back(a[i] + b[i]);
}

return result;
}


//subtraction of vectors
std::vector<double> operator-(const std::vector<double>& a, const std::vector<double>& b)
{
#ifdef _DEBUG
assert(a.size() == b.size());
#endif

int size = a.size();

std::vector<double> result;
result.resize(size);

for(int i = 0 ; i < size ; i++)
{
result[i] = a[i] - b[i];
}

return result;
}

//subtraction of matrices (calls subtraction of vectors)
std::vector<std::vector<double>> operator-(const std::vector<std::vector<double>>& a, const std::vector<std::vector<double>>& b)
{
#ifdef _DEBUG
assert(a.size() == b.size());
#endif

int size = a.size();

#ifdef _DEBUG
for(int i = 0 ; i < size ; i++)
{
assert(a[i].size() == b[i].size());
}
#endif

std::vector<std::vector<double>> result;
result.resize(size);

for(int i = 0 ; i < size ; i++)
{
result.push_back(a[i] - b[i]);
}

return result;
}

//elementwise division
std::vector<double> operator/(const std::vector<double>& a, const std::vector<double>& b)
{
assert(a.size() == b.size());

int size = a.size();

std::vector<double> result;
result.reserve(size);

for(int i = 0 ; i < size ; i++)
{
if(b[i] < MIN_NUMBER_TOLERANCE)
{
throw std::runtime_error("Can't divide by zero!");
}
result[i] = a[i] / b[i];
}

return result;
}

double neuralnetwork::costDerivatives(const double &networkOutput , const double &expectedOutput)
{
return expectedOutput - networkOutput;
}

std::vector<double> neuralnetwork::costDerivatives(const std::vector<double> &networkOutput , const std::vector<double> &expectedOutput)
{
assert(expectedOutput.size() == networkOutput.size());
int size = networkOutput.size();
std::vector<double> output;
output.reserve(size);

for(int i = 0 ; i < size ; i++)
{
output.push_back(networkOutput[i] - expectedOutput[i]);
}

return output;
}

void neuralnetwork::backPropagation(const std::vector<double> &neuralNetworkInputs , const std::vector<double> &expectedOutputs, // inputs
std::vector<std::vector<std::vector<double>>>& derivativeWeights , std::vector<std::vector<double>>& derivativeBiases) // outputs
{


std::cout << "teste "<< counter++ << std::endl;
system("PAUSE");



derivativeWeights.reserve( sizes.size() - 1 );
derivativeBiases.reserve( sizes.size() - 1 );

//to store one activation layer
std::vector<double> activation = neuralNetworkInputs;
//to store each one of the activation layers
std::vector<std::vector<double>> activations;

activations.reserve(sizes.size()); // numBiases is the same as the number of neurons (except 1st layer)
activations.push_back(activation);
int maxLayerSize = 0;


std::cout << "teste "<< counter++ << std::endl;
system("PAUSE");




for ( int i = 1 ; i < numBiases ; i++ )
{
maxLayerSize = std::max(sizes[i], maxLayerSize);
}


std::cout << "teste "<< counter++ << std::endl;
system("PAUSE");




// to store one weighted sum
std::vector<double> z;
z.reserve(maxLayerSize);
// to store each one of the weighted sums
std::vector<std::vector<double>> zs;
zs.reserve(sizes.size());

// layer and neuron counter
int layer, neuron;

for ( layer = 1 ; layer < numLayers ; layer++ )
{
z = (weights[layer] * activation) + biases[layer];
zs.push_back(z);
activation = sigmoid(z);
activations.push_back(activation);
}


std::cout << "teste "<< counter++ << std::endl;
system("PAUSE");




std::vector<double> delta = costDerivatives(activations[ lastIndex( activations , 1 )] , expectedOutputs) * sigmoid_prime(z);
delta.reserve(maxLayerSize);

derivativeBiases.push_back(delta);

int j;

std::vector<std::vector<double>> dummyMatrix;
dummyMatrix.reserve(maxLayerSize);


for (neuron = 0; neuron < sizes[ lastIndex( sizes , 1 )]; neuron++)
{
dummyMatrix.push_back(std::vector<double>(activations[ lastIndex( activations , 2 )].size()));
for (j = 0; j < activations[ lastIndex( activations , 2 )].size(); j++)
{
dummyMatrix[neuron][j] = delta[neuron] * activations[ lastIndex( activations , 2 )][j];
}
}


std::cout << "teste "<< counter++ << std::endl;
system("PAUSE");



derivativeWeights.push_back(dummyMatrix);
dummyMatrix.clear();

std::vector<double> sp;
sp.reserve(maxLayerSize);

std::vector<double> dummyVector;
dummyVector.reserve(maxLayerSize);

double dummyDouble = 0;

for(layer = 2 ; layer < numLayers ; layer++)
{
z = zs[ lastIndex( zs , layer )];
sp = sigmoid_prime(z);

for(j = 0 ; j < sizes[ lastIndex( weights , layer )] ; j++)
{
for (neuron = 0; neuron < sizes[ lastIndex( sizes , layer - 1 )]; neuron++)
{
dummyDouble += weights[ lastIndex( weights , layer - 1 )][neuron][j] * delta[neuron];
}
dummyVector.push_back(dummyDouble * sp[j]);
dummyDouble = 0;
}
delta = dummyVector;
dummyVector.clear();

derivativeBiases.push_back(delta);

for (neuron = 0; neuron < sizes[ lastIndex( sizes , layer )]; neuron++)
{
dummyMatrix.push_back(std::vector<double>(sizes[ lastIndex( sizes , layer + 1 )]));
for (j = 0; j < sizes[ lastIndex( sizes , layer + 1 )]; j++)
{
dummyMatrix[neuron][j] = activations[ lastIndex( activations , layer + 1 )][j] * delta[neuron];
}
}
derivativeWeights.push_back(dummyMatrix);
dummyMatrix.clear();
}


std::cout << "teste "<< counter++ << std::endl;
system("PAUSE");




//both derivativeWeights and derivativeBiases are reversed. so let's reverse it.
std::reverse(derivativeWeights.begin(),derivativeWeights.end());
std::reverse(derivativeBiases.begin(),derivativeBiases.end());


std::cout << "teste "<< counter++ << std::endl;
system("PAUSE");




}
}

main.cpp

#include <stdio.h>
#include <opencv2/opencv.hpp>
#include "neuralnetwork.h"
#include <string>

void printAll(const std::vector<double> & v, const std::string & name)
{
int size = v.size();

std::cout << "\t" << name << ":\t";

for(int i = 0 ; i < size ; i++)
{
std::cout << v[i] << "\t";
}

std::cout << std::endl;
}

template<class T>
void printAll(const std::vector<T> & v, const std::string & name)
{
int size = v.size();

std::cout << name << ":" << std::endl;

for(int i = 0 ; i < size ; i++)
{
printAll(v[i], "\t" + ("[" + std::to_string(i)) + "]");
}
}

int main(int argc, char** argv )
{

nn::neuralnetwork n({2,4,3});

n.weights = {{},{{1,2},{3,4},{5,6},{7,8}} , {{9,8,7,6},{5,4,3,2},{1,2,3,4}}};
n.biases = {{},{1, 4, 6, 8} , {9, 2, 4}};

printAll(n.weights,"weights");
printAll(n.biases,"biases");

std::vector<std::vector<std::vector<double>>> derivativeWeights;
std::vector<std::vector<double>> derivativeBiases;
n.backPropagation({1,2},{1,2,3},derivativeWeights,derivativeBiases);

printAll(n.derivativeWeights,"derivativeWeights");
printAll(n.derivativeBiases,"derivativeBiases");

system("PAUSE");

return 0;
}

最佳答案

看起来你的问题是你只是在构造函数中为 vector 保留内存,而不是分配它。

reserve 方法不会调整 vector 的大小,在您知道将来会调整 vector 的大小但优化编译器可以随意忽略它的情况下,它是一种性能优化。

这不会导致此特定代码中的“权重”和“偏差”出现问题,因为您使用适当大小的 vector 初始化它们,这确实将它们设置为正确的大小。问题出在 derivativeWeights 和 derivativeBiases 上,您在其中为 vector 保留内存,但实际上从未调整它们的大小。如果您尝试取消引用它,这会使该内存可能无效。您可以使用 resize 而不是 reserve,或者将元素一个一个地推回,这也会调整 vector 的大小。

另一个评论是,您不必为类的每个成员都使用 this->,如果您不使用它,则假定为类的成员使用“this->”。

关于c++ - 在 Release 中获取 std::bad_alloc 但在 Debug 中不获取,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/47339033/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com