gpt4 book ai didi

c++ - 自定义类 vector 的类函数的段错误

转载 作者:太空宇宙 更新时间:2023-11-04 12:50:17 25 4
gpt4 key购买 nike

我正在使用以下代码在鸢尾花数据集上运行 kmeans 算法- https://github.com/marcoscastro/kmeans/blob/master/kmeans.cpp

我修改了上面的代码以从文件中读取输入。下面是我的代码-

#include <math.h>
#include <stdlib.h>
#include <time.h>

#include <algorithm>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
using namespace std;

// A single observation: an identifier, a vector of numeric feature values,
// an optional label, and the id of the cluster it is currently assigned to.
class Point
{
private:
    int id_point, id_cluster;
    std::vector<double> values;
    std::string name;

public:
    // Builds a point holding a copy of `values`.
    //   id_point: caller-chosen identifier for this point.
    //   values:   feature values; copied, the caller's vector is not modified.
    //   name:     optional label, empty by default.
    // A new point is not assigned to any cluster (cluster id -1).
    Point(int id_point, const std::vector<double>& values, std::string name = "")
        : id_point(id_point), id_cluster(-1), values(values), name(name)
    {
    }

    // Identifier given at construction.
    int getID() const
    {
        return id_point;
    }

    // Records the id of the cluster this point now belongs to.
    void setCluster(int id_cluster)
    {
        this->id_cluster = id_cluster;
    }

    // Id of the cluster this point belongs to, or -1 if never assigned.
    int getCluster() const
    {
        return id_cluster;
    }

    // Feature value at `index`. No bounds check is performed: `index` must be
    // in [0, getTotalValues()).
    double getValue(int index) const
    {
        return values[index];
    }

    // Number of feature values currently stored. Derived from the vector
    // itself so that it stays correct after addValue().
    int getTotalValues() const
    {
        return static_cast<int>(values.size());
    }

    // Appends one more feature value to the point.
    void addValue(double value)
    {
        values.push_back(value);
    }

    // Label given at construction (may be empty).
    std::string getName() const
    {
        return name;
    }
};

class Cluster
{
private:
int id_cluster;
vector<double> central_values;
vector<Point> points;

public:
Cluster(int id_cluster, Point point)
{
this->id_cluster = id_cluster;

int total_values = point.getTotalValues();

for(int i = 0; i < total_values; i++)
central_values.push_back(point.getValue(i));

points.push_back(point);
}

void addPoint(Point point)
{
points.push_back(point);
}

bool removePoint(int id_point)
{
int total_points = points.size();

for(int i = 0; i < total_points; i++)
{
if(points[i].getID() == id_point)
{
points.erase(points.begin() + i);
return true;
}
}
return false;
}

double getCentralValue(int index)
{
return central_values[index];
}

void setCentralValue(int index, double value)
{
central_values[index] = value;
}

Point getPoint(int index)
{
return points[index];
}

int getTotalPoints()
{
return points.size();
}

int getID()
{
return id_cluster;
}
};

class KMeans
{
private:
int K; // number of clusters
int total_values, total_points, max_iterations;
vector<Cluster> clusters;

// return ID of nearest center (uses euclidean distance)
int getIDNearestCenter(Point point)
{
double sum = 0.0, min_dist;
int id_cluster_center = 0;

for(int i = 0; i < total_values; i++)
{
sum += pow(clusters[0].getCentralValue(i) -
point.getValue(i), 2.0);
}

min_dist = sqrt(sum);

for(int i = 1; i < K; i++)
{
double dist;
sum = 0.0;

for(int j = 0; j < total_values; j++)
{
sum += pow(clusters[i].getCentralValue(j) -
point.getValue(j), 2.0);
}

dist = sqrt(sum);

if(dist < min_dist)
{
min_dist = dist;
id_cluster_center = i;
}
}

return id_cluster_center;
}

public:
KMeans(int K, int total_points, int total_values, int max_iterations)
{
this->K = K;
this->total_points = total_points;
this->total_values = total_values;
this->max_iterations = max_iterations;
}

void run(vector<Point> & points)
{
if(K > total_points)
return;

vector<int> prohibited_indexes;
printf("Inside run \n");
// choose K distinct values for the centers of the clusters
printf(" K distinct cluster\n");
for(int i = 0; i < K; i++)
{
while(true)
{
int index_point = rand() % total_points;

if(find(prohibited_indexes.begin(), prohibited_indexes.end(),
index_point) == prohibited_indexes.end())
{
printf("i= %d\n",i);
prohibited_indexes.push_back(index_point);
points[index_point].setCluster(i);
Cluster cluster(i, points[index_point]);
clusters.push_back(cluster);
break;
}
}
}

int iter = 1;
printf(" Each point to nearest cluster\n");
while(true)
{
bool done = true;

// associates each point to the nearest center
for(int i = 0; i < total_points; i++)
{
int id_old_cluster = points[i].getCluster();
int id_nearest_center = getIDNearestCenter(points[i]);

if(id_old_cluster != id_nearest_center)
{
if(id_old_cluster != -1)
clusters[id_old_cluster].removePoint(points[i].getID());

points[i].setCluster(id_nearest_center);
clusters[id_nearest_center].addPoint(points[i]);
done = false;
}
}

// recalculating the center of each cluster
for(int i = 0; i < K; i++)
{
for(int j = 0; j < total_values; j++)
{
int total_points_cluster = clusters[i].getTotalPoints();
double sum = 0.0;

if(total_points_cluster > 0)
{
for(int p = 0; p < total_points_cluster; p++)
sum += clusters[i].getPoint(p).getValue(j);
clusters[i].setCentralValue(j, sum / total_points_cluster);
}
}
}

if(done == true || iter >= max_iterations)
{
cout << "Break in iteration " << iter << "\n\n";
break;
}

iter++;
}

// shows elements of clusters
for(int i = 0; i < K; i++)
{
int total_points_cluster = clusters[i].getTotalPoints();

cout << "Cluster " << clusters[i].getID() + 1 << endl;
for(int j = 0; j < total_points_cluster; j++)
{
cout << "Point " << clusters[i].getPoint(j).getID() + 1 << ": ";
for(int p = 0; p < total_values; p++)
cout << clusters[i].getPoint(j).getValue(p) << " ";

string point_name = clusters[i].getPoint(j).getName();

if(point_name != "")
cout << "- " << point_name;

cout << endl;
}

cout << "Cluster values: ";

for(int j = 0; j < total_values; j++)
cout << clusters[i].getCentralValue(j) << " ";

cout << "\n\n";
}
}
};

int main(int argc, char *argv[])
{
srand(time(NULL));

int total_points, total_values, K, max_iterations, has_name;

ifstream inFile("datafile.txt");

if (!inFile) {
cerr << "Unable to open file datafile.txt";
exit(1); // call system to stop
}

inFile >> total_points >> total_values >> K >> max_iterations >> has_name;
cout << "Details- \n";
vector<Point> points;

string point_name,str;
int i=0;
while(inFile.eof())
{

string temp;
vector<double> values;
for(int j = 0; j < total_values; j++)
{
double value;
inFile >> value;
values.push_back(value);
}
if(has_name)
{
inFile >> point_name;
Point p(i, values, point_name);
points.push_back(p);
i++;
}
else
{
inFile >> temp;
Point p(i, values);
points.push_back(p);
i++;
}

}

inFile.close();

KMeans kmeans(K, total_points, total_values, max_iterations);
kmeans.run(points);

return 0;
}

代码的输出是 -

 Details- 
15043100000Inside run
K distinct cluster i= 0
Segmentation fault

当我在 gdb 中运行它时,显示的错误是 -

Program received signal SIGSEGV, Segmentation fault.
0x0000000000401db6 in Point::setCluster (this=0x540, id_cluster=0)
at kmeans.cpp:41
41 this->id_cluster = id_cluster;

我被困在了这里,因为我找不到这个段错误的原因。

我的数据集文件看起来像 -

150 4 3 10000 1
5.1,3.5,1.4,0.2,Iris-setosa
4.9,3.0,1.4,0.2,Iris-setosa
4.7,3.2,1.3,0.2,Iris-setosa
. . .
7.0,3.2,4.7,1.4,Iris-versicolor
6.4,3.2,4.5,1.5,Iris-versicolor
6.9,3.1,4.9,1.5,Iris-versicolor
5.5,2.3,4.0,1.3,Iris-versicolor
6.5,2.8,4.6,1.5,Iris-versicolor
. . .

最佳答案

在 KMeans::run(vector<Point>&) 中,你调用了 points[index_point].setCluster(i);,但并不能保证 index_point 在有效范围内。

index_point 由 int index_point = rand() % total_points; 决定,而 total_points 是从输入文件 "datafile.txt" 中读取的,它可以是任何值。它当然不一定等于 points.size(),但它应该相等。请确保二者一致,或者直接改用 points.size()。

有点跑题,但使用 rand() 并且只用取模运算来限定范围,几乎总是错的。如果您使用 C++11 或更新版本,请考虑使用 std::uniform_int_distribution。

关于c++ - 自定义类 vector 的类函数的段错误,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/49392202/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com