gpt4 book ai didi

c++ - 此 OpenMP 代码使 Linux 崩溃

转载 作者:太空狗 更新时间:2023-10-29 12:01:41 26 4
gpt4 key购买 nike

我正在编写一些代码,使用 OpenMP 对矩阵进行行化简(row reduction)。我有两个版本,它们都会让我的 Ubuntu 和 Fedora 系统硬崩溃。所谓硬崩溃,是指鼠标和键盘完全没有响应,即使按下机箱上的重置按钮也无法重新启动,只能长按电源键强制关机。奇怪的是,代码要运行几分钟之后才会崩溃。它也不会消耗大量内存(我有 16 GB 内存,所以我认为 750 MB 很小)。

#include <iostream>
#include <cstddef>
#include <cstring>
#include <iomanip>
#include <cstdlib>
#include <ctime>
#include <cmath>

using namespace std;

/**
 * Dense row-major matrix of doubles with in-place Gauss-Jordan row reduction.
 *
 * All elements live in one contiguous heap array of height() * width()
 * doubles; m[r][c] addresses row r, column c.  Indices are never
 * bounds-checked.
 */
class Matrix
{
public:

    /**
     * Creates a zero-filled matrix.
     * @param rows number of rows (reported by height())
     * @param cols number of columns (reported by width())
     */
    Matrix(size_t rows, size_t cols):
        data(0), w(cols), h(rows)
    {
        data = new double[w * h];
        memset(data, 0, sizeof(double) * w * h);
    }

    /** Deep copy: the new matrix gets its own element array. */
    Matrix(const Matrix& other):
        data(0), w(other.w), h(other.h)
    {
        data = new double[w * h];
        memcpy(data, other.data, sizeof(double) * w * h);
    }

    /** Deep-copy assignment; self-assignment is a no-op. */
    Matrix& operator=(const Matrix& other)
    {
        if(this != &other)
        {
            // Allocate before releasing so *this stays intact if new throws.
            double* fresh = new double[other.w * other.h];
            memcpy(fresh, other.data, sizeof(double) * other.w * other.h);
            delete[] data;
            data = fresh;
            w = other.w;
            h = other.h;
        }
        return *this;
    }

    ~Matrix()
    {
        delete[] data;
        data = 0;
        w = h = 0;
    }

    /** Returns a pointer to the first element of the given row. */
    double* operator[](size_t row)
    {
        return data + row * w;
    }

    /** Read-only variant of operator[]. */
    const double* operator[](size_t row) const
    {
        return data + row * w;
    }

    /** Number of columns. */
    size_t width() const
    {
        return w;
    }

    /** Number of rows. */
    size_t height() const
    {
        return h;
    }

    /** Multiplies every element of `row` by `x`. */
    void scale_row(size_t row, double x)
    {
        double* prow = (*this)[row];

        for(size_t i = 0; i < w; i++)
            prow[i] *= x;
    }

    /**
     * Adds `scaling` times row `source_row` to row `dest_row`.
     * When both indices are equal the row is scaled by (1 + scaling) instead.
     */
    void add_row(size_t dest_row, size_t source_row, double scaling = 1.0)
    {
        if(dest_row == source_row)
        {
            scale_row(dest_row, 1.0 + scaling);
            return;
        }

        // Distinct rows never overlap, which is what makes __restrict__ valid.
        double* __restrict__ drow = (*this)[dest_row];
        double* __restrict__ srow = (*this)[source_row];

        for(size_t i = 0; i < w; i++)
            drow[i] += srow[i] * scaling;
    }

    /** Exchanges the contents of rows `r1` and `r2`; no-op when they are equal. */
    void swap_rows(size_t r1, size_t r2)
    {
        if(r1 == r2)
            return;

        double* __restrict__ a = (*this)[r1];
        double* __restrict__ b = (*this)[r2];

        #pragma omp parallel for simd
        for(size_t i = 0; i < w; i++)
        {
            double tmp = a[i];
            a[i] = b[i];
            b[i] = tmp;
        }
    }

    /**
     * Returns a pointer to the first non-zero element of `row`,
     * or a null pointer when the whole row is zero.
     */
    double* find_leading(size_t row)
    {
        double* ptr = (*this)[row];
        for(size_t i = 0; i < w; i++)
            if(ptr[i])
                return ptr + i;
        return 0;
    }

    /** Sets every element whose absolute value is below `threshold` to 0. */
    void clamp_zeros(double threshold = 1e-12)
    {
        #pragma omp parallel for simd
        for(size_t i = 0; i < w * h; i++)
        {
            if(fabs(data[i]) < threshold)
                data[i] = 0;
        }
    }

    /**
     * Row-reduces the matrix in place.
     *
     * Each row's leading entry is scaled to 1 and its column is eliminated
     * from every other row.  Afterwards the rows are ordered by the column
     * of their leading entry, with all-zero rows at the bottom.
     *
     * @param mirror optional matrix that receives the same scale, add and
     *               swap operations; it needs at least height() rows.
     *               For example, a mirror that starts as the identity ends
     *               up holding the inverse when this matrix is invertible.
     */
    void row_reduce(Matrix* mirror = 0)
    {
        for(size_t r1 = 0; r1 < h; r1++)
        {
            double* lead = find_leading(r1);
            if(!lead)
                continue;

            // Column index of the leading entry of row r1.
            size_t rank = lead - (*this)[r1];
            if(mirror)
                mirror->scale_row(r1, 1.0 / *lead);
            scale_row(r1, 1.0 / *lead);

            // Every iteration writes only row r2 and reads only row r1,
            // so the iterations are independent of each other.
            #pragma omp parallel for
            for(size_t r2 = 0; r2 < h; r2++)
            {
                if(r2 == r1 || (*this)[r2][rank] == 0)
                    continue;
                if(mirror)
                    mirror->add_row(r2, r1, -(*this)[r2][rank]);
                add_row(r2, r1, -(*this)[r2][rank]);
            }
            // Flush rounding residue to exact zero so find_leading skips it.
            clamp_zeros();
        }

        // Sort key per row: column of its leading entry, or w for an
        // all-zero row so that such rows end up last.
        size_t* key = new size_t[h];
        for(size_t r = 0; r < h; r++)
        {
            const double* lead = find_leading(r);
            key[r] = lead ? static_cast<size_t>(lead - (*this)[r]) : w;
        }

        // Selection sort on the keys.  Keys are row-local, so this works for
        // any shape and never uses a column index as a row index.
        for(size_t r = 0; r + 1 < h; r++)
        {
            size_t best = r;
            for(size_t s = r + 1; s < h; s++)
                if(key[s] < key[best])
                    best = s;

            if(best == r)
                continue;

            swap_rows(r, best);
            if(mirror)
                mirror->swap_rows(r, best);

            const size_t moved = key[r];
            key[r] = key[best];
            key[best] = moved;
        }
        delete[] key;
    }

private:

    double* data;   // owned array of w * h elements, row-major
    size_t w, h;    // column count, row count
};

ostream& operator<<(ostream& o, const Matrix& m)
{
o << setprecision(2);
for(size_t j = 0; j < m.width(); j++)
{
o << "----------";
}
o << "--\n";
for(size_t i = 0; i < m.height(); i++)
{
o << "|";
for(size_t j = 0; j < m.width(); j++)
{
o << setw(10) << m[i][j];
}
o << "|\n";
}
for(size_t j = 0; j < m.width(); j++)
{
o << "----------";
}
o << "--";
return o;
}

/**
 * Benchmark driver: fills a 10000 x 10000 matrix with pseudo-random integers
 * in [0, 99], row-reduces it, then prints one element of the result and the
 * elapsed wall-clock time in whole seconds.
 */
int main()
{
    // srand takes unsigned; make the narrowing from time_t explicit.
    srand(static_cast<unsigned>(time(0)));
    Matrix m (10000, 10000);

    // size_t indices match the type returned by height() and width().
    for(size_t i = 0; i < m.height(); i++)
    {
        for(size_t j = 0; j < m.width(); j++)
        {
            m[i][j] = rand() % 100;
        }
    }

    time_t start = time(0);
    m.row_reduce();
    time_t end = time(0);
    std::cout << m[0][2] << std::endl;
    std::cout << "dt = " << (end - start) << std::endl;
    return 0;
}

我还尝试了另一个非常简单的 OpenMP 程序,想看看它是否也会使我的系统崩溃,结果它不会。

// Accumulates 1 / i^2 for i in [1, 10^14) across OpenMP threads, then prints
// the elapsed wall-clock seconds followed by the sum.
double sum = 0.0;

double start = omp_get_wtime();
#pragma omp parallel for reduction(+:sum)
for(long long i = 1; i < 100000000000000LL; i++)
{
    // Convert once, then square in floating point.
    const double x = (double)i;
    sum += 1.0 / (x * x);
}
printf("%lf %lf\n", omp_get_wtime() - start, sum);

第一个程序我在 Ubuntu 15.04(用 gcc 4.9 编译)和 Fedora 22(用 gcc 5.1 编译)上都试过,运行时遇到了同样的问题。

当我在没有 openmp 的情况下运行它时,它工作正常。此外,如果我尝试较小的数据,如 2000x2000 矩阵,它工作正常(当我尝试 10,000x10,000 矩阵时发生崩溃)。

它在我的笔记本电脑上似乎运行良好,那台笔记本运行的也是 Ubuntu 15.04。

最佳答案

我对代码进行了一些更改以支持与 OpenMP 2.0 的兼容性,我可以告诉您,您的代码运行良好(Windows 7、Visual Studio 2008)。内存消耗约 800MB。

输出:

0

dt = 2881

这是您修改后的代码。

////////////////////////////////////////////////////////////////
// OpenMP test function
#include <iostream>
#include <cstddef>
#include <cstring>
#include <iomanip>
#include <cstdlib>
#include <ctime>
#include <cmath>
#include <omp.h>

using namespace std;

/**
 * Dense row-major matrix of doubles with in-place Gauss-Jordan row reduction.
 *
 * All elements live in one contiguous heap array of height() * width()
 * doubles; m[r][c] addresses row r, column c.  Indices are never
 * bounds-checked.  OpenMP loops use a signed counter, which OpenMP 2.0
 * requires.
 */
class Matrix
{
public:

    /**
     * Creates a zero-filled matrix.
     * @param rows number of rows (reported by height())
     * @param cols number of columns (reported by width())
     */
    Matrix(size_t rows, size_t cols):
        data(0), w(cols), h(rows)
    {
        data = new double[w * h];
        memset(data, 0, sizeof(double) * w * h);
    }

    /** Deep copy: the new matrix gets its own element array. */
    Matrix(const Matrix& other):
        data(0), w(other.w), h(other.h)
    {
        data = new double[w * h];
        memcpy(data, other.data, sizeof(double) * w * h);
    }

    /** Deep-copy assignment; self-assignment is a no-op. */
    Matrix& operator=(const Matrix& other)
    {
        if(this != &other)
        {
            // Allocate before releasing so *this stays intact if new throws.
            double* fresh = new double[other.w * other.h];
            memcpy(fresh, other.data, sizeof(double) * other.w * other.h);
            delete[] data;
            data = fresh;
            w = other.w;
            h = other.h;
        }
        return *this;
    }

    ~Matrix()
    {
        delete[] data;
        data = 0;
        w = h = 0;
    }

    /** Returns a pointer to the first element of the given row. */
    double* operator[](size_t row)
    {
        return data + row * w;
    }

    /** Read-only variant of operator[]. */
    const double* operator[](size_t row) const
    {
        return data + row * w;
    }

    /** Number of columns. */
    size_t width() const
    {
        return w;
    }

    /** Number of rows. */
    size_t height() const
    {
        return h;
    }

    /** Multiplies every element of `row` by `x`. */
    void scale_row(size_t row, double x)
    {
        double* prow = (*this)[row];

        for(size_t i = 0; i < w; i++)
            prow[i] *= x;
    }

    /**
     * Adds `scaling` times row `source_row` to row `dest_row`.
     * When both indices are equal the row is scaled by (1 + scaling) instead.
     */
    void add_row(size_t dest_row, size_t source_row, double scaling = 1.0)
    {
        if(dest_row == source_row)
        {
            scale_row(dest_row, 1.0 + scaling);
            return;
        }

        double* drow = (*this)[dest_row];
        double* srow = (*this)[source_row];

        for(size_t i = 0; i < w; i++)
            drow[i] += srow[i] * scaling;
    }

    /** Exchanges the contents of rows `r1` and `r2`; no-op when they are equal. */
    void swap_rows(size_t r1, size_t r2)
    {
        if(r1 == r2)
            return;

        double* a = (*this)[r1];
        double* b = (*this)[r2];

        // Uniform, tiny iterations: a static split avoids the per-iteration
        // dispatch cost of schedule(dynamic) with its default chunk of 1.
        const long long count = static_cast<long long>(w);
        #pragma omp parallel for schedule(static)
        for(long long i = 0; i < count; i++)
        {
            double tmp = a[i];
            a[i] = b[i];
            b[i] = tmp;
        }
    }

    /**
     * Returns a pointer to the first non-zero element of `row`,
     * or a null pointer when the whole row is zero.
     */
    double* find_leading(size_t row)
    {
        double* ptr = (*this)[row];
        for(size_t i = 0; i < w; i++)
            if(ptr[i])
                return ptr + i;
        return 0;
    }

    /** Sets every element whose absolute value is below `threshold` to 0. */
    void clamp_zeros(double threshold = 1e-12)
    {
        // 64-bit signed counter: w * h can exceed the range of int.
        const long long count = static_cast<long long>(w * h);
        #pragma omp parallel for schedule(static)
        for(long long i = 0; i < count; i++)
        {
            if(fabs(data[i]) < threshold)
                data[i] = 0;
        }
    }

    /**
     * Row-reduces the matrix in place.
     *
     * Each row's leading entry is scaled to 1 and its column is eliminated
     * from every other row.  Afterwards the rows are ordered by the column
     * of their leading entry, with all-zero rows at the bottom.
     *
     * @param mirror optional matrix that receives the same scale, add and
     *               swap operations; it needs at least height() rows.
     *               For example, a mirror that starts as the identity ends
     *               up holding the inverse when this matrix is invertible.
     */
    void row_reduce(Matrix* mirror = 0)
    {
        const long long row_count = static_cast<long long>(h);

        for(size_t r1 = 0; r1 < h; r1++)
        {
            double* lead = find_leading(r1);
            if(!lead)
                continue;

            // Column index of the leading entry of row r1.
            size_t rank = lead - (*this)[r1];
            if(mirror)
                mirror->scale_row(r1, 1.0 / *lead);
            scale_row(r1, 1.0 / *lead);

            // Every iteration writes only row r2 and reads only row r1, so
            // the iterations are independent.  Rows whose entry is already
            // zero are skipped, so the work per iteration is uneven and the
            // dynamic schedule is kept here.
            const long long pivot_row = static_cast<long long>(r1);
            #pragma omp parallel for schedule(dynamic)
            for(long long r2 = 0; r2 < row_count; r2++)
            {
                const size_t row = static_cast<size_t>(r2);
                if(r2 == pivot_row || (*this)[row][rank] == 0)
                    continue;
                if(mirror)
                    mirror->add_row(row, r1, -(*this)[row][rank]);
                add_row(row, r1, -(*this)[row][rank]);
            }
            // Flush rounding residue to exact zero so find_leading skips it.
            clamp_zeros();
        }

        // Sort key per row: column of its leading entry, or w for an
        // all-zero row so that such rows end up last.
        size_t* key = new size_t[h];
        for(size_t r = 0; r < h; r++)
        {
            const double* lead = find_leading(r);
            key[r] = lead ? static_cast<size_t>(lead - (*this)[r]) : w;
        }

        // Selection sort on the keys.  Keys are row-local, so this works for
        // any shape and never uses a column index as a row index.
        for(size_t r = 0; r + 1 < h; r++)
        {
            size_t best = r;
            for(size_t s = r + 1; s < h; s++)
                if(key[s] < key[best])
                    best = s;

            if(best == r)
                continue;

            swap_rows(r, best);
            if(mirror)
                mirror->swap_rows(r, best);

            const size_t moved = key[r];
            key[r] = key[best];
            key[best] = moved;
        }
        delete[] key;
    }

private:

    double* data;   // owned array of w * h elements, row-major
    size_t w, h;    // column count, row count
};

ostream& operator<<(ostream& o, const Matrix& m)
{
o << setprecision(2);
for(size_t j = 0; j < m.width(); j++)
{
o << "----------";
}
o << "--\n";
for(size_t i = 0; i < m.height(); i++)
{
o << "|";
for(size_t j = 0; j < m.width(); j++)
{
o << setw(10) << m[i][j];
}
o << "|\n";
}
for(size_t j = 0; j < m.width(); j++)
{
o << "----------";
}
o << "--";
return o;
}

/**
 * Benchmark driver: configures the OpenMP runtime, fills a 10000 x 10000
 * matrix with pseudo-random integers in [0, 99], row-reduces it, then prints
 * one element of the result and the elapsed wall-clock time in whole seconds.
 */
int main()
{
    // Request the runtime's maximum thread count, turn off dynamic
    // adjustment of the team size and enable nested parallel regions.
    int iMaxThreads = omp_get_max_threads();
    omp_set_num_threads(iMaxThreads);

    omp_set_dynamic(false);
    omp_set_nested(true);

    // srand takes unsigned; make the narrowing from time_t explicit.
    srand(static_cast<unsigned>(time(0)));
    Matrix m (10000, 10000);

    // size_t indices match the type returned by height() and width().
    for(size_t i = 0; i < m.height(); i++)
    {
        for(size_t j = 0; j < m.width(); j++)
        {
            m[i][j] = rand() % 100;
        }
    }

    time_t start = time(0);
    m.row_reduce();
    time_t end = time(0);
    std::cout << m[0][2] << std::endl;
    std::cout << "dt = " << (end - start) << std::endl;
    return 0;
}

关于c++ - 此 OpenMP 代码使 Linux 崩溃,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/33091435/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com