c++ - RBM 在代码上与 OpenACC 没有改进-6ren

c++ - RBM 在代码上与 OpenACC 没有改进

转载作者：行者123 更新时间：2023-11-28 05:23:37

RBM 算法是开源的，源代码可在此处获得: https://github.com/yusugomori/DeepLearning/tree/master/cpp

我尝试过用多种方式通过 OpenACC 来加速这段代码，但顺序执行的版本仍然更快。能否告诉我应该怎么做(哪些部分需要改进)才能获得更好的加速效果？

#include <iostream>
#include <math.h>
#include "utils.h"
#include "RBM.h"
using namespace std;
using namespace utils;


// Constructs an RBM with n_v visible and n_h hidden units. `size` is stored in
// N, which contrastive_divergence uses as the divisor of every update.
// For each of w / hb / vb that is NULL the constructor allocates the array
// itself (W filled with uniform(-a, a) where a = 1/n_visible, biases set to 0);
// otherwise the caller's pointer is stored as-is.
RBM::RBM(int size, int n_v, int n_h, double **w, double *hb, double *vb) {
N = size;
n_visible = n_v;
n_hidden = n_h;

// Creates the device copy of *this at this point, i.e. before W, hbias and
// vbias are assigned below.
// NOTE(review): the pointer members are therefore not yet set when the object
// is copied, and nothing later in this constructor places W, hbias or vbias on
// the device - confirm this is intended.
#pragma acc enter data copyin ( this)

//#pragma acc enter data copy ( W[0:n_hidden][0:n_visible] )
if(w == NULL) {
W = new double*[n_hidden];
for(int i=0; i<n_hidden; i++) W[i] = new double[n_visible];
// Half-width of the symmetric interval the initial weights are drawn from.
double a = 1.0 / n_visible;

for(int i=0; i<n_hidden; i++) {
for(int j=0; j<n_visible; j++) {
W[i][j] = uniform(-a, a);
}
}
} else {
W = w;
}

if(hb == NULL) {
hbias = new double[n_hidden];
for(int i=0; i<n_hidden; i++) hbias[i] = 0;
} else {
hbias = hb;
}

if(vb == NULL) {
vbias = new double[n_visible];
for(int i=0; i<n_visible; i++) vbias[i] = 0;
} else {
vbias = vb;
}
}

// Removes the device copies of W and *this, then frees the host arrays.
// NOTE(review): delete[] is applied to W, hbias and vbias even when the
// constructor merely stored pointers supplied by the caller - confirm that
// callers hand over ownership.
RBM::~RBM() {

// NOTE(review): the constructor issues no matching `enter data` for W; the only
// one in this listing is inside propdown - confirm the device reference counts
// balance.
#pragma acc exit data delete ( W[0:n_hidden][0:n_visible],this )

for(int i=0; i<n_hidden; i++) delete[] W[i];
delete[] W;
delete[] hbias;
delete[] vbias;
}


// Performs one CD-k update of W, hbias and vbias from a single input vector.
//   input : visible vector; n_visible entries are read
//   lr    : step size multiplied into every update
//   k     : number of gibbs_hvh steps to run
void RBM::contrastive_divergence(int *input, double lr, int k) {
    // Scratch buffers: hidden statistics for the input, then visible/hidden
    // statistics produced by the Gibbs chain.
    double *pos_h_mean   = new double[n_hidden];
    int    *pos_h_sample = new int[n_hidden];
    double *neg_v_mean   = new double[n_visible];
    int    *neg_v_sample = new int[n_visible];
    double *neg_h_mean   = new double[n_hidden];
    int    *neg_h_sample = new int[n_hidden];

    sample_h_given_v(input, pos_h_mean, pos_h_sample);

    // The first step starts from the hidden sample of the input; every later
    // step restarts from the hidden sample the previous step produced.
    int *chain_start = pos_h_sample;
    for(int step=0; step<k; ++step) {
        gibbs_hvh(chain_start, neg_v_mean, neg_v_sample, neg_h_mean, neg_h_sample);
        chain_start = neg_h_sample;
    }

    for(int h=0; h<n_hidden; ++h) {
        double *row = W[h];
        for(int v=0; v<n_visible; ++v) {
            // The weight update uses the hidden mean; a variant using the
            // hidden sample was left disabled in the original listing.
            row[v] += lr * (pos_h_mean[h] * input[v] - neg_h_mean[h] * neg_v_sample[v]) / N;
        }
        hbias[h] += lr * (pos_h_sample[h] - neg_h_mean[h]) / N;
    }

    for(int v=0; v<n_visible; ++v) {
        vbias[v] += lr * (input[v] - neg_v_sample[v]) / N;
    }

    delete[] pos_h_mean;
    delete[] pos_h_sample;
    delete[] neg_v_mean;
    delete[] neg_v_sample;
    delete[] neg_h_mean;
    delete[] neg_h_sample;
}

// For every hidden unit, stores propup(v0_sample, W[unit], hbias[unit]) in
// mean[unit] and a binomial(1, mean[unit]) draw in sample[unit].
void RBM::sample_h_given_v(int *v0_sample, double *mean, int *sample) {
    for(int unit=0; unit<n_hidden; ++unit) {
        double &activation = mean[unit];
        activation = propup(v0_sample, W[unit], hbias[unit]);
        sample[unit] = binomial(1, activation);
    }
}

// For every visible unit, stores propdown(h0_sample, unit, vbias[unit]) in
// mean[unit] and a binomial(1, mean[unit]) draw in sample[unit].
void RBM::sample_v_given_h(int *h0_sample, double *mean, int *sample) {
    for(int unit=0; unit<n_visible; ++unit) {
        double &activation = mean[unit];
        activation = propdown(h0_sample, unit, vbias[unit]);
        sample[unit] = binomial(1, activation);
    }
}

// Returns sigmoid(b + sum over j < n_visible of w[j] * v[j]).
double RBM::propup(int *v, double *w, double b) {

double pre_sigmoid_activation = 0.0;
// NOTE(review): `present` is not among the clauses the OpenACC specification
// lists for `enter data` (copyin, create, attach, ...) - confirm the compiler
// accepts the next line.
#pragma acc enter data present ( this )
// The data construct below applies to the parallel construct that follows it,
// so v and w are transferred to the device on every call to propup.
#pragma acc data copyin(v[0:n_visible],w[0:n_visible])


#pragma acc parallel
{

// Sum reduction over the n_visible products.
#pragma acc loop reduction(+:pre_sigmoid_activation) 
for(int j=0; j<n_visible; j++) {
pre_sigmoid_activation += w[j] * v[j];
}
}

pre_sigmoid_activation += b;
return sigmoid(pre_sigmoid_activation);
}
// Returns sigmoid(b + sum over j < n_hidden of W[j][i] * h[j]), i.e. the
// activation of visible unit i given the hidden vector h.
double RBM::propdown(int *h, int i, double b) {

double pre_sigmoid_activation = 0.0;

// NOTE(review): `present` is not among the clauses the OpenACC specification
// lists for `enter data` - confirm the compiler accepts the next line.
#pragma acc enter data present ( this)//,W[0:n_hidden][0:n_visible] )
// Executed on every call. NOTE(review): `enter data copyin` does not re-copy
// data that is already present on the device, and this listing contains no
// `update device` for W, so the host-side changes made to W in
// contrastive_divergence may never reach the device - confirm.
#pragma acc enter data copyin ( W[0:n_hidden][0:n_visible] )
// h is transferred to the device for the parallel construct that follows.
#pragma acc data copyin(h[0:n_hidden]) 

#pragma acc parallel 

{
// Sum reduction over the n_hidden products of column i of W with h.
#pragma acc loop reduction(+:pre_sigmoid_activation) 
for(int j=0; j<n_hidden; j++) {
pre_sigmoid_activation += W[j][i] * h[j];
}
}

pre_sigmoid_activation += b;

return sigmoid(pre_sigmoid_activation);

}
// One hidden -> visible -> hidden pass: fills nv_means / nv_samples from
// h0_sample, then fills nh_means / nh_samples from the nv_samples just drawn.
void RBM::gibbs_hvh(int *h0_sample, double *nv_means, int *nv_samples,
                    double *nh_means, int *nh_samples) {
    // Visible layer from the given hidden sample.
    sample_v_given_h(h0_sample, nv_means, nv_samples);

    // Hidden layer from the visible sample produced above.
    sample_h_given_v(nv_samples, nh_means, nh_samples);
}

// Reconstructs a visible vector.
//   v               : input visible vector; n_visible entries are read
//   reconstructed_v : output; n_visible entries are written
// The hidden activations are computed with propup, then each output entry is
// sigmoid(vbias[i] + sum over j < n_hidden of W[j][i] * h[j]).
void RBM::reconstruct(int *v, double *reconstructed_v) {
    double *h = new double[n_hidden];

    for(int i=0; i<n_hidden; i++) {
        h[i] = propup(v, W[i], hbias[i]);
    }

    for(int i=0; i<n_visible; i++) {
        double pre_sigmoid_activation = 0.0;
        for(int j=0; j<n_hidden; j++) {
            pre_sigmoid_activation += W[j][i] * h[j];
        }
        pre_sigmoid_activation += vbias[i];

        reconstructed_v[i] = sigmoid(pre_sigmoid_activation);
    }

    delete[] h;
}   // this closing brace was missing, which nested test_rbm inside reconstruct

//----------------------------------------------------The main
// Demo driver: seeds rand with 0, trains an RBM (6 visible / 3 hidden units)
// on six fixed patterns for 1000 epochs with CD-1, then prints the
// reconstruction of two test patterns, one per line.
void test_rbm() {
    srand(0);

    const double learning_rate = 0.1;
    const int training_epochs = 1000;
    const int k = 1;

    const int train_N = 6;
    const int test_N = 2;
    const int n_visible = 6;
    const int n_hidden = 3;

    // training data
    int train_X[6][6] = {
        {1, 1, 1, 0, 0, 0},
        {1, 0, 1, 0, 0, 0},
        {1, 1, 1, 0, 0, 0},
        {0, 0, 1, 1, 1, 0},
        {0, 0, 1, 0, 1, 0},
        {0, 0, 1, 1, 1, 0}
    };

    // construct RBM (NULL arguments let the constructor allocate W and biases)
    RBM rbm(train_N, n_visible, n_hidden, NULL, NULL, NULL);

    // train: one CD update per training row, repeated every epoch
    for(int epoch=0; epoch<training_epochs; ++epoch) {
        for(int row=0; row<train_N; ++row) {
            rbm.contrastive_divergence(train_X[row], learning_rate, k);
        }
    }

    // test data
    int test_X[2][6] = {
        {1, 1, 0, 0, 0, 0},
        {0, 0, 0, 1, 1, 0}
    };
    double reconstructed_X[2][6];

    // test: reconstruct each row and print it
    for(int row=0; row<test_N; ++row) {
        double *out = reconstructed_X[row];
        rbm.reconstruct(test_X[row], out);
        for(int col=0; col<n_visible; ++col) {
            printf("%.5f ", out[col]);
        }
        cout << endl;
    }
}







// Program entry point: runs the RBM demo and returns 0.
int main() {
    test_rbm();
    return 0;
}   // this closing brace was missing from the listing

最佳答案

你的代码中有一些错误，导致计算结果不正确。我在下面的代码中更正了这些错误。

至于性能，您的代码没有足够的并行性，无法胜过顺序执行。您并行化的这些循环计算量非常少、使用了归约(reduction)，而且循环长度非常小。要看到相对于主机顺序执行的加速，您需要使用大得多的规模(长度达到数千)，并且最好把 gang 级并行提升到更外层的循环。我试过这样做，但是 binomial 例程中存在依赖(对 rand 的调用)，它阻止了 “sample_[vh]_given_[vh]” 中循环的并行化。

#include <iostream>
#include <math.h>
#include "utils.h"
#include "RBM.h"
using namespace std;
using namespace utils;

// Builds an RBM with n_v visible and n_h hidden units; `size` is kept in N,
// the divisor used by the updates in contrastive_divergence.
// Any of w / hb / vb that is non-NULL is adopted as-is; a NULL argument makes
// the constructor allocate that array (W drawn from uniform(-1/n_visible,
// 1/n_visible), biases zero). The object and its three arrays are then
// mirrored on the device.
RBM::RBM(int size, int n_v, int n_h, double **w, double *hb, double *vb) {
        N = size;
        n_visible = n_v;
        n_hidden = n_h;

        // Weights.
        if(w != NULL) {
                W = w;
        } else {
                const double bound = 1.0 / n_visible;
                W = new double*[n_hidden];
                for(int row=0; row<n_hidden; ++row) {
                        W[row] = new double[n_visible];
                        for(int col=0; col<n_visible; ++col) {
                                W[row][col] = uniform(-bound, bound);
                        }
                }
        }

        // Biases: the trailing () value-initializes every element to zero.
        hbias = (hb != NULL) ? hb : new double[n_hidden]();
        vbias = (vb != NULL) ? vb : new double[n_visible]();

// Issued last so that every pointer member is already set when the object and
// the arrays it points to are copied to the device.
#pragma acc enter data copyin (this,W[0:n_hidden][0:n_visible],hbias[0:n_hidden],vbias[0:n_visible])
}

// Drops the device copies created by the constructor, then frees the host
// arrays. NOTE(review): the arrays are freed with delete[] even when they were
// supplied by the caller through the constructor - callers should confirm they
// hand over ownership.
RBM::~RBM() {
// Device side first, while the host pointers are still valid.
#pragma acc exit data delete ( hbias[0:n_hidden],vbias[0:n_visible],W[0:n_hidden][0:n_visible],this )

        delete[] vbias;
        delete[] hbias;

        for(int row=n_hidden-1; row>=0; --row) {
                delete[] W[row];
        }
        delete[] W;
}

void RBM::contrastive_divergence(int *input, double lr, int k) {
        double *ph_mean = new double[n_hidden];
        int *ph_sample = new int[n_hidden];
        double *nv_means = new double[n_visible];
        int *nv_samples = new int[n_visible];
        double *nh_means = new double[n_hidden];
        int *nh_samples = new int[n_hidden];

        /* CD-k */
        sample_h_given_v(input, ph_mean, ph_sample);

        for(int step=0; step<k; step++) {
                if(step == 0) {
                        gibbs_hvh(ph_sample, nv_means, nv_samples, nh_means, nh_samples);
                } else {
                        gibbs_hvh(nh_samples, nv_means, nv_samples, nh_means, nh_samples);
                }
        }

        for(int i=0; i<n_hidden; i++) {
                for(int j=0; j<n_visible; j++) {
                        // W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / N;
                        W[i][j] += lr * (ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / N;
                }
                hbias[i] += lr * (ph_sample[i] - nh_means[i]) / N;
        }

        for(int i=0; i<n_visible; i++) {
                vbias[i] += lr * (input[i] - nv_samples[i]) / N;
        }
#pragma acc update device(vbias[0:n_visible],hbias[0:n_hidden],W[0:n_hidden][0:n_visible])

        delete[] ph_mean;
        delete[] ph_sample;
        delete[] nv_means;
        delete[] nv_samples;
        delete[] nh_means;
        delete[] nh_samples;
}

// For every hidden unit, stores propup(v0_sample, W[unit], hbias[unit]) in
// mean[unit] and a binomial(1, mean[unit]) draw in sample[unit].
void RBM::sample_h_given_v(int *v0_sample, double *mean, int *sample) {
// v0_sample is copied to the device once here and stays there for all the
// propup calls made inside the region.
#pragma acc data copyin(v0_sample[0:n_visible])
        {
                int unit = 0;
                while(unit < n_hidden) {
                        const double p = propup(v0_sample, W[unit], hbias[unit]);
                        mean[unit] = p;
                        sample[unit] = binomial(1, p);
                        ++unit;
                }
        }
}

// For every visible unit i, stores propdown(h0_sample, i, vbias[i]) in mean[i]
// and a binomial(1, mean[i]) draw in sample[i].
//   h0_sample : hidden sample; propdown reads n_hidden entries of it
//   mean      : output, n_visible entries written
//   sample    : output, n_visible entries written
void RBM::sample_v_given_h(int *h0_sample, double *mean, int *sample) {
// h0_sample has one entry per hidden unit, so the device copy must span
// n_hidden elements. Using n_visible here read past the end of the host array
// whenever n_visible > n_hidden, and left part of it off the device otherwise.
#pragma acc data copyin(h0_sample[0:n_hidden])
        {
                for(int i=0; i<n_visible; i++) {
                        mean[i] = propdown(h0_sample, i, vbias[i]);
                        sample[i] = binomial(1, mean[i]);
                }
        }
}

// Returns sigmoid(b + sum over idx < n_visible of w[idx] * v[idx]).
// Both w and v must already be present on the device (present clause).
double RBM::propup(int *v, double *w, double b) {
        double weighted_sum = 0.0;

#pragma acc parallel present(w,v)
        {
#pragma acc loop reduction(+:weighted_sum)
                for(int idx=0; idx<n_visible; ++idx) {
                        weighted_sum += w[idx] * v[idx];
                }
        }

        // The bias is added on the host after the reduction.
        return sigmoid(weighted_sum + b);
}
// Returns sigmoid(b + sum over unit < n_hidden of W[unit][i] * h[unit]), the
// activation of visible unit i. W and h must already be present on the device
// (present clause).
double RBM::propdown(int *h, int i, double b) {
        double weighted_sum = 0.0;

#pragma acc parallel present(W,h)
        {
#pragma acc loop reduction(+:weighted_sum)
                for(int unit=0; unit<n_hidden; ++unit) {
                        weighted_sum += W[unit][i] * h[unit];
                }
        }

        // The bias is added on the host after the reduction.
        return sigmoid(weighted_sum + b);
}
// One hidden -> visible -> hidden pass: fills nv_means / nv_samples from
// h0_sample, then fills nh_means / nh_samples from the nv_samples just drawn.
void RBM::gibbs_hvh(int *h0_sample, double *nv_means, int *nv_samples,
                    double *nh_means, int *nh_samples) {
        // Downward half-step.
        sample_v_given_h(h0_sample, nv_means, nv_samples);

        // Upward half-step, fed by the visible sample produced above.
        sample_h_given_v(nv_samples, nh_means, nh_samples);
}

void RBM::reconstruct(int *v, double *reconstructed_v) {
        double *h = new double[n_hidden];
        double pre_sigmoid_activation;

#pragma acc data copyin(v[0:n_visible])
        {

                for(int i=0; i<n_hidden; i++) {
                        h[i] = propup(v, W[i], hbias[i]);
                }

                for(int i=0; i<n_visible; i++) {
                        pre_sigmoid_activation = 0.0;
                        for(int j=0; j<n_hidden; j++) {
                                pre_sigmoid_activation += W[j][i] * h[j];
                        }
                        pre_sigmoid_activation += vbias[i];

                        reconstructed_v[i] = sigmoid(pre_sigmoid_activation);
                }
        }
        delete[] h;
}

//----------------------------------------------------The main
// Demo driver: seeds rand with 0, trains an RBM (6 visible / 3 hidden units)
// on six fixed patterns for 1000 epochs with CD-1, then prints the
// reconstruction of two test patterns, one per line.
void test_rbm() {
        srand(0);

        const double learning_rate = 0.1;
        const int training_epochs = 1000;
        const int k = 1;
        const int train_N = 6;
        const int test_N = 2;
        const int n_visible = 6;
        const int n_hidden = 3;

        // training data
        int train_X[6][6] = {
                {1, 1, 1, 0, 0, 0},
                {1, 0, 1, 0, 0, 0},
                {1, 1, 1, 0, 0, 0},
                {0, 0, 1, 1, 1, 0},
                {0, 0, 1, 0, 1, 0},
                {0, 0, 1, 1, 1, 0}
        };

        // construct RBM (NULL arguments let the constructor allocate W and biases)
        RBM rbm(train_N, n_visible, n_hidden, NULL, NULL, NULL);

        // train: the rows are visited in order, once per epoch
        const int total_updates = training_epochs * train_N;
        for(int update=0; update<total_updates; ++update) {
                rbm.contrastive_divergence(train_X[update % train_N], learning_rate, k);
        }

        // test data
        int test_X[2][6] = {
                {1, 1, 0, 0, 0, 0},
                {0, 0, 0, 1, 1, 0}
        };

        double reconstructed_X[2][6];

        // test: reconstruct each row and print it
        for(int row=0; row<test_N; ++row) {
                double *out = reconstructed_X[row];
                rbm.reconstruct(test_X[row], out);

                int col = 0;
                while(col < n_visible) {
                        printf("%20.15f ", out[col]);
                        ++col;
                }
                cout << endl;
        }
}

// Program entry point: runs the RBM demo (which prints to standard output)
// and returns 0.
int main() {
        test_rbm();

        return 0;
}

关于c++ - RBM 在代码上与 OpenACC 没有改进，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/40976149/

文章推荐： css - Django-filer 在管理站点中呈现不正确

文章推荐： html - 只对一张图片应用 css 样式

c - 我尝试理解 [c 代码 -> 汇编] 代码
我尝试理解[c代码 -> 汇编]代码 void node::Check( data & _data1, vector& _data2) { -> push ebp -> mov ebp,esp ->
c# - 在当前表单(代码)的上下文中从字符串动态运行 C# 代码
我需要在当前表单(代码)的上下文中运行文本文件中的代码。其中一项要求是让代码创建新控件并将其添加到当前窗体。例如，在Form1.cs中: using System.Windows.Forms; ..
c# - c++代码(malloc方法)到c#代码
我有此 C++ 代码并将其转换为 C# (.net Framework 4) 代码。有没有人给我一些关于 malloc、free 和 sprintf 方法的提示？ int monate = ee; d
C 代码，简单的 Web 服务器(代码 OK)
我的网络服务器代码有问题 #include #include #include #include #include #include #include int
html - 将特定列表元素置于斜体的 CSS 代码(不更改 html 代码)
给定以下 html 代码，将列表中的第三个元素(即“美丽”一词)以斜体显示的 CSS 代码是什么？当然，我可以给这个元素一个 id 或一个 class，但 html 代码必须保持不变。谢谢
javascript - 是否有一些库可用于 IQR 代码(不是 QR 代码)？
关闭。这个问题不符合Stack Overflow guidelines .它目前不接受答案。我们不允许提问寻求书籍、工具、软件库等的推荐。您可以编辑问题，以便用事实和引用来回答。关闭 7 年前。
macros - 在 Inno Setup [代码] 部分将宏扩展为 Pascal 代码
我试图制作一个宏来避免重复代码和注释。我试过这个: #define GrowOnPage(any Page, any Component) Component.Width := Page.Surfa
c# - 我正在尝试将我的旧 c++ 代码 "translate"转换为 c# 代码
我正在尝试将我的旧 C++ 代码“翻译”成头条新闻所暗示的 C# 代码。问题是我是 C# 中的新手，并不是所有的东西都像 C++ 中那样。在 C++ 中这些解决方案运行良好，但在 C# 中只是不能。我
r - 让 Visual Studio 代码(自动)格式化 R 代码
在 Windows 10 上工作，R 语言的格式化程序似乎没有在 Visual Studio Code 中完成它的工作。我试过R support for Visual Studio Code和 R-T
dynamic - 是否可以在 Python 脚本中生成和执行 Python 代码？ [动态 Python 代码]
我正在处理一些报告(计数)，我必须获取不同参数的计数。非常简单但乏味。一个参数的示例查询: qCountsEmployee = ( "select count(*) from %s wher
ios - 随机和偶然的网络错误(NSURLErrorDomain 代码=-1001 和 NSURLErrorDomain 代码=-1005)
最近几天我尝试从 d00m 调试网络错误。我开始用尽想法/线索，我希望其他 SO 用户拥有可能有用的宝贵经验。我希望能够提供所有相关信息，但我个人无法控制服务器环境。整个事情始于用户注意到我们应用程
javascript - visual studio 代码 intellisense 不适用于 dojo amd 代码
我有一个 app.js 文件，其中包含如下 dojo amd 模式代码: require(["dojo/dom", ..], function(dom){ dom.byId('someId').i
cuda - 'code=sm_X' 是否仅嵌入二进制(cubin)代码，或 PTX 代码，或两者？
我对“-gencode”语句中的“code=sm_X”选项有点困惑。一个例子:NVCC 编译器选项有什么作用 -gencode arch=compute_13,code=sm_13 嵌入库中？只有
javascript - 在 Javascript 下拉列表中添加 HTML 代码，Javascript 不评估 HTML 代码
我为我的表格使用 X-editable 框架。但是我有一些问题。 $(document).ready(function() { $('.access').editable({
python - 在 linux 中运行 flask/python 代码？基本 flask 代码
我一直在通过本教程学习 flask/python http://blog.miguelgrinberg.com/post/the-flask-mega-tutorial-part-i-hello-wo
vim - G 代码 M 代码 VI 和 EMACS 的 CNC 语法
我想将 Vim 和 EMACS 用于 CNC、G 代码和 M 代码。 Vim 或 EMACS 是否有任何语法或模式来处理这种类型的代码？最佳答案一些快速搜索使我找到了 this vim 和 thi
iphone - 寻找关于将 Pre-Storyboard 代码 (XCode4) 移动到 Storyboard 代码 (XCode5) 的教程
关闭。这个问题不符合Stack Overflow guidelines .它目前不接受答案。想改进这个问题？更新问题，使其成为 on-topic对于堆栈溢出。 7年前关闭。 Improve this
vim - 如何让 Vim 理解 *.md 文件包含 Markdown 代码，而不是 Modula-2 代码？
这个问题在这里已经有了答案: Enabling markdown highlighting in Vim (5 个回答) 6年前关闭。当我在 Vim 中编辑包含 Markdown 代码的 READM
ios - 错误域=AVFoundationErrorDomain 代码=-11800 "The operation could not be completed"{错误域=NSOSStatusErrorDomain 代码=-16976 "(null)"}
我正在 Swift3 iOS 中开发视频应用程序。基本上我必须将视频 Assets 和音频与淡入淡出效果合并为一个并将其保存到 iPhone 画廊。为此，我使用以下方法: private func d
jenkins - 无法通过 Jenkins 管道作业的 jenkinsfile 中的 Groovy 代码(或 java 代码)创建文件
pipeline { agent any stages { stage('Build') { steps { e

行者123

个人简介

我是一名优秀的程序员,十分优秀！

作者热门文章

滴滴打车优惠券免费领取

全站热门文章

首页

博学

6Ren·AI

商城

c++ - RBM 在代码上与 OpenACC 没有改进