gpt4 book ai didi

R:总结相邻的矩阵元素。如何加速?

转载 作者:行者123 更新时间:2023-12-04 22:57:47 26 4
gpt4 key购买 nike

我正在处理大约 2500x2500x50 (lonxlatxtime) 的大型矩阵。矩阵只包含 1 和 0。我需要知道每个时间步长 24 个周围元素的总和。到目前为止,我是这样做的:

xdim <- 2500
ydim <- 2500
tdim <- 50
a <- array(0:1,dim=c(xdim,ydim,tdim))
res <- array(0:1,dim=c(xdim,ydim,tdim))

for (t in 1:tdim){
for (x in 3:(xdim-2)){
for (y in 3:(ydim-2)){
res[x,y,t] <- sum(a[(x-2):(x+2),(y-2):(y+2),t])
}
}
}

这有效,但对于我的需要来说太慢了。有没有人请教如何加快速度?

最佳答案

介绍

我不得不说,在阵列的设置背后有很多隐藏的东西。不过,剩下的问题是微不足道的。因此,有两种方法可以真正做到这一点:

  • @Alex 给出的蛮力(用 C++ 编写)
  • 观察复制模式

  • 使用 OpenMP 进行暴力破解

    如果我们想“蛮力”它,那么我们可以使用@Alex 给出的建议来使用 OpenMP与 Armadillo
    #include <RcppArmadillo.h>

    // [[Rcpp::depends(RcppArmadillo)]]

    // Add a flag to enable OpenMP at compile time
    // [[Rcpp::plugins(openmp)]]

    // Protect against compilers without OpenMP
    #ifdef _OPENMP
    #include <omp.h>
    #endif

    // [[Rcpp::export]]
    arma::cube cube_parallel(arma::cube a, arma::cube res, int cores = 1) {

    // Extract the different dimensions
    unsigned int tdim = res.n_slices;

    unsigned int xdim = res.n_rows;

    unsigned int ydim = res.n_cols;

    // Same calculation loop
    #pragma omp parallel for num_threads(cores)
    for (unsigned int t = 0; t < tdim; t++){
    // pop the T
    arma::mat temp_mat = a.slice(t);

    // Subset the rows
    for (unsigned int x = 2; x < xdim-2; x++){

    arma::mat temp_row_sub = temp_mat.rows(x-2, x+2);

    // Iterate over the columns with unit accumulative sum
    for (unsigned int y = 2; y < ydim-2; y++){
    res(x,y,t) = accu(temp_row_sub.cols(y-2,y+2));
    }
    }
    }

    return res;
    }

    复制模式

    然而,更聪明的方法是了解 array(0:1, dims)正在 build 中。

    最为显着地:
  • 案例1:如果xdim是偶数,则只有矩阵的行交替。
  • 情况 2:如果 xdim是奇数和 ydim是奇数,然后行交替以及矩阵交替。
  • 案例 3:如果 xdim是奇数和 ydim是偶数,则只有行交替

  • 例子

    让我们看看实际案例以观察模式。

    案例1:
    xdim <- 2
    ydim <- 3
    tdim <- 2
    a <- array(0:1,dim=c(xdim,ydim,tdim))

    输出 :
    , , 1

    [,1] [,2] [,3]
    [1,] 0 0 0
    [2,] 1 1 1

    , , 2

    [,1] [,2] [,3]
    [1,] 0 0 0
    [2,] 1 1 1

    案例2:
    xdim <- 3
    ydim <- 3
    tdim <- 3
    a <- array(0:1,dim=c(xdim,ydim,tdim))

    输出:
    , , 1

    [,1] [,2] [,3]
    [1,] 0 1 0
    [2,] 1 0 1
    [3,] 0 1 0

    , , 2

    [,1] [,2] [,3]
    [1,] 1 0 1
    [2,] 0 1 0
    [3,] 1 0 1

    , , 3

    [,1] [,2] [,3]
    [1,] 0 1 0
    [2,] 1 0 1
    [3,] 0 1 0

    案例3:
    xdim <- 3
    ydim <- 4
    tdim <- 2
    a <- array(0:1,dim=c(xdim,ydim,tdim))

    输出:
    , , 1

    [,1] [,2] [,3] [,4]
    [1,] 0 1 0 1
    [2,] 1 0 1 0
    [3,] 0 1 0 1

    , , 2

    [,1] [,2] [,3] [,4]
    [1,] 0 1 0 1
    [2,] 1 0 1 0
    [3,] 0 1 0 1

    模式黑客

    好的,基于上述讨论,我们选择编写一些代码来利用这种独特的模式。

    创建交替向量

    在这种情况下,交替矢量在两个不同的值之间切换。
    #include <RcppArmadillo.h>
    // [[Rcpp::depends(RcppArmadillo)]]

    // ------- Make Alternating Vectors

    arma::vec odd_vec(unsigned int xdim){

    // make a temporary vector to create alternating 0-1 effect by row.
    arma::vec temp_vec(xdim);

    // Alternating vector (anyone have a better solution? )
    for (unsigned int i = 0; i < xdim; i++) {
    temp_vec(i) = (i % 2 ? 0 : 1);
    }

    return temp_vec;
    }

    arma::vec even_vec(unsigned int xdim){

    // make a temporary vector to create alternating 0-1 effect by row.
    arma::vec temp_vec(xdim);

    // Alternating vector (anyone have a better solution? )
    for (unsigned int i = 0; i < xdim; i++) {
    temp_vec(i) = (i % 2 ? 1 : 0); // changed
    }

    return temp_vec;
    }

    创建矩阵的三种情况

    如上所述,矩阵存在三种情况。偶数、第一奇数和第二奇数情况。
    // --- Handle the different cases 

    // [[Rcpp::export]]
    arma::mat make_even_matrix(unsigned int xdim, unsigned int ydim){

    arma::mat temp_mat(xdim,ydim);

    temp_mat.each_col() = even_vec(xdim);

    return temp_mat;
    }

    // xdim is odd and ydim is even
    // [[Rcpp::export]]
    arma::mat make_odd_matrix_case1(unsigned int xdim, unsigned int ydim){

    arma::mat temp_mat(xdim,ydim);

    arma::vec e_vec = even_vec(xdim);
    arma::vec o_vec = odd_vec(xdim);

    // Alternating column
    for (unsigned int i = 0; i < ydim; i++) {
    temp_mat.col(i) = (i % 2 ? o_vec : e_vec);
    }

    return temp_mat;
    }

    // xdim is odd and ydim is odd
    // [[Rcpp::export]]
    arma::mat make_odd_matrix_case2(unsigned int xdim, unsigned int ydim){

    arma::mat temp_mat(xdim,ydim);

    arma::vec e_vec = even_vec(xdim);
    arma::vec o_vec = odd_vec(xdim);

    // Alternating column
    for (unsigned int i = 0; i < ydim; i++) {
    temp_mat.col(i) = (i % 2 ? e_vec : o_vec); // slight change
    }

    return temp_mat;
    }

    计算引擎

    与之前的解决方案相同,只是没有 t因为我们不再需要重复计算。
    // --- Calculation engine

    // [[Rcpp::export]]
    arma::mat calc_matrix(arma::mat temp_mat){

    unsigned int xdim = temp_mat.n_rows;

    unsigned int ydim = temp_mat.n_cols;

    arma::mat res = temp_mat;

    // Subset the rows
    for (unsigned int x = 2; x < xdim-2; x++){

    arma::mat temp_row_sub = temp_mat.rows(x-2, x+2);

    // Iterate over the columns with unit accumulative sum
    for (unsigned int y = 2; y < ydim-2; y++){
    res(x,y) = accu(temp_row_sub.cols(y-2,y+2));
    }
    }

    return res;
    }

    调用主函数

    这是将所有内容拼凑在一起的核心功能。这为我们提供了所需的距离数组。
    // --- Main Engine

    // Create the desired cube information
    // [[Rcpp::export]]
    arma::cube dim_to_cube(unsigned int xdim = 4, unsigned int ydim = 4, unsigned int tdim = 3) {

    // Initialize values in A
    arma::cube res(xdim,ydim,tdim);

    if(xdim % 2 == 0){
    res.each_slice() = calc_matrix(make_even_matrix(xdim, ydim));
    }else{

    if(ydim % 2 == 0){

    res.each_slice() = calc_matrix(make_odd_matrix_case1(xdim, ydim));

    }else{

    arma::mat first_odd_mat = calc_matrix(make_odd_matrix_case1(xdim, ydim));

    arma::mat sec_odd_mat = calc_matrix(make_odd_matrix_case2(xdim, ydim));

    for(unsigned int t = 0; t < tdim; t++){
    res.slice(t) = (t % 2 ? sec_odd_mat : first_odd_mat);
    }

    }

    }

    return res;
    }

    定时

    现在,真正的事实是它的表现如何:
    Unit: microseconds
    expr min lq mean median uq max neval
    r_1core 3538.022 3825.8105 4301.84107 3957.3765 4043.0085 16856.865 100
    alex_1core 2790.515 2984.7180 3461.11021 3076.9265 3189.7890 15371.406 100
    cpp_1core 174.508 180.7190 197.29728 194.1480 204.8875 338.510 100
    cpp_2core 111.960 116.0040 126.34508 122.7375 136.2285 162.279 100
    cpp_3core 81.619 88.4485 104.54602 94.8735 108.5515 204.979 100
    cpp_cache 40.637 44.3440 55.08915 52.1030 60.2290 302.306 100

    用于计时的脚本:
    cpp_parallel = cube_parallel(a,res, 1)
    alex_1core = alex(a,res,xdim,ydim,tdim)
    cpp_cache = dim_to_cube(xdim,ydim,tdim)
    op_answer = cube_r(a,res,xdim,ydim,tdim)

    all.equal(cpp_parallel, op_answer)
    all.equal(cpp_cache, op_answer)
    all.equal(alex_1core, op_answer)

    xdim <- 20
    ydim <- 20
    tdim <- 5
    a <- array(0:1,dim=c(xdim,ydim,tdim))
    res <- array(0:1,dim=c(xdim,ydim,tdim))


    ga = microbenchmark::microbenchmark(r_1core = cube_r(a,res,xdim,ydim,tdim),
    alex_1core = alex(a,res,xdim,ydim,tdim),
    cpp_1core = cube_parallel(a,res, 1),
    cpp_2core = cube_parallel(a,res, 2),
    cpp_3core = cube_parallel(a,res, 3),
    cpp_cache = dim_to_cube(xdim,ydim,tdim))

    关于R:总结相邻的矩阵元素。如何加速?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/37684181/

    26 4 0
    Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
    广告合作:1813099741@qq.com 6ren.com