gpt4 book ai didi

r - 如何加速R循环

转载 作者:行者123 更新时间:2023-12-04 10:51:46 26 4
gpt4 key购买 nike

我有一些数据看起来像

dfr <- data.frame(pos=1:20,val=sample(90:120,20))

pos val
1 1 116
2 2 97
3 3 100
4 4 105
5 5 112
6 6 95
7 7 91
8 8 117
9 9 98
10 10 94
11 11 110
12 12 118
13 13 120
14 14 115
15 15 103
16 16 102
17 17 109
18 18 90
19 19 93
20 20 107

我需要在 pos 的窗口大小上计算 val 的中值。我有以下功能:

#' @param dfr A data.frame with columns pos and val
#' @param win An integer denoting window size
#'
fn_median <- function(dfr,win=5)
{
n <- nrow(dfr)
vec_start <- vector(length=floor(n/win),mode="numeric")
vec_end <- vector(length=floor(n/win),mode="numeric")
vec_median <- vector(length=floor(n/win),mode="numeric")
k <- 1
i <- 1
while(i<=n)
{
vec_start[k] <- dfr$pos[i]
vec_end[k] <- dfr$pos[i+(win-1)]
vec_median[k] <- median(dfr$val[i:(i+(win-1))])
k <- k+1
i <- i+win
}

return(data.frame(start=vec_start,end=vec_end,median=vec_median))
}

返回

> fn_median(dfr,5)
start end median
1 1 5 105
2 6 10 95
3 11 15 115
4 16 20 102

基准测试

library(microbenchmark)
library(ggplot2)

autoplot(microbenchmark("loop"=fn_median(dfr,5),times=1000))

enter image description here

这段代码太慢了。我怎样才能改进它以使其更快?也许使用 apply 函数族?

最佳答案

您可以使用 data.table 并按 pos - 1 的整数除法除以 5(或其他一些 n).

library(data.table)
fn_median <- function(df, n){
setDT(df)
df[, .(start = pos[1], end = last(pos), median = median(val))
, by = .(drop = (pos - 1) %/% n)][, -'drop']
}

fn_median(dfr, 5)

# start end median
# 1: 1 5 105
# 2: 6 10 95
# 3: 11 15 115
# 4: 16 20 102

编辑:基准

library(microbenchmark)
dfr <- data.frame(pos = seq_len(1e4), val = sample(1e4))
microbenchmark(fn_median(dfr, 5), fn_median2(dfr, 5), times = 10)
# Unit: milliseconds
# expr min lq mean median uq max neval
# fn_median(dfr, 5) 113.324354 131.217695 147.213517 139.283545 167.387556 188.76767 10
# fn_median2(dfr, 5) 2.896002 3.026053 4.554341 3.448822 3.687797 15.40021 10

dfr <- data.frame(pos = seq_len(1e6), val = sample(1e6))
microbenchmark(fn_median(dfr, 5), fn_median2(dfr, 5), times = 5)
# Unit: milliseconds
# expr min lq mean median uq max neval
# fn_median(dfr, 5) 13295.8565 13710.4458 13729.029 13734.9328 13876.7450 14027.1664 5
# fn_median2(dfr, 5) 97.7186 103.9742 120.471 119.3268 121.1799 160.1556 5

使用的函数:

library(data.table)
fn_median2 <- function(df, n){
setDT(df)
df[, .(start = pos[1], end = last(pos), median = median(val))
, by = .(drop = (pos - 1) %/% n)][, -'drop']
}



fn_median <- function(dfr,win=5)
{
n <- nrow(dfr)
vec_start <- vector(length=floor(n/win),mode="numeric")
vec_end <- vector(length=floor(n/win),mode="numeric")
vec_median <- vector(length=floor(n/win),mode="numeric")
k <- 1
i <- 1
while(i<=n)
{
vec_start[k] <- dfr$pos[i]
vec_end[k] <- dfr$pos[i+(win-1)]
vec_median[k] <- median(dfr$val[i:(i+(win-1))])
k <- k+1
i <- i+win
}

return(data.frame(start=vec_start,end=vec_end,median=vec_median))
}

关于r - 如何加速R循环,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/54710061/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com