gpt4 book ai didi

R 嵌套 foreach %dopar% 外循环和 %do% 内循环

转载 作者:行者123 更新时间:2023-12-04 11:02:44 38 4
gpt4 key购买 nike

我在 R 中运行以下脚本。
如果我使用 %do% 而不是 %dopar%,脚本工作正常。但是,如果在外循环中使用 %dopar%,循环将永远运行下去而不会引发任何错误(内存使用量不断增加,直到内存耗尽)。
我正在使用 16 核。

library(parallel)
library(foreach)
library(doSNOW)
library(dplyr)


# Create a 16-worker cluster and register it as the foreach parallel backend.
NumberOfCluster <- 16
cl <- makeCluster(NumberOfCluster)
registerDoSNOW(cl)


# Outer loop: iterate over the elements of UNSPSC_list in parallel (%dopar%);
# the packages named in .packages are loaded on each worker.
# NOTE(review): UNSPSC_list, terms_list_by_UNSPSC, N_of_UNSPSCs_by_Term,
# UNSPSCs_per_ABN, train, predict_all and suppliers are not defined in this
# snippet; they are assumed to exist in the calling session.
foreach(i = UNSPSC_list, .packages = c('data.table', 'dplyr'), .verbose = TRUE) %dopar%
{
# Terms of the rows whose UNSPSC starts with the six characters in i,
# with spaces removed and duplicates dropped, as a one-column data.table.
terms <- as.data.table(unique(gsub(" ", "", unlist(terms_list_by_UNSPSC$Terms[which(substr(terms_list_by_UNSPSC$UNSPSC,1,6) == i)]))))
# NOTE(review): dplyr joins take the key through `by`; `on` is data.table
# syntax, so confirm which key this join actually uses.
temp <- inner_join(N_of_UNSPSCs_by_Term, terms, on = 'V1')
# Replace V2 by its reciprocal, sort descending by it, and rename the two
# columns to Term / Imp.
temp$V2 <- 1/as.numeric(temp$V2)
temp <- temp[order(temp$V2, decreasing = TRUE),]
names(temp) <- c('Term','Imp')
# Distinct first-column values of the UNSPSCs_per_ABN rows whose UNSPSC shares
# its first four characters with i.
ABNs <- unique(UNSPSCs_per_ABN[which(substr(UNSPSCs_per_ABN$UNSPSC,1,4) == substr(i,1,4)), 1])

# Inner loop: runs sequentially (%do%) over the row indices of train and
# concatenates the per-row results with c().
# NOTE(review): `predictions` is still the empty vector below while the loop
# body runs, because the assignment only happens once the loop has finished.
predictions <- as.numeric(vector())
predictions <- foreach (j = seq(1 : nrow(train)), .combine = 'c', .packages = 'dplyr') %do%
{
# Names of the entries in row j of train that are not NA.
descr <- names(which(!is.na(train[j,]) == TRUE))
# Yield the summed Imp of the terms found in descr when predict_all[j, 1] is
# among ABNs or is not among suppliers; otherwise yield 0.
if(unlist(predict_all[j,1]) %in% unlist(ABNs) || !unlist(predict_all[j,1]) %in% unlist(suppliers)) {union_all(predictions, sum(temp$Imp[which(temp$Term %in% descr)]))} else {union_all(predictions, 0)}

}
# Write the predictions for this i to an .rda file whose name contains i.
save(predictions, file = paste("Predictions", i,".rda", sep = "_"))
}

最佳答案

嵌套 foreach 循环的正确方法是使用 %:% 运算符。请参阅下面的示例。我已经在 Windows 上测试过了。

library(foreach)
library(doSNOW)

# Create a 4-worker cluster and register it as the foreach parallel backend.
NumberOfCluster <- 4
cl <- makeCluster(NumberOfCluster)
registerDoSNOW(cl)

# Number of random draws generated for every (i, j) pair.
N <- 1e6

# Sequential baseline: the two loops are joined with %:% and run with %do%.
# Inner results are combined with c(), outer results with rbind().
system.time(foreach(i = 1:10, .combine = rbind) %:%
foreach(j = 1:10, .combine = c) %do% mean(rnorm(N, i, j)))

# The same nested loop, executed on the registered cluster via %dopar%.
system.time(foreach(i = 1:10, .combine = rbind) %:%
foreach(j = 1:10, .combine = c) %dopar% mean(rnorm(N, i, j)))

输出:
> system.time(foreach(i = 1:10, .combine = rbind) %:%
+ foreach(j = 1:10, .combine = c) %do% mean(rnorm(N, i, j)))
user system elapsed
7.38 0.23 7.64
> system.time(foreach(i = 1:10, .combine = rbind) %:%
+ foreach(j = 1:10, .combine = c) %dopar% mean(rnorm(N, i, j)))
user system elapsed
0.09 0.00 2.14

CPU usage for %do% and %dopar%

使用嵌套循环的方案如下:
foreach(i) %:% foreach(j) %dopar% {foo(i, j)}

运算符 %:% 用于嵌套多个 foreach 循环。您不能在嵌套的循环之间进行计算。在您的情况下,您必须执行两个循环,例如:
# Step 1: a plain parallel loop over i that prepares one value per i
powers <- foreach(i = seq_len(10), .combine = c) %dopar% {
  2^i
}

# Step 2: loops over i and j joined by %:%; the body only reads the
# values prepared in step 1
foreach(i = seq_len(10), .combine = rbind) %:%
  foreach(j = seq_len(10), .combine = c) %dopar% {
    powers[i] + j
  }

未经测试的代码:
library(data.table)
library(foreach)
library(doSNOW)

# Compute one prediction vector per UNSPSC code and save each to its own file.
#
# Expects these objects in the calling session (none are defined here):
# UNSPSC_list, terms_list_by_UNSPSC, N_of_UNSPSCs_by_Term, UNSPSCs_per_ABN,
# train, predict_all and suppliers.

# Start a two-worker cluster and register it as the foreach backend.
NumberOfCluster <- 2
cl <- makeCluster(NumberOfCluster)
registerDoSNOW(cl)

# Stage 1 (parallel over UNSPSC codes): everything that depends only on the
# code. %:% allows no statements between the two nested loops, so the term
# weights and the ABN set of each code are prepared here and BOTH returned;
# returning only the ABNs would leave the weights undefined in stage 2.
per_code <- foreach(
  i = UNSPSC_list,
  .packages = c("data.table", "dplyr"),
  .verbose = TRUE
) %dopar% {
  code_rows <- which(substr(terms_list_by_UNSPSC$UNSPSC, 1, 6) == i)
  terms <- as.data.table(
    unique(gsub(" ", "", unlist(terms_list_by_UNSPSC$Terms[code_rows])))
  )
  # dplyr joins take their key through `by` (`on` is data.table syntax).
  temp <- inner_join(N_of_UNSPSCs_by_Term, terms, by = "V1")
  temp$V2 <- 1 / as.numeric(temp$V2)
  temp <- temp[order(temp$V2, decreasing = TRUE), ]
  names(temp) <- c("Term", "Imp")
  abn_rows <- which(substr(UNSPSCs_per_ABN$UNSPSC, 1, 4) == substr(i, 1, 4))
  list(temp = temp, ABNs = unique(UNSPSCs_per_ABN[abn_rows, 1]))
}

# Positional lists aligned with UNSPSC_list; stage 2 indexes them by position,
# never by the code value itself.
term_weights <- lapply(per_code, function(p) p$temp)
ABNs <- lapply(per_code, function(p) p$ABNs)

# Stage 2 (nested, parallel): one task per (code position k, train row j).
# The outer loop has no .combine, so the result is a list with one numeric
# vector per code.
predictions <- foreach(
  k = seq_along(UNSPSC_list),
  .packages = c("data.table", "dplyr"),
  .verbose = TRUE
) %:%
  foreach(
    j = seq_len(nrow(train)),
    .combine = "c",
    .packages = "dplyr"
  ) %dopar% {
    descr <- names(which(!is.na(train[j, ])))
    supplier <- unlist(predict_all[j, 1])
    if (supplier %in% unlist(ABNs[[k]]) || !supplier %in% unlist(suppliers)) {
      weights <- term_weights[[k]]
      sum(weights$Imp[weights$Term %in% descr])
    } else {
      0
    }
  }

# The workers are no longer needed once both stages have finished.
stopCluster(cl)

# Save each vector under the object name `predictions`, in a file named after
# its UNSPSC code. save() needs an object name, not an expression, so a local
# binding is created for each file.
for (k in seq_along(predictions)) {
  local({
    predictions <- predictions[[k]]
    save(
      predictions,
      file = paste("Predictions", UNSPSC_list[[k]], ".rda", sep = "_")
    )
  })
}

关于R 嵌套 foreach %dopar% 外循环和 %do% 内循环,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/48632781/

38 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com