
r - Object size increases after passing the object to a function


I am producing some plots from a large data set. In this code, the resulting plot objects are very small, yet memory usage increases by far more than their size.

So far I have found that the increase in memory usage seems to be caused by a few objects. In particular, the value of the object tab_ind does not change after the plotting process (checked with identical()), yet its size increases substantially afterwards (checked with object.size()). The only thing I do with tab_ind during the process is pass it as an argument to functions.


Reproducible example

The size of the simulation can be controlled through N. At the end of the run, the change in the size of tab_ind and the result of the identity check are printed.

library(data.table)
library(magrittr)
library(ggplot2)

N <- 6000

set.seed(runif(1, 0, .Machine$integer.max) %>% ceiling)

logit <- function(x) {return(log(x/(1-x)))}
invLogit <- function(x) {return(exp(x)/(1+exp(x)))}

tab_dat <- data.table(datasetID = seq(N), MIX_MIN_SUCCESS = sample(c(0, 1), N, replace = T), MIX_ALL = sample(c(0, 1), N, replace = T))
tab_dat[MIX_MIN_SUCCESS == 0, MIX_ALL := 0]
n <- sample(20:300, N, replace = T)
tab_ind <- data.table(
  datasetID = rep(seq(N), times = n),
  SIM_ADJ_PP1 = runif(sum(n), 0.00001, 0.99999),
  MIX_ADJ_PP1 = runif(sum(n), 0.00001, 0.99999)
)
tab_ind[, c("SIM_ADJ_LOGIT_PP1", "MIX_ADJ_LOGIT_PP1") := list(logit(SIM_ADJ_PP1), logit(MIX_ADJ_PP1))]

checkMem_gc <- function(status) {
  print(status)
  print(memory.size())
  gc()
  print(memory.size())
}

## Individual bins for x and y
tab_by_bin_idxy <- function(dt, x, y, xNItv, yNItv, by = "quantile") {
  # Binning
  if (by == "even") {
    checkMem_gc("start x-y breaks")
    checkMem_gc("start x breaks")
    minN = dt[, min(get(x), na.rm = T)]
    checkMem_gc("after x min")
    maxN = dt[, max(get(x), na.rm = T)]
    checkMem_gc("after x max")
    xBreaks = seq(minN, maxN, length.out = xNItv + 1)
    checkMem_gc("after seq")
    checkMem_gc("after x breaks")
    yBreaks = dt[, seq(min(get(y), na.rm = T), max(get(y), na.rm = T), length.out = yNItv + 1)]
    checkMem_gc("after y breaks")
  } else if (by == "quantile") {
    xBreaks = dt[, quantile(get(x), seq(0, 1, length.out = xNItv + 1), names = F)]
    yBreaks = dt[, quantile(get(y), seq(0, 1, length.out = yNItv + 1), names = F)]
  } else {stop("type of 'by' not supported")}
  checkMem_gc("after x-y breaks")
  xbinCode = dt[, .bincode(get(x), breaks = xBreaks, include.lowest = T)]
  checkMem_gc("after x binCode")
  xbinMid = sapply(seq(xNItv), function(i) {return(mean(xBreaks[c(i, i+1)]))})[xbinCode]
  checkMem_gc("after x binMid")
  ybinCode = dt[, .bincode(get(y), breaks = yBreaks, include.lowest = T)]
  checkMem_gc("after y binCode")
  ybinMid = sapply(seq(yNItv), function(i) {return(mean(yBreaks[c(i, i+1)]))})[ybinCode]
  checkMem_gc("after y binMid")
  # Creating table
  tab_match = CJ(xbinCode = seq(xNItv), ybinCode = seq(yNItv))
  checkMem_gc("after tab match")
  tab_plot = data.table(xbinCode, xbinMid, ybinCode, ybinMid)[
    tab_match, .(xbinMid = xbinMid[1], ybinMid = ybinMid[1], N = .N), keyby = .EACHI, on = c("xbinCode", "ybinCode")
  ]
  checkMem_gc("after tab plot")
  colnames(tab_plot)[colnames(tab_plot) == "xbinCode"] = paste0(x, "_binCode")
  colnames(tab_plot)[colnames(tab_plot) == "xbinMid"] = paste0(x, "_binMid")
  colnames(tab_plot)[colnames(tab_plot) == "ybinCode"] = paste0(y, "_binCode")
  colnames(tab_plot)[colnames(tab_plot) == "ybinMid"] = paste0(y, "_binMid")
  checkMem_gc("after col name")
  rm(list = c("xBreaks", "yBreaks", "xbinCode", "ybinCode", "xbinMid", "ybinMid", "tab_match"))
  checkMem_gc("after rm")
  # Returning table
  return(tab_plot)
}

tab_by_obin_x_str_y <- function(dt, x, y, width, Nbin, by = "even") {
  # Binning
  if (by == "even") {
    xLLim = dt[, seq(min(get(x), na.rm = T), max(get(x), na.rm = T) - width, length.out = Nbin)]
    xULim = dt[, seq(min(get(x), na.rm = T) + width, max(get(x), na.rm = T), length.out = Nbin)]
  } else if (by == "quantile") {
    xLLim = dt[, quantile(get(x), seq(0, 1 - width, length.out = Nbin), names = F)]
    xULim = dt[, quantile(get(x), seq(width, 1, length.out = Nbin), names = F)]
  } else {stop("type of 'by' not supported")}
  xbinMid = (xLLim + xULim) / 2
  # Summarizing y
  tab_out <- sapply(seq(Nbin), function(i) {
    dt[get(x) >= xLLim[i] & get(x) <= xULim[i], c(mean(get(y), na.rm = T), sd(get(y), na.rm = T),
                                                  quantile(get(y), c(0.025, 0.975), names = F))]
  }) %>% t %>% as.data.table %>% set_colnames(., c("mean", "sd", ".025p", ".975p")) %>%
    cbind(data.table(binCode = seq(Nbin), xLLim, xbinMid, xULim), .)
  tab_out[, c("mean_plus_1sd", "mean_minus_1sd") := list(mean + sd, mean - sd)]
  return(tab_out)
}

plotEnv <- new.env()
backupEnv <- new.env()

gc()
gc()
checkMem_gc("Starting memory size checking")
start.mem.size <- memory.size()
start_ObjSizes <- sapply(ls(), function(x) {object.size(get(x))})
start_tab_ind <- tab_ind
start_tab_ind_size <- object.size(tab_ind)
dummyEnv <- new.env()
with(dummyEnv, {
## Set function for analyses against SIM_PP1
fcn_SIM_PP1 <- function(dt, newTab = T) {
dat_prob = tab_by_bin_idxy(dt, x = "SIM_ADJ_PP1", y = "MIX_ADJ_PP1", xNItv = 50, yNItv = 50, by = "even")
checkMem_gc("after tab prob")
dat_logit = tab_by_bin_idxy(dt, x = "SIM_ADJ_LOGIT_PP1", y = "MIX_ADJ_LOGIT_PP1",
xNItv = 50, yNItv = 50, by = "even")
checkMem_gc("after tab logit")

if ((!newTab) && exists("summarytab_logit_SIM_ADJ_PP1", where = backupEnv) &&
exists("summarytab_prob_SIM_ADJ_PP1", where = backupEnv)) {
summarytab_logit = get("summarytab_logit_SIM_ADJ_PP1", envir = backupEnv)
summarytab_prob = get("summarytab_prob_SIM_ADJ_PP1", envir = backupEnv)
} else {
summarytab_logit = tab_by_obin_x_str_y(dt, x = "SIM_ADJ_LOGIT_PP1", y = "MIX_ADJ_LOGIT_PP1",
width = 0.05, Nbin = 1000, by = "even")
summarytab_prob = summarytab_logit[, .(
binCode, invLogit(xLLim), invLogit(xbinMid), invLogit(xULim), invLogit(mean), sd,
invLogit(`.025p`), invLogit(`.975p`), invLogit(mean_plus_1sd), invLogit(mean_minus_1sd)
)] %>% set_colnames(colnames(summarytab_logit))
assign("summarytab_logit_SIM_ADJ_PP1", summarytab_logit, envir = backupEnv)
assign("summarytab_prob_SIM_ADJ_PP1", summarytab_prob, envir = backupEnv)
}
checkMem_gc("after summary tab")

plot_prob <- ggplot(dat_prob, aes(x = SIM_ADJ_PP1_binMid)) +
geom_vline(xintercept = 1, linetype = "dotted") +
geom_hline(yintercept = 1, linetype = "dotted") +
geom_abline(slope = 1, intercept = 0, size = 1.5, linetype = "dashed", alpha = 0.5) +
geom_point(aes(y = MIX_ADJ_PP1_binMid, size = N), alpha = 0.5, na.rm = T) +
geom_line(data = summarytab_prob, aes(x = xbinMid, y = mean), size = 1.25, color = "black", na.rm = T) +
geom_line(data = summarytab_prob, aes(x = xbinMid, y = mean_plus_1sd), size = 1.25, color = "blue", na.rm = T, linetype = "dashed") +
geom_line(data = summarytab_prob, aes(x = xbinMid, y = mean_minus_1sd), size = 1.25, color = "blue", na.rm = T, linetype = "dashed") +
scale_size_continuous(range = c(0.5, 5)) +
scale_x_continuous(name = "Simulated PP", breaks = seq(0, 1, 0.25),
labels = c("0%", "25%", "50%", "75%", "100%")) +
scale_y_continuous(name = "Estimated PP", limits = c(0, 1), breaks = seq(0, 1, 0.25),
labels = c("0%", "25%", "50%", "75%", "100%")) +
theme_classic() +
theme(axis.title = element_text(size = 18),
axis.text = element_text(size = 16))

checkMem_gc("after plot prob")
rm(dat_prob)
rm(summarytab_prob)
checkMem_gc("after removing dat_prob and summary_prob")

plot_logit <- ggplot(dat_logit, aes(x = SIM_ADJ_LOGIT_PP1_binMid)) +
geom_abline(slope = 1, intercept = 0, size = 1.5, linetype = "dashed", alpha = 0.5) +
geom_point(aes(y = MIX_ADJ_LOGIT_PP1_binMid, size = N), alpha = 0.5, na.rm = T) +
geom_line(data = summarytab_logit, aes(x = xbinMid, y = mean), size = 1.25, color = "black", na.rm = T) +
geom_line(data = summarytab_logit, aes(x = xbinMid, y = mean_plus_1sd), size = 1.25, color = "blue", na.rm = T, linetype = "dashed") +
geom_line(data = summarytab_logit, aes(x = xbinMid, y = mean_minus_1sd), size = 1.25, color = "blue", na.rm = T, linetype = "dashed") +
scale_size_continuous(range = c(0.5, 5)) +
scale_x_continuous(name = "Simulated LOGIT PP1",
breaks = c(0.00001, 0.001, 0.05, 0.5, 0.95, 0.999, 0.99999) %>% logit,
labels = c("0.001%", "0.1%", "5%", "50%", "95%", "99.9%", "99.999%")) +
scale_y_continuous(name = "Estimated LOGIT PP1", limits = c(-12, 12),
breaks = c(0.00001, 0.001, 0.05, 0.5, 0.95, 0.999, 0.99999) %>% logit,
labels = c("0.001%", "0.1%", "5%", "50%", "95%", "99.9%", "99.999%")) +
theme_classic() +
theme(axis.title = element_text(size = 18),
axis.text = element_text(size = 16))

checkMem_gc("after plot logit")
rm(summarytab_logit)
rm(dat_logit)
checkMem_gc("after removing dat_logit and summary_logit")

return(list(plot_prob, plot_logit))
}

checkMem_gc("after defining function")

## Tabling

tab_stat <- tab_ind[, c("MIX_MIN_SUCCESS", "MIX_ALL") := list(
tab_dat[tab_ind[, datasetID], MIX_MIN_SUCCESS],
tab_dat[tab_ind[, datasetID], MIX_ALL]
)]
checkMem_gc("after new tab_stat")

tab_stat_MIN_SUCCESS <- tab_stat[MIX_MIN_SUCCESS == 1]
checkMem_gc("after new new tab_stat_MIN_SUCCESS")

tab_stat_MIX_ALL <- tab_stat[MIX_ALL == 1]
checkMem_gc("after new tab_stat_MIX_ALL")

# Generating ggplot objects
print("--- start lst full ---")
lst_full <- fcn_SIM_PP1(tab_stat, newTab = F)
checkMem_gc("after lst full")
rm(tab_stat)
checkMem_gc("after rm tab_stat")

print("--- start lst MIN_SUCCESS ---")
lst_MIN_SUCCESS <- fcn_SIM_PP1(tab_stat_MIN_SUCCESS, newTab = F)
checkMem_gc("after lst MIN_SUCCESS")
rm(tab_stat_MIN_SUCCESS)
checkMem_gc("after rm tab_MIN_SUCCESS")

print("--- start lst MIX_ALL ---")
lst_MIX_ALL <- fcn_SIM_PP1(tab_stat_MIX_ALL, newTab = F)
checkMem_gc("after lst MIX_ALL")
rm(tab_stat_MIX_ALL)
checkMem_gc("after rm tab_stat_MIX_ALL")

## Start plotting
print("--- Start plotting ---")
assign("full_sp_MIX_ADJ_PP1_vs_SIM_ADJ_PP1", lst_full[[1]], envir = plotEnv)
checkMem_gc("after assign1")
assign("full_sp_MIX_ADJ_LOGIT_PP1_vs_SIM_ADJ_LOGIT_PP1", lst_full[[2]], envir = plotEnv)
checkMem_gc("after assign2")
rm(lst_full)
checkMem_gc("after removing lst_full")
assign("MIN_SUCCESS_sp_MIX_ADJ_PP1_vs_SIM_ADJ_PP1", lst_MIN_SUCCESS[[1]], envir = plotEnv)
checkMem_gc("after assign3")
assign("MIN_SUCCESS_sp_MIX_ADJ_LOGIT_PP1_vs_SIM_ADJ_LOGIT_PP1", lst_MIN_SUCCESS[[2]], envir = plotEnv)
checkMem_gc("after assign4")
rm(lst_MIN_SUCCESS)
checkMem_gc("after removing lst_MIN_SUCCESS")
assign("MIX_ALL_sp_MIX_ADJ_PP1_vs_SIM_ADJ_PP1", lst_MIX_ALL[[1]], envir = plotEnv)
checkMem_gc("after assign5")
assign("MIX_ALL_sp_MIX_ADJ_LOGIT_PP1_vs_SIM_ADJ_LOGIT_PP1", lst_MIX_ALL[[2]], envir = plotEnv)
checkMem_gc("after assign6")
rm(lst_MIX_ALL)
checkMem_gc("after removing lst_MIX_ALL")
})

checkMem_gc("--- Finishing ---")
rm(dummyEnv)
gc()
checkMem_gc("After clean up")
final.mem.size <- memory.size()
end_ObjSizes <- sapply(ls(), function(x) {object.size(get(x))})
print("")
print("")
print("--- The sizes of all objects (under .GlobalEnv) BEFORE the graph plotting process ---")
print("--- (Before the process starts, all existing objects are stored under .GlobalEnv) ---")
print(start_ObjSizes)
print("")
print("--- The sizes of all objects (under .GlobalEnv) AFTER the graph plotting process ---")
print(end_ObjSizes)
print("--- I have not altered any existing objects under .GlobalEnv during the process, I only passed them to functions. And yet their sizes increase! ---")
print("--- Let's look at the object tab_ind, which shows the largest inflation in object size ---")
print("--- This is the size of tab_ind BEFORE the process: ---")
print(start_tab_ind_size)
print("--- This is the size of tab_ind AFTER the process: ---")
print(object.size(tab_ind))
print("--- But they are identical (checked using the function identical())! ---")
print(identical(start_tab_ind, tab_ind))
print("")

Updated reproducible example

Here is an updated, simpler reproducible example. The latest finding is that to copy a data.table object, <- data.table::copy() should be used rather than plain <-. The latter only creates another pointer to the same value (i.e. a reference). Modifying the object through the new name therefore also changes the size of the object the original name points to, which is why the object size inflated when I modified the "copy". I am not sure, however, whether this is the only source of the inflated memory usage.
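The reference semantics can be demonstrated in isolation. A minimal sketch (object names here are illustrative, not taken from the code below):

library(data.table)

dt  <- data.table(id = 1:5)
ref <- dt        # plain assignment: 'ref' and 'dt' point to the same data.table
cpy <- copy(dt)  # data.table::copy(): a genuinely independent object

print(object.size(dt))

ref[, extra := rnorm(5)]  # add a column by reference through the other name

identical(ref, dt)        # TRUE  -- same object, so 'dt' gained the column too
identical(cpy, dt)        # FALSE -- the copy still has only 'id'
print(object.size(dt))    # larger than before, even though 'dt' was never modified directly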

library(data.table)
library(magrittr)
library(ggplot2)

N <- 6000

set.seed(runif(1, 0, .Machine$integer.max) %>% ceiling)

logit <- function(x) {return(log(x/(1-x)))}
invLogit <- function(x) {return(exp(x)/(1+exp(x)))}

tab_dat <- data.table(datasetID = seq(N), MIX_MIN_SUCCESS = sample(c(0, 1), N, replace = T), MIX_ALL = sample(c(0, 1), N, replace = T))
tab_dat[MIX_MIN_SUCCESS == 0, MIX_ALL := 0]
n <- sample(20:300, N, replace = T)
tab_ind <- data.table(
  datasetID = rep(seq(N), times = n),
  SIM_ADJ_PP1 = runif(sum(n), 0.00001, 0.99999),
  MIX_ADJ_PP1 = runif(sum(n), 0.00001, 0.99999)
)

## Individual bins for x and y
tab_by_bin_idxy <- function(dt, x, y, xNItv, yNItv, by = "quantile") {
  # Binning
  if (by == "even") {
    minN = dt[, min(get(x), na.rm = T)]
    maxN = dt[, max(get(x), na.rm = T)]
    xBreaks = seq(minN, maxN, length.out = xNItv + 1)
    yBreaks = dt[, seq(min(get(y), na.rm = T), max(get(y), na.rm = T), length.out = yNItv + 1)]
  } else if (by == "quantile") {
    xBreaks = dt[, quantile(get(x), seq(0, 1, length.out = xNItv + 1), names = F)]
    yBreaks = dt[, quantile(get(y), seq(0, 1, length.out = yNItv + 1), names = F)]
  }
  xbinCode = dt[, .bincode(get(x), breaks = xBreaks, include.lowest = T)]
  xbinMid = sapply(seq(xNItv), function(i) {return(mean(xBreaks[c(i, i+1)]))})[xbinCode]
  ybinCode = dt[, .bincode(get(y), breaks = yBreaks, include.lowest = T)]
  ybinMid = sapply(seq(yNItv), function(i) {return(mean(yBreaks[c(i, i+1)]))})[ybinCode]
  # Creating table
  tab_match = CJ(xbinCode = seq(xNItv), ybinCode = seq(yNItv))
  tab_plot = data.table(xbinCode, xbinMid, ybinCode, ybinMid)[
    tab_match, .(xbinMid = xbinMid[1], ybinMid = ybinMid[1], N = .N), keyby = .EACHI, on = c("xbinCode", "ybinCode")
  ]
  colnames(tab_plot)[colnames(tab_plot) == "xbinCode"] = paste0(x, "_binCode")
  colnames(tab_plot)[colnames(tab_plot) == "xbinMid"] = paste0(x, "_binMid")
  colnames(tab_plot)[colnames(tab_plot) == "ybinCode"] = paste0(y, "_binCode")
  colnames(tab_plot)[colnames(tab_plot) == "ybinMid"] = paste0(y, "_binMid")
  rm(list = c("xBreaks", "yBreaks", "xbinCode", "ybinCode", "xbinMid", "ybinMid", "tab_match"))
  # Returning table
  return(tab_plot)
}

plotEnv <- new.env()
backupEnv <- new.env()

gc()
gc(verbose = T)
start.mem.size <- memory.size()
start_ObjSizes <- sapply(ls(), function(x) {object.size(get(x))})
start_tab_ind <- copy(tab_ind)
start_tab_ind_size <- object.size(tab_ind)
dummyEnv <- new.env()
with(dummyEnv, {
## Set function for analyses against SIM_PP1
fcn_SIM_PP1 <- function(dt, newTab = T) {
dat_prob = tab_by_bin_idxy(dt, x = "SIM_ADJ_PP1", y = "MIX_ADJ_PP1", xNItv = 50, yNItv = 50, by = "even")

plot_prob <- ggplot(dat_prob, aes(x = SIM_ADJ_PP1_binMid)) +
geom_vline(xintercept = 1, linetype = "dotted") +
geom_hline(yintercept = 1, linetype = "dotted") +
geom_abline(slope = 1, intercept = 0, size = 1.5, linetype = "dashed", alpha = 0.5) +
geom_point(aes(y = MIX_ADJ_PP1_binMid, size = N), alpha = 0.5, na.rm = T) +
scale_size_continuous(range = c(0.5, 5)) +
scale_x_continuous(name = "Simulated PP", breaks = seq(0, 1, 0.25),
labels = c("0%", "25%", "50%", "75%", "100%")) +
scale_y_continuous(name = "Estimated PP", limits = c(0, 1), breaks = seq(0, 1, 0.25),
labels = c("0%", "25%", "50%", "75%", "100%")) +
theme_classic() +
theme(axis.title = element_text(size = 18),
axis.text = element_text(size = 16))

return(plot_prob)
}

## Tabling
tab_stat <- copy(tab_ind)
tab_stat <- tab_stat[, c("MIX_MIN_SUCCESS", "MIX_ALL") := list(
tab_dat[tab_stat[, datasetID], MIX_MIN_SUCCESS],
tab_dat[tab_stat[, datasetID], MIX_ALL]
)]

tab_stat_MIN_SUCCESS <- tab_stat[MIX_MIN_SUCCESS == 1]

tab_stat_MIX_ALL <- tab_stat[MIX_ALL == 1]

# Generating ggplot objects
lst_full <- fcn_SIM_PP1(tab_stat, newTab = F)
lst_MIN_SUCCESS <- fcn_SIM_PP1(tab_stat_MIN_SUCCESS, newTab = F)
lst_MIX_ALL <- fcn_SIM_PP1(tab_stat_MIX_ALL, newTab = F)

## Start plotting
assign("full_sp_MIX_ADJ_PP1_vs_SIM_ADJ_PP1", lst_full, envir = plotEnv)
assign("MIN_SUCCESS_sp_MIX_ADJ_PP1_vs_SIM_ADJ_PP1", lst_MIN_SUCCESS, envir = plotEnv)
assign("MIX_ALL_sp_MIX_ADJ_PP1_vs_SIM_ADJ_PP1", lst_MIX_ALL, envir = plotEnv)
})

rm(dummyEnv)
rm(start_tab_ind)
gc(verbose = T)
final.mem.size <- memory.size()
end_ObjSizes <- sapply(ls(), function(x) {object.size(get(x))})

My sessionInfo() when running the example above:

R version 3.5.0 (2018-04-23)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows >= 8 x64 (build 9200)

Matrix products: default

locale:
[1] LC_COLLATE=English_Hong Kong SAR.1252 LC_CTYPE=English_Hong Kong SAR.1252 LC_MONETARY=English_Hong Kong SAR.1252
[4] LC_NUMERIC=C LC_TIME=English_Hong Kong SAR.1252

attached base packages:
[1] stats graphics grDevices utils datasets methods base

other attached packages:
[1] ggplot2_2.2.1 magrittr_1.5 data.table_1.11.4

loaded via a namespace (and not attached):
[1] colorspace_1.3-2 scales_0.5.0 compiler_3.5.0 lazyeval_0.2.1 plyr_1.8.4 tools_3.5.0 pillar_1.2.3 gtable_0.2.0
[9] tibble_1.4.2 yaml_2.1.19 Rcpp_0.12.18 grid_3.5.0 rlang_0.2.1 munsell_0.4.3

Best answer

My feeling is that you need to increase --min-vsize=. Why? An error like cannot allocate vector of size ... means you need to increase --min-vsize=.

R command-line invocation:

R --min-vsize=400M

RStudio invocation

Create or add an entry in your .Renviron file:

R_VSIZE=400M

Reference: Friendly R Startup Configuration
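To confirm the setting was actually picked up (a sketch; R_VSIZE is read only at startup, so check in a fresh R session):

Sys.getenv("R_VSIZE")  # should return "400M" after restarting R
gc()                   # the 'Vcells' row reports current vector-heap usage and the gc trigger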

Key questions:

  • Are you running a 64-bit operating system? [Yes/No]
  • Are you running a 64-bit build of R? [Yes/No]

If you answered "No" to either of these questions, I recommend upgrading.

Background

The real point here is that if you find yourself needing to raise the minimum vsize, you probably want to review your code for allocation gotchas. In most cases you will find that you are duplicating data through copy-on-assignment.
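One rough way to look for such gotchas (a sketch; tracemem() is base R and reports when an object is duplicated, address() is exported by data.table):

library(data.table)

df <- data.frame(x = runif(1e6))
tracemem(df)      # from now on, R prints a message whenever df is duplicated
df$y <- 1         # base R copy-on-modify: tracemem reports a duplication

dt <- data.table(x = runif(1e6))
address(dt)       # memory address of dt
dt2 <- dt
address(dt2)      # same address: plain assignment made no copy
dt[, y := 1]      # modified in place, so dt2 "grows" as well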

For more on R gotchas, I highly recommend reading further.

The details behind all of this:

R maintains separate areas for fixed and variable sized objects. The first of these is allocated as an array of cons cells (Lisp programmers will know what they are, others may think of them as the building blocks of the language itself, parse trees, etc.), and the second are thrown on a heap of "Vcells" of 8 bytes each. Each cons cell occupies 28 bytes on a 32-bit build of R and (usually) 56 bytes on a 64-bit build.

The default values are (currently) an initial setting of 350k cons cells and 6Mb of vector heap. Note that these areas are not actually allocated initially: the values are the sizes at which garbage collection is triggered. They can be set by the command-line options --min-nsize and --min-vsize (or, if those are not used, the environment variables R_NSIZE and R_VSIZE) when R is started. Thereafter R will grow or shrink the areas depending on usage, never decreasing below the initial values. The maximal vector heap size can be set with the environment variable R_MAX_VSIZE.

How much time R spends in the garbage collector depends on these initial settings and on the trade-off the memory manager makes, when memory fills up, between collecting garbage to free unused memory and growing these areas. The strategy used for growth can be specified by setting the environment variable R_GC_MEM_GROW to an integer value between 0 and 3. This variable is read at start-up. Higher values grow the heap more aggressively, reducing garbage-collection time but using more memory.

Reference: https://www.rdocumentation.org/packages/base/versions/3.5.1/topics/Memory
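Putting those startup-time variables together, a sketch of a .Renviron that raises the initial thresholds and makes heap growth more aggressive (values are illustrative only):

R_NSIZE=1000000
R_VSIZE=400M
R_GC_MEM_GROW=3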

Windows

The address-space limit is 2Gb under 32-bit Windows unless the OS default has been changed to allow more (up to 3Gb). See https://www.microsoft.com/whdc/system/platform/server/PAE/PAEmem.mspx and https://msdn.microsoft.com/en-us/library/bb613473(VS.85).aspx. Under most 64-bit versions of Windows the limit for a 32-bit build of R is 4Gb; for the oldest ones it is 2Gb. The limit for a 64-bit build of R (imposed by the OS) is 8Tb.

Even on 64-bit Windows it is usually not possible to allocate as much as 2Gb to a single vector in a 32-bit build of R, because of pre-allocations by Windows in the middle of the address space.

Under Windows, R imposes limits on the total memory allocation available to a single session, as the OS provides no way to do so: see memory.size and memory.limit.
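For completeness, the two Windows-only helpers mentioned above can be used like this (a sketch):

memory.size()               # MB currently in use by this R session
memory.size(max = TRUE)     # maximum MB obtained from the OS so far
memory.limit()              # current limit in MB
memory.limit(size = 16000)  # request a larger limit (MB), if the OS allows it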

Regarding "r - Object size increases after passing the object to a function", a similar question can be found on Stack Overflow: https://stackoverflow.com/questions/53222918/
