gpt4 book ai didi

r - 使用 ggcontour 在数据集上覆盖树

转载 作者:行者123 更新时间:2023-12-02 14:37:16 27 4
gpt4 key购买 nike

我的任务是根据下面的观察结果拟合分类树模型。然后我必须将树放置在现有数据之上。建议我使用 p + geom_contour(....) 但我对 ggplot 不太熟悉。

我提供的代码如下。

我可以相当容易地将树模型拟合到数据,但绘制它只能产生决策树。如何使用geom_contour在现有绘图上叠加树模型?

library(tidyverse)
set.seed(1234)
dat <- tibble(
x1 = rnorm(100),
x2 = rnorm(100)
) %>% mutate(y = as_factor(ifelse(x1^2 + x2^2 > 1.39, "A", "B")))

circlepts <- tibble(theta = seq(0, 2*pi, length = 100)) %>%
mutate(x = sqrt(1.39) * sin(theta), y = sqrt(1.39) * cos(theta))

p <- ggplot(dat) + geom_point(aes(x1, x2, color = y)) + coord_fixed() +
geom_polygon(data = circlepts, aes(x, y), color = "blue", fill = NA)
p

为了使树模型适合数据,我输入

library(tree)
tree_fit <- tree(y~., dat)

叠加层只是适合数据的决策树,例如像这样的东西(用 MS Paint 粗略地绘制)

enter image description here

最佳答案

我不认为 geom_contour 是执行此操作的方法,但您可以从 tree_fit 的基础数据框中获取线段的坐标,并执行一些争论逐步将每个片段限制在图中仍然“事件”的区域:

tree.df.segment <- tree_fit$frame %>% 
rownames_to_column() %>%
mutate(rowname = as.integer(rowname),
depth = tree:::tree.depth(rowname),
split = splits[, 1] %>%
gsub("<|>", "", .) %>%
as.numeric()) %>%

arrange(depth, rowname) %>%
mutate(leaf.position = case_when(lead(depth) > depth & lead(var) == "<leaf>" ~ "left",
lead(depth) > depth & lead(var) != "<leaf>" ~ "right",
TRUE ~ NA_character_)) %>%
fill(leaf.position, .direction = "up") %>%
filter(var != "<leaf>") %>%
select(depth, var, split, leaf.position) %>%

# define basic segment coordinates
mutate(x = -Inf, xend = Inf, y = -Inf, yend = Inf,
xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf) %>%

# modify coordinates of segment / active area based on split
mutate(x = ifelse(var == "x1", split, x),
xend = ifelse(var == "x1", split, xend),
y = ifelse(var == "x2", split, y),
yend = ifelse(var == "x2", split, yend),
xmin = ifelse(var == "x1" & leaf.position == "left", split, xmin),
xmax = ifelse(var == "x1" & leaf.position == "right", split, xmax),
ymin = ifelse(var == "x2" & leaf.position == "left", split, ymin),
ymax = ifelse(var == "x2" & leaf.position == "right", split, ymax)) %>%
# shrink active area progressively as depth increases
mutate(xmin = cummax(xmin), xmax = cummin(xmax),
ymin = cummax(ymin), ymax = cummin(ymax)) %>%
# limit segment coordinates to within active area
mutate(x = pmax(x, xmin), xend = pmin(xend, xmax),
y = pmax(y, ymin), yend = pmin(yend, ymax))

p +
geom_segment(data = tree.df.segment,
aes(x = x, xend = xend, y = y, yend = yend))

plot

<小时/>

此外(因为我认为这之后肯定会被问到),我们可以使用geom_rect将与终端叶子相对应的每个区域遮蔽为矩形。这将需要一些额外的争论。

tree.df.rect <- tree.df.segment %>%
mutate(depth = depth + 1) %>%
select(-c(x, xend, y, yend)) %>%
mutate_at(vars(xmin, xmax, ymin, ymax), list(rect = lag)) %>%
mutate_at(vars(xmin_rect, ymin_rect), ~ifelse(is.na(.), -Inf, .)) %>%
mutate_at(vars(xmax_rect, ymax_rect), ~ifelse(is.na(.), Inf, .)) %>%
mutate(xmin_rect = ifelse(var == "x1" & leaf.position == "right", split, xmin_rect),
xmax_rect = ifelse(var == "x1" & leaf.position == "left", split, xmax_rect),
ymin_rect = ifelse(var == "x2" & leaf.position == "right", split, ymin_rect),
ymax_rect = ifelse(var == "x2" & leaf.position == "left", split, ymax_rect)) %>%
# add label for each rect
full_join(tree_fit$frame %>%
rownames_to_column() %>%
mutate(rowname = as.integer(rowname),
depth = tree:::tree.depth(rowname),
split = splits[, 1] %>%
gsub("<|>", "", .) %>%
as.numeric()) %>%
filter(var == "<leaf>") %>%
select(depth, rowname, yval) %>%
arrange(depth, rowname))
# since last split is associated with two rectangles, determine which is the last 'active'
# one in order to assign the labels correctly (doesn't matter in this case since the last
# two labels are both 'B', but this should apply more generally)
if(tree.df.rect %>% filter(depth == max(depth)) %>% pull(leaf.position) %>% unique() == "left") {
tree.df.rect[nrow(tree.df.rect), c("xmin_rect", "xmax_rect", "ymin_rect", "ymax_rect")] <-
tree.df.rect[nrow(tree.df.rect), c("xmin", "xmax", "ymin", "ymax")]
} else {
tree.df.rect[nrow(tree.df.rect)-1, c("xmin_rect", "xmax_rect", "ymin_rect", "ymax_rect")] <-
tree.df.rect[nrow(tree.df.rect)-1, c("xmin", "xmax", "ymin", "ymax")]
}
tree.df.rect <- tree.df.rect %>%
select(depth, yval, xmin_rect, xmax_rect, ymin_rect, ymax_rect)

# combine into one data frame
tree.df <- full_join(
tree.df.rect %>%
select(depth, yval, xmin_rect, xmax_rect, ymin_rect, ymax_rect),
tree.df.segment %>%
select(depth, x, xend, y, yend)
)

p.shaded <- ggplot(data = tree.df) +
geom_point(data = dat, aes(x1, x2, color = y)) +
geom_polygon(data = circlepts, aes(x, y), color = "blue", fill = NA) +
geom_rect(aes(xmin = xmin_rect, xmax = xmax_rect,
ymin = ymin_rect, ymax = ymax_rect,
fill = yval),
alpha = 0.25) +
geom_segment(aes(x = x, xend = xend, y = y, yend = yend)) +
coord_fixed() +
labs(color = "", fill = "") +
scale_fill_discrete(breaks = c("A", "B"))

p.shaded

shaded plot

可以轻松地进一步扩展为动画形式:

library(gganimate)

p.anim <- p.shaded +
transition_states(depth) +
shadow_mark() +
enter_fade() +
labs(title = "{closest_state}")

animate(p.anim, nframes = 10, fps = 1)

animated plot

关于r - 使用 ggcontour 在数据集上覆盖树,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/55735981/

27 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com