gpt4 book ai didi

r - 在 R 中按组有条件地计算两个日期时间列之间的行

转载 作者:行者123 更新时间:2023-12-03 08:17:48 27 4
gpt4 key购买 nike

我正在处理用户生成的数据,我想计算特定时间段内每个 ID 变量的两个不同日期时间列之间的行数。这是一个模拟数据框,类似于我正在使用的数据框(抱歉它很长):

library(Pareto)
library(ggplot2)
library(lubridate)
library(stringi)

set.seed(1)
n_users<-150
n_rows <- 1000
relative_probs <- rPareto(n = n_users, t = 1, alpha = 0.3, truncation = 500)
UserId<-stri_rand_strings(n_users,6)
UserId <- sample(UserId, size = n_rows, prob = relative_probs, replace = TRUE)

starttimes<-structure(c(1612224005, 1612224010, 1612224022, 1612224027, 1612224038,
1612224040, 1612224050, 1612224067, 1612224018, 1612224092, 1612224082,
1612224076, 1612224065, 1612224107, 1612224144, 1612224085, 1612224037,
1612224212, 1612224249, 1612224281, 1612224271, 1612224293, 1612224281,
1612224348, 1612224358, 1612224286, 1612224398, 1612224098, 1612224498,
1612224474, 1612224511, 1612224511, 1612224522, 1612224443, 1612224085,
1612224480, 1612224309, 1612224583, 1612224726, 1612224533, 1612224732,
1612224752, 1612224741, 1612224322, 1612224856, 1612224887, 1612224879,
1612224673, 1612224968, 1612224711, 1612224230, 1612225058, 1612224985,
1612225166, 1612224159, 1612224522, 1612225124, 1612224676, 1612224695,
1612225361, 1612224854, 1612225316, 1612225161, 1612224662, 1612225263,
1612225511, 1612224232, 1612225636, 1612225642, 1612225648, 1612225662,
1612225667, 1612225673, 1612225681, 1612225668, 1612225689, 1612225699,
1612225685, 1612224031, 1612225797, 1612224427, 1612225705, 1612225814,
1612225743, 1612225869, 1612225919, 1612225926, 1612225829, 1612225154,
1612226208, 1612226216, 1612226234, 1612226103, 1612226249, 1612226255,
1612226267, 1612226261, 1612226278, 1612226283, 1612226253, 1612226339,
1612226394, 1612226409, 1612224236, 1612226472, 1612226477, 1612226547,
1612226560, 1612226602, 1612226608, 1612225074, 1612226633, 1612226200,
1612226614, 1612226667, 1612226678, 1612226729, 1612226749, 1612226746,
1612226803, 1612226607, 1612226912, 1612226922, 1612226930, 1612226661,
1612226938, 1612226966, 1612227017, 1612227026, 1612227032, 1612227043,
1612227065, 1612226991, 1612227174, 1612227060, 1612227179, 1612227184,
1612227191, 1612227127, 1612226655, 1612227079, 1612225878, 1612227235,
1612227181, 1612227249, 1612227309, 1612227317, 1612227337, 1612226341,
1612227290, 1612227352, 1612227390, 1612226677, 1612227397, 1612227404,
1612227426, 1612227447, 1612227196, 1612227245, 1612224115, 1612227444,
1612227576, 1612227583, 1612225830, 1612227639, 1612227692, 1612227741,
1612227767, 1612225934, 1612226260, 1612225495, 1612227572, 1612227886,
1612227547, 1612227857, 1612228109, 1612228049, 1612228106, 1612228148,
1612228337, 1612228351, 1612228358, 1612228148, 1612228412, 1612228505,
1612228523, 1612228444, 1612228574, 1612228587, 1612228597, 1612228607,
1612228937, 1612228955, 1612228962, 1612228969, 1612229187, 1612228571,
1612229235, 1612229364, 1612229374, 1612229519, 1612226920, 1612229434,
1612229282, 1612229762, 1612229025, 1612230063, 1612230201, 1612230372,
1612230380, 1612230469, 1612229782, 1612230493, 1612230395, 1612230617,
1612230540, 1612225845, 1612230535, 1612230671, 1612229443, 1612230646,
1612230830, 1612228533, 1612230889, 1612230907, 1612230939, 1612230954,
1612230975, 1612227207, 1612230981, 1612230991, 1612230677, 1612231009,
1612231017, 1612231023, 1612231016, 1612231036, 1612231064, 1612231085,
1612231105, 1612231122, 1612231137, 1612231151, 1612231156, 1612231173,
1612231030, 1612230998, 1612230838, 1612231627, 1612231651, 1612231657,
1612231672, 1612231203, 1612231927, 1612231226, 1612231939, 1612232151,
1612232159, 1612232375, 1612232385, 1612232526, 1612232705, 1612232761,
1612232721, 1612231191, 1612232905, 1612232915, 1612232923, 1612232972,
1612225176, 1612233131, 1612224567, 1612233071, 1612234287, 1612234426,
1612234443, 1612234449, 1612234650, 1612234658, 1612225211, 1612227597,
1612232868, 1612232580, 1612234987, 1612235008, 1612235133, 1612235182,
1612234722, 1612234967, 1612235140, 1612235398, 1612235409, 1612235417,
1612235432, 1612235450, 1612235459, 1612235710, 1612236491, 1612236575,
1612236582, 1612226403, 1612236587, 1612238122, 1612238128, 1612238028,
1612238207, 1612234517, 1612238299, 1612238475, 1612239138, 1612239150,
1612239156, 1612239428, 1612231495, 1612239577, 1612239586, 1612228731,
1612238892, 1612239592, 1612239651, 1612239692, 1612239692, 1612239699,
1612239786, 1612239940, 1612240802, 1612240808, 1612240903, 1612240908,
1612240913, 1612240919, 1612240935, 1612240941, 1612240947, 1612241020,
1612239507, 1612224122, 1612240873, 1612230662, 1612241642, 1612241630,
1612241680, 1612241689, 1612241727, 1612241649, 1612241740, 1612241749,
1612241800, 1612241760, 1612241738, 1612241890, 1612241964, 1612241971,
1612241976, 1612241997, 1612242009, 1612242078, 1612227671, 1612242173,
1612241819, 1612242425, 1612242509, 1612242055, 1612242595, 1612242746,
1612230074, 1612242983, 1612242993, 1612243012, 1612243020, 1612243029,
1612243036, 1612242957, 1612243145, 1612243169, 1612243042, 1612243136,
1612243227, 1612242992, 1612243173, 1612243179, 1612232803, 1612243829,
1612243930, 1612243951, 1612243977, 1612243431, 1612243991, 1612243986,
1612243896, 1612244036, 1612244089, 1612244114, 1612244124, 1612244130,
1612244140, 1612244150, 1612244543, 1612242359, 1612244555, 1612244268,
1612244768, 1612243995, 1612238398, 1612245001, 1612245038, 1612245146,
1612245157, 1612245164, 1612239697, 1612244996, 1612245319, 1612245430,
1612242602, 1612245198, 1612245489, 1612245565, 1612245572, 1612241056,
1612245593, 1612245564, 1612243723, 1612245749, 1612245586, 1612245811,
1612245677, 1612245854, 1612245871, 1612245893, 1612245899, 1612245905,
1612245910, 1612245915, 1612245830, 1612235046, 1612246117, 1612246124,
1612246185, 1612245434, 1612246573, 1612245932, 1612246762, 1612246770,
1612246776, 1612246786, 1612246806, 1612246888, 1612246900, 1612246918,
1612246923, 1612246929, 1612246937, 1612246949, 1612246981, 1612245786,
1612246943, 1612247016, 1612247021, 1612246988, 1612247034, 1612246974,
1612247044, 1612247047, 1612247055, 1612247065, 1612247071, 1612247085,
1612247094, 1612247110, 1612247116, 1612247123, 1612247129, 1612247135,
1612247150, 1612247163, 1612247166, 1612246605, 1612247172, 1612247181,
1612247188, 1612247202, 1612247209, 1612247031, 1612247217, 1612247226,
1612247206, 1612247236, 1612247245, 1612247258, 1612247272, 1612247301,
1612247315, 1612247323, 1612247329, 1612247337, 1612247273, 1612246181,
1612247570, 1612246720, 1612247820), class = c("POSIXct", "POSIXt"
), tzone = "UTC")

endtimes<-structure(c(1612224010, 1612224018, 1612224027, 1612224038, 1612224046,
1612224051, 1612224064, 1612224081, 1612224092, 1612224098, 1612224124,
1612224152, 1612224156, 1612224187, 1612224191, 1612224203, 1612224230,
1612224231, 1612224286, 1612224290, 1612224291, 1612224299, 1612224322,
1612224353, 1612224374, 1612224377, 1612224426, 1612224436, 1612224507,
1612224510, 1612224520, 1612224533, 1612224537, 1612224582, 1612224621,
1612224662, 1612224673, 1612224675, 1612224739, 1612224741, 1612224741,
1612224856, 1612224878, 1612224886, 1612224892, 1612224906, 1612224922,
1612224983, 1612225023, 1612225029, 1612225060, 1612225071, 1612225095,
1612225185, 1612225198, 1612225215, 1612225246, 1612225262, 1612225301,
1612225381, 1612225416, 1612225423, 1612225432, 1612225487, 1612225495,
1612225550, 1612225636, 1612225642, 1612225648, 1612225661, 1612225667,
1612225673, 1612225680, 1612225681, 1612225683, 1612225695, 1612225699,
1612225727, 1612225796, 1612225813, 1612225814, 1612225835, 1612225845,
1612225869, 1612225875, 1612225925, 1612225933, 1612226103, 1612226208,
1612226215, 1612226225, 1612226244, 1612226252, 1612226254, 1612226266,
1612226277, 1612226283, 1612226339, 1612226341, 1612226353, 1612226391,
1612226406, 1612226414, 1612226471, 1612226477, 1612226543, 1612226556,
1612226602, 1612226607, 1612226633, 1612226647, 1612226656, 1612226657,
1612226661, 1612226683, 1612226685, 1612226749, 1612226803, 1612226845,
1612226872, 1612226911, 1612226918, 1612226930, 1612226933, 1612226936,
1612226965, 1612227017, 1612227024, 1612227032, 1612227043, 1612227065,
1612227072, 1612227076, 1612227176, 1612227178, 1612227184, 1612227191,
1612227204, 1612227210, 1612227231, 1612227235, 1612227245, 1612227245,
1612227256, 1612227309, 1612227317, 1612227329, 1612227346, 1612227367,
1612227374, 1612227389, 1612227397, 1612227404, 1612227404, 1612227417,
1612227443, 1612227461, 1612227547, 1612227596, 1612227599, 1612227619,
1612227633, 1612227639, 1612227690, 1612227691, 1612227740, 1612227741,
1612227767, 1612227840, 1612227842, 1612227885, 1612227899, 1612227899,
1612228048, 1612228057, 1612228123, 1612228148, 1612228157, 1612228163,
1612228351, 1612228356, 1612228362, 1612228412, 1612228444, 1612228514,
1612228533, 1612228571, 1612228586, 1612228597, 1612228607, 1612228620,
1612228955, 1612228961, 1612228969, 1612229186, 1612229226, 1612229235,
1612229282, 1612229374, 1612229490, 1612229570, 1612229599, 1612229722,
1612229761, 1612229782, 1612230063, 1612230074, 1612230212, 1612230391,
1612230405, 1612230469, 1612230487, 1612230539, 1612230609, 1612230631,
1612230645, 1612230662, 1612230673, 1612230698, 1612230819, 1612230825,
1612230836, 1612230888, 1612230895, 1612230913, 1612230954, 1612230971,
1612230989, 1612230997, 1612230998, 1612231008, 1612231017, 1612231019,
1612231022, 1612231029, 1612231033, 1612231061, 1612231070, 1612231105,
1612231121, 1612231137, 1612231143, 1612231156, 1612231171, 1612231191,
1612231203, 1612231223, 1612231489, 1612231640, 1612231657, 1612231672,
1612231686, 1612231927, 1612231938, 1612232033, 1612232151, 1612232157,
1612232375, 1612232381, 1612232526, 1612232579, 1612232721, 1612232803,
1612232868, 1612232902, 1612232909, 1612232966, 1612232971, 1612232980,
1612233026, 1612233146, 1612233387, 1612234333, 1612234426, 1612234443,
1612234449, 1612234650, 1612234658, 1612234722, 1612234800, 1612234806,
1612234967, 1612234987, 1612235005, 1612235039, 1612235139, 1612235187,
1612235203, 1612235328, 1612235387, 1612235409, 1612235413, 1612235432,
1612235450, 1612235458, 1612235492, 1612235716, 1612236571, 1612236582,
1612236587, 1612237340, 1612238021, 1612238127, 1612238143, 1612238207,
1612238299, 1612238334, 1612238398, 1612238526, 1612239149, 1612239156,
1612239162, 1612239507, 1612239576, 1612239582, 1612239591, 1612239600,
1612239651, 1612239690, 1612239691, 1612239696, 1612239699, 1612239704,
1612239791, 1612239960, 1612240807, 1612240884, 1612240908, 1612240913,
1612240919, 1612240935, 1612240941, 1612240947, 1612241019, 1612241027,
1612241397, 1612241482, 1612241516, 1612241532, 1612241649, 1612241680,
1612241689, 1612241726, 1612241733, 1612241740, 1612241749, 1612241755,
1612241818, 1612241851, 1612241889, 1612241961, 1612241971, 1612241976,
1612241996, 1612242008, 1612242019, 1612242118, 1612242141, 1612242420,
1612242436, 1612242509, 1612242598, 1612242677, 1612242729, 1612242790,
1612242914, 1612242993, 1612243008, 1612243019, 1612243026, 1612243036,
1612243041, 1612243144, 1612243145, 1612243169, 1612243169, 1612243171,
1612243227, 1612243264, 1612243430, 1612243528, 1612243673, 1612243866,
1612243943, 1612243971, 1612243986, 1612243995, 1612244002, 1612244023,
1612244074, 1612244084, 1612244114, 1612244121, 1612244129, 1612244136,
1612244146, 1612244278, 1612244551, 1612244567, 1612244678, 1612244683,
1612244856, 1612245001, 1612245037, 1612245128, 1612245142, 1612245157,
1612245164, 1612245199, 1612245241, 1612245383, 1612245434, 1612245439,
1612245517, 1612245541, 1612245544, 1612245572, 1612245586, 1612245590,
1612245599, 1612245668, 1612245748, 1612245785, 1612245815, 1612245846,
1612245849, 1612245870, 1612245893, 1612245899, 1612245905, 1612245910,
1612245915, 1612245921, 1612245932, 1612246048, 1612246127, 1612246134,
1612246215, 1612246242, 1612246748, 1612246761, 1612246767, 1612246776,
1612246782, 1612246806, 1612246888, 1612246900, 1612246917, 1612246922,
1612246929, 1612246933, 1612246943, 1612246959, 1612246987, 1612247008,
1612247015, 1612247021, 1612247027, 1612247031, 1612247041, 1612247045,
1612247053, 1612247059, 1612247063, 1612247071, 1612247076, 1612247094,
1612247110, 1612247116, 1612247122, 1612247127, 1612247135, 1612247142,
1612247156, 1612247170, 1612247175, 1612247178, 1612247180, 1612247186,
1612247202, 1612247208, 1612247216, 1612247220, 1612247222, 1612247226,
1612247243, 1612247245, 1612247250, 1612247264, 1612247272, 1612247301,
1612247320, 1612247329, 1612247337, 1612247346, 1612247364, 1612247552,
1612247739, 1612247815, 1612247833), class = c("POSIXct", "POSIXt"
), tzone = "UTC")



df<-as.data.frame(cbind(UserId,starttimes,endtimes))
df$starttimes<-as.numeric(df$starttimes)
df$endtimes<-as.numeric(df$endtimes)
df$starttimes<-lubridate::as_datetime(df$starttimes)
df$endtimes<-lubridate::as_datetime(df$endtimes)



df<-df%>%arrange(UserId,starttimes)


基本上我想做的是计算每个UserId的时间戳之间的行数,条件如下:-

UserId分组,如果前导starttimes时间戳比前一个endtimes时间戳晚> = 1小时,则对前面的行进行计数.

这是我拥有的代码的工作示例,但仅适用于一个时间戳列。这是基于我不久前发布的类似问题:-

Count rows (conditionally) within specified time period by group in R

对于此示例,我将在 starttimes 时间戳列上使用它:-


library(purrr)
library(tidyverse)


# this is the code to conditionally count the rows between timestamps
SessionCount <- function(data,maxdelay){
result <- list()
row <- 0
events <- 0
OrigTime <- NA
n <- nrow(data)

for (i in 1:n) {
if (is.na(OrigTime)) {
OrigTime <- data$starttimes[[i]]
programmes <- 0
}
events = events + 1
if (difftime(data$starttimes[[i]],OrigTime,units='hours') > maxdelay) {
row <- row + 1
result[[row]] <- data.frame(OrigTime = OrigTime, LastTime = data$starttimes[[i-1]], events = events)
OrigTime <- data$starttimes[[i]]
}
}
dplyr::bind_rows(result)
}


df %>% arrange(UserId,starttimes) %>%
split(.$UserId) %>%
map(function(data) {SessionCount(data,1) }) %>%
bind_rows(.id="UserId")->result

#code which helps me do a sanity check, making sure counts are correct
result$events<-ifelse(result$events>1 & result$OrigTime == result$LastTime,
1,result$events)


count_events <- function(UserId, OrigTime, LastTime, df_new = df) {
df_new %>%
dplyr::filter(UserId== !! UserId,
dplyr::between(starttimes, OrigTime, LastTime)) %>%
nrow()
}

result %>%
mutate(events_recount = pmap_int(list(UserId, OrigTime, LastTime), count_events),
mismatch = events != events_recount)->results_new


##### output ####
UserId OrigTime LastTime events events_recount mismatch
1 0buhGq 2021-02-02 00:24:55 2021-02-02 00:24:55 1 1 FALSE
2 0buhGq 2021-02-02 02:28:25 2021-02-02 02:28:25 1 1 FALSE
3 0buhGq 2021-02-02 04:19:52 2021-02-02 04:40:02 6 3 TRUE
4 0ulN53 2021-02-02 00:02:39 2021-02-02 00:38:59 4 3 TRUE
5 0ulN53 2021-02-02 01:04:46 2021-02-02 01:04:46 1 1 FALSE
6 0ulN53 2021-02-02 02:26:43 2021-02-02 02:31:11 7 2 TRUE

在上面的输出中,events_recount 变量是行数。同样,这适用于一个时间戳列,但不适用于两个时间戳列。关于如何实现我所要求的目标有什么建议吗?

非常感谢!

编辑

这是我试图通过查看 DF 的前 27 行来实现的输出示例:-

    UserId          starttimes            endtimes
1 0buhGq 2021-02-02 00:24:55 2021-02-02 01:04:45
2 0buhGq 2021-02-02 02:28:25 2021-02-02 02:28:29
3 0buhGq 2021-02-02 04:19:52 2021-02-02 04:21:30
4 0buhGq 2021-02-02 04:23:06 2021-02-02 04:23:11
5 0buhGq 2021-02-02 04:40:02 2021-02-02 04:40:07
6 0buhGq 2021-02-02 05:34:49 2021-02-02 05:35:14
7 0buhGq 2021-02-02 06:04:14 2021-02-02 06:04:30
8 0buhGq 2021-02-02 06:28:43 2021-02-02 06:28:49
9 0buhGq 2021-02-02 06:28:57 2021-02-02 06:29:06
10 0ulN53 2021-02-02 00:02:39 2021-02-02 00:19:58
11 0ulN53 2021-02-02 00:21:03 2021-02-02 00:24:55
12 0ulN53 2021-02-02 00:38:59 2021-02-02 00:39:51
13 0ulN53 2021-02-02 01:04:46 2021-02-02 01:04:59
14 0ulN53 2021-02-02 02:26:43 2021-02-02 05:27:53
15 0ulN53 2021-02-02 02:31:11 2021-02-02 02:52:13
16 0ulN53 2021-02-02 04:41:43 2021-02-02 04:41:48
17 0ulN53 2021-02-02 05:59:32 2021-02-02 05:59:46
18 0wzMkn 2021-02-02 00:04:09 2021-02-02 00:04:46
19 0wzMkn 2021-02-02 01:55:07 2021-02-02 01:55:13
20 0wzMkn 2021-02-02 05:16:33 2021-02-02 05:16:48
21 2diLwk 2021-02-02 00:00:38 2021-02-02 00:00:46
22 2diLwk 2021-02-02 00:25:11 2021-02-02 00:25:50
23 2diLwk 2021-02-02 00:48:32 2021-02-02 00:48:38
24 2diLwk 2021-02-02 02:27:48 2021-02-02 03:02:47
25 2diLwk 2021-02-02 03:10:17 2021-02-02 03:10:32
26 2diLwk 2021-02-02 06:21:40 2021-02-02 06:21:57
27 2diLwk 2021-02-02 06:28:49 2021-02-02 06:28:57

如果在上面的行上完成,那么我想要的输出将如下所示:-

enter image description here

如果前导行中的 starttimes 时间戳比前一行中的 endtimes 时间戳晚 >=1 小时,则对这些时间戳之间的前几行进行计数(如图所示)在我想要的输出图像中)

希望这有帮助!

最佳答案

这是使用 shiftdata.table 方法:

DF <- structure(list(UserId = c("0buhGq", "0buhGq", "0buhGq", "0buhGq", 
"0buhGq", "0buhGq", "0buhGq", "0buhGq", "0buhGq", "0ulN53", "0ulN53",
"0ulN53", "0ulN53", "0ulN53", "0ulN53", "0ulN53", "0ulN53", "0wzMkn",
"0wzMkn", "0wzMkn", "2diLwk", "2diLwk", "2diLwk", "2diLwk", "2diLwk",
"2diLwk", "2diLwk", "36NAIB", "36NAIB", "36NAIB", "36NAIB", "36NAIB",
"36NAIB", "36NAIB", "36NAIB", "36NAIB", "36NAIB", "36NAIB", "36NAIB",
"36NAIB", "36NAIB", "36NAIB", "36NAIB", "36NAIB", "36NAIB", "36NAIB",
"36NAIB", "36NAIB", "36NAIB", "36NAIB", "36NAIB", "36NAIB", "36NAIB",
"36NAIB", "36NAIB", "36NAIB", "36NAIB", "36NAIB", "36NAIB", "36NAIB",
"3942Wm", "3OUdi4", "4hhSRY", "4hhSRY", "4hhSRY", "4hhSRY", "4hhSRY",
"4hhSRY", "4hhSRY", "4hhSRY", "4hhSRY", "4hhSRY", "4hhSRY", "52LXbG",
"52LXbG", "52LXbG", "52LXbG", "52LXbG", "52LXbG", "52LXbG", "64gfuI",
"64gfuI", "6KpHap", "6KpHap", "6KpHap", "6vHa6q", "72MKAc", "72MKAc",
"72MKAc", "72MKAc", "8RcC8m", "8RcC8m", "8RcC8m", "8RcC8m", "98vV9L",
"98vV9L", "98vV9L", "98vV9L", "98vV9L", "98vV9L", "98vV9L", "98vV9L",
"98vV9L", "98vV9L", "9PF5pW", "aGsBU0", "aGsBU0", "aGsBU0", "aGsBU0",
"aGsBU0", "aGsBU0", "aGsBU0", "aGsBU0", "aGsBU0", "aGsBU0", "aGsBU0",
"aGsBU0", "aGsBU0", "aGsBU0", "aGsBU0", "aGsBU0", "aGsBU0", "aGsBU0",
"aGsBU0", "aGsBU0", "aGsBU0", "AUQYey", "AUQYey", "AUQYey", "AUQYey",
"AUQYey", "B0s81w", "B0s81w", "BPaJTw", "BPaJTw", "BPaJTw", "BPaJTw",
"BPaJTw", "BPaJTw", "BPaJTw", "BPaJTw", "BPaJTw", "BPaJTw", "BPaJTw",
"BWY8Fi", "c3qDX8", "c3qDX8", "c3qDX8", "c3qDX8", "c3qDX8", "c3qDX8",
"c3qDX8", "c3qDX8", "cIS2B3", "CNTp7f", "CNTp7f", "CNTp7f", "dHBGWg",
"dHBGWg", "dHBGWg", "dHBGWg", "dQ1kCz", "dQ1kCz", "e2aP4W", "EEnp7x",
"EEnp7x", "EEnp7x", "eJL9eB", "eJL9eB", "eJL9eB", "eXKdph", "exxLGA",
"FE1Gg2", "fPqjLw", "fPqjLw", "fPqjLw", "fPqjLw", "fPqjLw", "fPqjLw",
"fPqjLw", "fPqjLw", "fPqjLw", "fPqjLw", "fPqjLw", "fPqjLw", "fPqjLw",
"fPqjLw", "GfLwK1", "GfLwK1", "GfLwK1", "GQMw74", "GQMw74", "GSjzKw",
"hA3kcA", "hA3kcA", "hA3kcA", "hA3kcA", "hA3kcA", "hA3kcA", "hA3kcA"
), starttimes = structure(c(1612225495, 1612232905, 1612239592,
1612239786, 1612240802, 1612244089, 1612245854, 1612247323, 1612247337,
1612224159, 1612225263, 1612226339, 1612227886, 1612232803, 1612233071,
1612240903, 1612245572, 1612224249, 1612230907, 1612242993, 1612224038,
1612225511, 1612226912, 1612232868, 1612235417, 1612246900, 1612247329,
1612224511, 1612224732, 1612224741, 1612225124, 1612225673, 1612226602,
1612226633, 1612228148, 1612228574, 1612229782, 1612230617, 1612231030,
1612231085, 1612231122, 1612231156, 1612231939, 1612234987, 1612236491,
1612238398, 1612240919, 1612240941, 1612241997, 1612242078, 1612242957,
1612243896, 1612243977, 1612244150, 1612245811, 1612246720, 1612246923,
1612247034, 1612247123, 1612247570, 1612227309, 1612243829, 1612225263,
1612225797, 1612225814, 1612226255, 1612227017, 1612229235, 1612239150,
1612241630, 1612247047, 1612247085, 1612247217, 1612224887, 1612225648,
1612227079, 1612230380, 1612245565, 1612246181, 1612246806, 1612241890,
1612245854, 1612225689, 1612229282, 1612242957, 1612239156, 1612224511,
1612224522, 1612225166, 1612247820, 1612224358, 1612231137, 1612235046,
1612247226, 1612224076, 1612224232, 1612225673, 1612226912, 1612228533,
1612229434, 1612231023, 1612232915, 1612235398, 1612243431, 1612239692,
1612224286, 1612224309, 1612225511, 1612225845, 1612225919, 1612226267,
1612228574, 1612228937, 1612230201, 1612230395, 1612230838, 1612231030,
1612231657, 1612232159, 1612232761, 1612235182, 1612239699, 1612241642,
1612243227, 1612245164, 1612247272, 1612224065, 1612235409, 1612241800,
1612246929, 1612246937, 1612226922, 1612228937, 1612224085, 1612224662,
1612224856, 1612225316, 1612226938, 1612227404, 1612227447, 1612228412,
1612228597, 1612235046, 1612241997, 1612227060, 1612224348, 1612227235,
1612234517, 1612240919, 1612245430, 1612245749, 1612247129, 1612247337,
1612228148, 1612226677, 1612231105, 1612243991, 1612225685, 1612225878,
1612227191, 1612229187, 1612224085, 1612230981, 1612224286, 1612224067,
1612224427, 1612241971, 1612227060, 1612228351, 1612231023, 1612241649,
1612243036, 1612226655, 1612224398, 1612225668, 1612226677, 1612227337,
1612227390, 1612231017, 1612231085, 1612232375, 1612232761, 1612240941,
1612241749, 1612245893, 1612247016, 1612247031, 1612224695, 1612230535,
1612240808, 1612225648, 1612229235, 1612244130, 1612224443, 1612224480,
1612224567, 1612225154, 1612227317, 1612227547, 1612229762), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), endtimes = structure(c(1612227885,
1612232909, 1612239690, 1612239791, 1612240807, 1612244114, 1612245870,
1612247329, 1612247346, 1612225198, 1612225495, 1612226391, 1612227899,
1612243673, 1612234333, 1612240908, 1612245586, 1612224286, 1612230913,
1612243008, 1612224046, 1612225550, 1612226918, 1612234967, 1612235432,
1612246917, 1612247337, 1612224520, 1612224741, 1612224878, 1612225246,
1612225680, 1612226607, 1612226656, 1612228412, 1612228586, 1612230487,
1612230631, 1612231203, 1612231105, 1612231137, 1612231171, 1612232151,
1612235005, 1612236571, 1612245037, 1612240935, 1612240947, 1612242008,
1612242118, 1612243144, 1612244074, 1612243986, 1612244278, 1612245846,
1612247815, 1612246929, 1612247041, 1612247127, 1612247739, 1612227317,
1612243866, 1612225495, 1612225813, 1612225845, 1612226266, 1612227024,
1612229282, 1612239156, 1612241680, 1612247059, 1612247094, 1612247222,
1612224906, 1612225661, 1612227235, 1612230405, 1612245572, 1612247552,
1612246888, 1612241961, 1612245870, 1612225695, 1612229761, 1612243144,
1612239162, 1612224533, 1612224537, 1612225185, 1612247833, 1612224374,
1612231143, 1612246048, 1612247226, 1612224152, 1612225636, 1612225680,
1612226918, 1612230888, 1612229722, 1612231029, 1612232966, 1612235409,
1612243995, 1612239696, 1612224377, 1612224673, 1612225550, 1612230662,
1612225925, 1612226277, 1612228586, 1612228955, 1612230212, 1612230609,
1612231489, 1612231203, 1612231672, 1612232375, 1612232803, 1612235187,
1612239704, 1612241649, 1612243227, 1612245199, 1612247272, 1612224156,
1612235413, 1612241818, 1612246933, 1612246943, 1612226930, 1612228955,
1612224621, 1612225487, 1612224892, 1612225423, 1612226965, 1612227417,
1612227461, 1612228444, 1612228607, 1612246048, 1612242008, 1612227178,
1612224353, 1612227245, 1612238334, 1612240935, 1612245439, 1612245785,
1612247135, 1612247346, 1612228163, 1612227404, 1612231121, 1612244002,
1612225727, 1612227245, 1612227204, 1612229226, 1612224203, 1612230998,
1612224377, 1612224081, 1612225814, 1612241976, 1612227178, 1612228356,
1612231029, 1612241740, 1612243041, 1612227231, 1612224426, 1612225683,
1612227404, 1612227346, 1612227397, 1612231022, 1612231105, 1612232381,
1612232803, 1612240947, 1612241755, 1612245899, 1612247021, 1612247220,
1612225301, 1612230673, 1612240884, 1612225661, 1612229282, 1612244136,
1612224582, 1612224662, 1612233387, 1612226208, 1612227329, 1612228048,
1612229782), class = c("POSIXct", "POSIXt"), tzone = "UTC")), row.names = c(NA,
200L), class = "data.frame")

library(data.table)

setDT(DF)

DF <- unique(DF[, session_id := cumsum(starttimes-shift(endtimes, n=1L, fill=first(starttimes), type="lag") >= 3600), by = UserId][
, c("count", "starttimes", "endtimes") := .(.N, min(starttimes), max(endtimes)), by = .(UserId, session_id)][
, session_id := NULL])

head(DF, 10)

> head(DF, 10)
UserId starttimes endtimes count
1: 0buhGq 2021-02-02 00:24:55 2021-02-02 01:04:45 1
2: 0buhGq 2021-02-02 02:28:25 2021-02-02 02:28:29 1
3: 0buhGq 2021-02-02 04:19:52 2021-02-02 06:29:06 7
4: 0ulN53 2021-02-02 00:02:39 2021-02-02 01:04:59 4
5: 0ulN53 2021-02-02 02:26:43 2021-02-02 05:27:53 2
6: 0ulN53 2021-02-02 04:41:43 2021-02-02 04:41:48 1
7: 0ulN53 2021-02-02 05:59:32 2021-02-02 05:59:46 1
8: 0wzMkn 2021-02-02 00:04:09 2021-02-02 00:04:46 1
9: 0wzMkn 2021-02-02 01:55:07 2021-02-02 01:55:13 1
10: 0wzMkn 2021-02-02 05:16:33 2021-02-02 05:16:48 1

基准:

Unit: milliseconds
expr min lq mean median uq max neval
shs 73.507502 73.507502 73.507502 73.507502 73.507502 73.507502 1
ismirsehregal 9.535402 9.535402 9.535402 9.535402 9.535402 9.535402 1

基准代码:

library(lubridate)
library(dplyr)
library(data.table)
library(microbenchmark)

DFdata.table <- DFdplyr <- DF

microbenchmark(shs = {
DFdplyr %>%
group_by(UserId) %>%
mutate(grp = {starttimes - lag(endtimes)} %>%
{. >= hours(1)} %>%
ifelse(is.na(.), 0, .) %>%
cumsum()
) %>%
group_by(UserId, grp) %>%
summarize(starttimes = first(starttimes),
endtimes = last(endtimes),
count = n())
},
ismirsehregal = {
setDT(DF)
DFdata.table <- unique(DFdata.table[, session_id := cumsum(starttimes-shift(endtimes, n=1L, fill=first(starttimes), type="lag") >= 3600), by = UserId][
, c("count", "starttimes", "endtimes") := .(.N, min(starttimes), max(endtimes)), by = .(UserId, session_id)][
, session_id := NULL])
}, times = 1L)

关于r - 在 R 中按组有条件地计算两个日期时间列之间的行,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/68822928/

27 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com