- html - 出于某种原因,IE8 对我的 Sass 文件中继承的 html5 CSS 不友好?
- JMeter 在响应断言中使用 span 标签的问题
- html - 在 :hover and :active? 上具有不同效果的 CSS 动画
- html - 相对于居中的 html 内容固定的 CSS 重复背景?
我正在使用以下代码按列汇总我的数据
library(data.table, warn.conflicts = FALSE)
library(lubridate, warn.conflicts = FALSE)
################
## PARAMETERS ##
################
# Set path of major source folder for raw transaction data
in_directory <- "C:/Users/NAME/Documents/Raw Data/"
# List names of sub-folders (currently grouped by first two characters of CUST_ID)
in_subfolders <- list("AA-CA", "CB-HZ")
# Set location for output
out_directory <- "C:/Users/NAME/Documents/YTD Master/"
out_filename <- "OUTPUT.csv"
# Set beginning and end of date range to be collected - year-month-day format
date_range <- interval(as.Date("2017-01-01"), as.Date("2017-01-31"))
# Enable or disable filtering of raw files to only grab items bought within certain months to save space.
# If false, all files will be scanned for unique items, which will take longer and be a larger file.
date_filter <- TRUE
##########
## CODE ##
##########
starttime <- Sys.time()
mastertable <- NULL
for (j in 1:length(in_subfolders)) {
subfolder <- in_subfolders[j]
sub_directory <- paste0(in_directory, subfolder, "/")
## IMPORT DATA
in_filenames <- dir(sub_directory, pattern =".txt")
for (i in 1:length(in_filenames)) {
# Default value provided for when fast filtering is disabled.
read_this_file <- TRUE
# To fast filter the data, we choose to include or exclude an entire file based on the date of its first line.
# WARNING: This is only a valid method if filtering by entire months, since that is the amount of data housed in each file.
if (date_filter) {
temptable <- fread(paste0(sub_directory, in_filenames[i]), colClasses=c(CUSTOMER_TIER = "character"),
na.strings = "", nrows = 1)
temptable[, INVOICE_DT := as.Date(INVOICE_DT)]
# If date matches, set read flag to TRUE. If date does not match, set read flag to FALSE.
read_this_file <- temptable[, INVOICE_DT] %within% date_range
}
if (read_this_file) {
print(Sys.time()-starttime)
print(paste0("Reading in ", in_filenames[i]))
temptable <- fread(paste0(sub_directory, in_filenames[i]), colClasses = c(CUSTOMER_TIER = "character"),
na.strings = "")
temptable <- temptable[,lapply(.SD, sum), by = .(CUST_ID),
.SDcols = c("Ext Sale")]
# Combine into full list
mastertable <- rbindlist(list(mastertable, temptable), use.names = TRUE)
# Release unneeded memory
rm(temptable)
}
}
}
# Save Final table
print("Saving master table")
fwrite(mastertable, paste0(out_directory, out_filename))
rm(mastertable)
print(Sys.time()-starttime)
CUST_ID Ext Sale
AK0010001 209.97
CO0020001 1540.3
date_range <- interval(as.Date("2017-01-01"), as.Date("2017-02-28"))
CUST_ID Ext Sale
AK0010001 209.97
AK0010001 217.833
CO0020001 1540.3
CO0010001 -179.765
INVOICE_DT,BRANCH_CODE,INVOICE_NO,INV_SEQ_NO,INV_ITEM_ID,ITEM_DESCR,STD_ITEM,PRIVATE_LABEL,CATEGORY_PATH1,CATEGORY_PATH2,CUST_ID,CUSTOMER_TIER,IS_VENDING,SALE_PRICE,TOTAL_COST,POS_COST,CE100,CE110,CE120,CE200,CORP_PRICE,QTY_SOLD,PACKSLIP_WHSL,PRICING_GROUP,PGG_MIN_PRICE,PGY_MIN_PRICE,PGR_MIN_PRICE,Ext Sale,Ext Total Cost
2017-01-27,AK001,AK0016997,4,12772-00079,"3.75"""""""" 4.12"""""""" HOSE OD",N,N,08.5-Fleet & Automotive,01.6-DOT Hose & Tubing,AK0010001,Tier 3,No,42.74,22.438335,22.438335,21.37,,,0,,3,,PGR,168.2875125,134.63001,112.191675,128.22,67.315005
2017-01-27,AK001,AK0016997,3,12772-00022,"2.5"""""""" 2.87"""""""" HOSE OD C",N,N,08-Hydraulics & Pneumatics,02-Hose and Hose Reels,AK0010001,Tier 3,No,27.25,14.143396,14.143396,13.47,,,0,,3,,PGR,106.07547,84.860376,70.71698,81.75,42.430188
INVOICE_DT,BRANCH_CODE,INVOICE_NO,INV_SEQ_NO,INV_ITEM_ID,ITEM_DESCR,STD_ITEM,PRIVATE_LABEL,CATEGORY_PATH1,CATEGORY_PATH2,CUST_ID,CUSTOMER_TIER,IS_VENDING,SALE_PRICE,TOTAL_COST,POS_COST,CE100,CE110,CE120,CE200,CORP_PRICE,QTY_SOLD,PACKSLIP_WHSL,PRICING_GROUP,PGG_MIN_PRICE,PGY_MIN_PRICE,PGR_MIN_PRICE,Ext Sale,Ext Total Cost
2017-02-28,AK001,AK0017107,1,12772-00307,3-WAY MALE HOUSING,N,N,09-Electrical,05.5-Terminals and Wire Connectors,AK0010001,Tier 3,No,95.21,74.591453,74.591453,71.04,,,0,,1,,PGG,0,0,0,95.21,74.591453
2017-02-28,AK001,AK0017105,3,99523968,PC58570 1/2 PRS BALL,Y,N,,,AK0010001,Tier 3,No,24.5246,12.356039,12.356039,11.767743,,,0,,5,,PGG,0,0,0,122.623,61.780195
INVOICE_DT,BRANCH_CODE,INVOICE_NO,INV_SEQ_NO,INV_ITEM_ID,ITEM_DESCR,STD_ITEM,PRIVATE_LABEL,CATEGORY_PATH1,CATEGORY_PATH2,CUST_ID,CUSTOMER_TIER,IS_VENDING,SALE_PRICE,TOTAL_COST,POS_COST,CE100,CE110,CE120,CE200,CORP_PRICE,QTY_SOLD,PACKSLIP_WHSL,PRICING_GROUP,PGG_MIN_PRICE,PGY_MIN_PRICE,PGR_MIN_PRICE,Ext Sale,Ext Total Cost
2017-01-31,CO002,CO0023603,19,13117-00095,8-32X5/16 BHSCS MAG,N,N,18-Work Order Parts,Finished Products,CO0020001,Tier 3,No,0.1858,0.037528,0.037528,0.01833,,,0,,6000,,PGG,0,0,0,1114.8,225.168
2017-01-31,CO002,CO0023603,20,13117-00186,"#8-16X3/4"""""""" 6-LOBE PA",N,N,01-Fasteners,03-Screws,CO0020001,Tier 3,No,0.0851,0.029652,0.029652,,,,0,,5000,,PGG,0,0,0,425.5,148.26
INVOICE_DT,BRANCH_CODE,INVOICE_NO,INV_SEQ_NO,INV_ITEM_ID,ITEM_DESCR,STD_ITEM,PRIVATE_LABEL,CATEGORY_PATH1,CATEGORY_PATH2,CUST_ID,CUSTOMER_TIER,IS_VENDING,SALE_PRICE,TOTAL_COST,POS_COST,CE100,CE110,CE120,CE200,CORP_PRICE,QTY_SOLD,PACKSLIP_WHSL,PRICING_GROUP,PGG_MIN_PRICE,PGY_MIN_PRICE,PGR_MIN_PRICE,Ext Sale,Ext Total Cost
2017-02-03,CO001,CO0019017,1,MN2550000A20000,M6-1.0 HEX NUT A-2,Y,N,01-Fasteners,04-Nuts,CO0010001,NA,No,0.0313,0.00767,0.00767,0.006215,0.000593,,0.001241,,-50,0.1058,,,,,-1.565,-0.3835
2017-02-16,CO001,CO0019018,1,11516769,RS37518BlkRndSpacer,Y,N,01.5-Hardware,Electronic Hardware,CO0010001,NA,No,0.0396,0.011245,0.011245,0.01071,,,0,,-4500,0.0543,,,,,-178.2,-50.6025
最佳答案
OP 想知道为什么结果没有被合并为 CUST_ID
如果处理的数据超过一个月。
原因是每个月的文件都被一个一个地读入和聚合,但需要最后的聚合步骤来合并所有月份。
下面的代码是双 for
的简化替换循环。我省略了测试“快速过滤”的代码。
第一部分创建要处理的文件列表。第二部分进行处理。
# create vector of filenames to be processed
in_filenames <- list.files(
file.path(in_directory, in_subfolders),
pattern = "\\.txt$",
full.names = TRUE,
recursive = TRUE)
# read and aggregate each file separately
mastertable <- rbindlist(
lapply(in_filenames, function(fn) {
# code for "fast filter" test goes here
message("Reading in ", fn)
temptable <- fread(fn,
colClasses = c(CUSTOMER_TIER = "character"),
na.strings = "")
# aggregate
temptable[, lapply(.SD, sum), by = .(CUST_ID), .SDcols = c("Ext Sale")]
})
)[
# THIS IS THE MISSING STEP:
# second aggregation for overall totals
, lapply(.SD, sum), by = .(CUST_ID), .SDcols = c("Ext Sale")]
Processing file: Raw Data/AA-CA/AA-CA 2017-01.txt
Processing file: Raw Data/AA-CA/AA-CA 2017-02.txt
Processing file: Raw Data/CB-HZ/CB-HZ 2017-01.txt
Processing file: Raw Data/CB-HZ/CB-HZ 2017-02.txt
mastertable
CUST_ID Ext Sale
1: AK0010001 427.803
2: CO0020001 1540.300
3: CO0010001 -179.765
data.table
的链接这里使用表达式。
### MODIFIED
.
library(data.table, warn.conflicts = FALSE)
library(lubridate, warn.conflicts = FALSE)
################
## PARAMETERS ##
################
# Set path of major source folder for raw transaction data
in_directory <- "Raw Data" ### MODIFIED
# List names of sub-folders (currently grouped by first two characters of CUST_ID)
in_subfolders <- list("AA-CA", "CB-HZ")
# Set location for output
out_directory <- "YTD Master" ### MODIFIED
out_filename <- "OUTPUT.csv"
# Set beginning and end of date range to be collected - year-month-day format
date_range <- interval(as.Date("2017-01-01"), as.Date("2017-02-28")) ### MODIFIED
# Enable or disable filtering of raw files to only grab items bought within certain months to save space.
# If false, all files will be scanned for unique items, which will take longer and be a larger file.
date_filter <- TRUE
##########
## CODE ##
##########
starttime <- Sys.time()
# create vector of filenames to be processed
in_filenames <- list.files(
file.path(in_directory, in_subfolders),
pattern = "\\.txt$",
full.names = TRUE,
recursive = TRUE)
# read and aggregate each file separetely
mastertable <- rbindlist(
lapply(in_filenames, function(fn) {
# code for fast filter test goes here
message("Processing file: ", fn)
temptable <- fread(fn,
colClasses = c(CUSTOMER_TIER = "character"),
na.strings = "")
# aggregate by month
temptable[, lapply(.SD, sum), by = .(CUST_ID), .SDcols = c("Ext Sale")]
})
)[
# second aggregation overall
, lapply(.SD, sum), by = .(CUST_ID), .SDcols = c("Ext Sale")]
# Save Final table
print("Saving master table")
fwrite(mastertable, paste0(out_directory, out_filename))
# rm(mastertable) ### MODIFIED
print(Sys.time()-starttime)
INVOICE_DT
在给定的
date_range
内我的方法过滤文件名。文件名包含 ISO 8601 格式的年月。
date_range
构造了一个允许的年月字符串向量。 .仅选择包含允许的年月字符串之一的文件名进行进一步处理。
date-range
可能在一个月的中间开始或结束,我们还需要过滤每个处理文件的行。 OP 的代码中缺少此步骤。
library(data.table, warn.conflicts = FALSE)
library(magrittr) ### MODIFIED
# library(lubridate, warn.conflicts = FALSE) ### MODIFIED
################
## PARAMETERS ##
################
# Set path of major source folder for raw transaction data
in_directory <- "Raw Data" ### MODIFIED
# List names of sub-folders (currently grouped by first two characters of CUST_ID)
in_subfolders <- list("AA-CA", "CB-HZ")
# Set location for output
out_directory <- "YTD Master" ### MODIFIED
out_filename <- "OUTPUT.csv"
# Set beginning and end of date range to be collected - year-month-day format
date_range <- c("2017-01-01", "2017-02-14") ### MODIFIED
# Enable or disable filtering of raw files to only grab items bought within certain months to save space.
# If false, all files will be scanned for unique items, which will take longer and be a larger file.
# date_filter <- TRUE ### MODIFIED
##########
## CODE ##
##########
starttime <- Sys.time()
# create vector of filenames to be processed
in_filenames <- list.files(
file.path(in_directory, in_subfolders),
pattern = "\\.txt$",
full.names = TRUE,
recursive = TRUE)
# filter filenames, only
selected_in_filenames <-
seq(as.Date(date_range[1]),
as.Date(date_range[2]), by = "1 month") %>%
format("%Y-%m") %>%
lapply(function(x) stringr::str_subset(in_filenames, x)) %>%
unlist()
# read and aggregate each file separetely
mastertable <- rbindlist(
lapply(selected_in_filenames, function(fn) {
message("Processing file: ", fn)
temptable <- fread(fn,
colClasses = c(CUSTOMER_TIER = "character"),
na.strings = "")
# aggregate file but filtered for date_range
temptable[INVOICE_DT %between% date_range,
lapply(.SD, sum), by = .(CUST_ID, QTR = quarter(INVOICE_DT)),
.SDcols = c("Ext Sale")]
})
)[
# second aggregation overall
, lapply(.SD, sum), by = .(CUST_ID, QTR), .SDcols = c("Ext Sale")]
# Save Final table
print("Saving master table")
fwrite(mastertable, file.path(out_directory, out_filename))
# rm(mastertable) ### MODIFIED
print(Sys.time()-starttime)
mastertable
CUST_ID QTR Ext Sale
1: AK0010001 1 209.970
2: CO0020001 1 1540.300
3: CO0010001 1 -1.565
date_range <- c("2017-01-01", "2017-02-14")
现在结束二月中旬。
关于在日期过滤器中使用多个月份时,行不合并 R 中的重复项,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/51387157/
我正在用 javascript 制作日历,我不想在 中显示当前日期、日期名称和月份名称它在我的函数内 Kalender()但由于某种原因,如果我执行函数 volgende() (下个月)它更改 中
有一个输入标签应用了数据掩码。 这允许输入数据,例如 02/12/2014 或 13/05/2014。但不接受 1/1/2013 或 13/5/2014。请帮忙!!! 最佳答案 根据jQuery
我正在尝试创建一个函数,它将接受三个输入变量(开始日期、结束日期、粒度),并将输出一个包含所有日期的数组。输出将根据输入以每日或每月为粒度。例如,如果我查看 2015 年 1 月 - 2015 年 2
我有这段代码可以从数据库中获取一个字段: $end_date=$row1['end_date']; 如果我打印它,它会给我这样的信息:25-09-2012我需要的是获取月份值、年份和日期。类似于: $
我有一个事件表,其中 X 为频率(每 X 个月一次),第一个开始日期和结束日期如下: 如何复制每一行并将其粘贴到新工作表中,并根据 X 和月份日期的增量为每一行添加附加行,如下所示: 最佳答案 这里是
我将以下格式的日期存储在Mailed_Date列中 Mon, 09/20/10 01:04 PM 我使用了一个serde(csv-serde-1.1.2-0.11.0-all.jar)从csv文件中获
我想根据日期和月份值对数组进行排序。我正在使用此代码 NSSortDescriptor *descriptor=[[NSSortDescriptor alloc]initWithKey:@"self
我需要当前年份,月份和日期为3个不同的变量。下面的代码给出了日期时间 val now = Calendar.getInstance().getTime() 2016年9月29日星期四18:27:38
我无法获得正确的查询结果 我正在查询获取在相应月份注册了多少用户。到目前为止,我提出了以下查询,它允许我获得该结果,但没有显示没有用户注册的月份 SELECT YEAR(c.created_at) a
在 Programming in the Key of C# 中,作者给出了一个示例(附源代码),说明如何将日期(年、月、日 -- 数字)打包为 32 位整数。在示例中,作者将信息打包如下: int
在表中,我有一列名为“service_time”的列,它是服务的开始日期,另一列“times_year”是每年应完成服务的次数。 我遇到的问题是如何在 -> (thisMonth == service
使用:Python 3.6, Pandas 0.22 我有一个 .csv 文件,我需要从中获取基于月份和位置的平均值。这是数据中的一行,还有更多具有多个位置和日期的行: 名称日期雪 大急流城杰拉尔德福
我想循环遍历给定开始时间以来的月份,并打印第一天和最后一天。我可以手动跟踪它是哪个月份和年份,并使用 calendar.monthrange(year, Month) 来获取天数......但这是最好
我正在做试卷的最后一题,但我已经迷失在算法的创建过程中。我的定义图看起来不错,但我就是无法确定计算月份方面的顺序。 题目如下: A file of transaction records includ
我正在获取系统当前日期并尝试在 TextView 中显示它。 尝试下面的代码后 private OnClickListener listener1 = new OnClickListener() {
我正在使用以下格式来格式化 DateTime: DateTime CusDate = dateTimePicker1.Value; string Date = CusDate.ToString("dd
废话不多少,上代码 复制代码 代码如下: // 获取指定日期所在星期的开始时间与结束时间 function getWeekRange($date){ &nb
今天我运行了自动化测试,并想知道为什么我收到了有关某些特定日期内容的错误。 事实证明,设置固定的 UTC 月份不再有效。但昨天确实如此。据我所知,没有任何变化。 我尝试运行以下代码 var d = n
我有一个包含两个日期字段的 Grails 域 Date updated Date created 我想根据月、小时或年的更新时间来计算行数。我怎样才能做到这一点。互联网上显示的方法不起作用。我正在使用
是否有任何内置函数可用于数据框对象以生成类 Date 时间序列上的变量以创建星期几、月份、年份、年份中的星期,等在 R 中? 基础包中的weekdays、months、quarters函数生成文本输出
我是一名优秀的程序员,十分优秀!