gpt4 book ai didi

r - 在 R 中使用 data.table 进行线性回归

转载 作者:行者123 更新时间:2023-12-01 22:36:23 25 4
gpt4 key购买 nike

我有如下代码:

# Split dt by SIC and FYEAR, run disAccRegFunc on each piece, and store the
# combined result back in dt.
dt <- ddply(dt, .(SIC, FYEAR), disAccRegFunc)

#' Fit the discretionary-accrual regression and attach its residuals.
#'
#' Regresses ACNew on DSALENew, PPEGTNew and ROANew by ordinary least squares
#' and appends the raw and studentized residuals to the input data.
#'
#' @param dt A data.frame containing the columns ACNew, DSALENew, PPEGTNew
#'   and ROANew.
#' @return `dt` with two added columns: RES (raw residuals) and StudRES
#'   (studentized residuals computed by MASS::studres).
disAccRegFunc <- function(dt) {
  # na.exclude keeps the residual vectors aligned with the rows of dt: a row
  # with a missing model variable gets NA instead of being dropped, which
  # would otherwise make the column assignments below fail on a length
  # mismatch.
  model <- lm(ACNew ~ DSALENew + PPEGTNew + ROANew, data = dt,
              na.action = na.exclude)
  dt$RES <- residuals(model)
  # Namespace-qualified so the function does not rely on MASS being attached.
  dt$StudRES <- MASS::studres(model)
  dt
}

在这段代码中,我使用 ddply 函数对每个分组应用函数 disAccRegFunc。我也用 data.table 编写了等价的代码,如下所示:

        # data.table version: call disAccRegFunc on each group's subset (.SD).
        # The grouping is taken from `.by`, which is not defined in this snippet.
        dt[,disAccRegFunc(.SD),by=.by]

但我猜这样会更慢,因为它必须为每个分组把 .SD 加载到内存中。有什么方法可以让这段代码更高效吗?谢谢。

这里是数据的快照:

structure(list(SIC = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1), FYEAR = c(1989, 1989, 1989, 1989, 1989, 1989, 1989,
1989, 1989, 1989, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990,
1990, 1990, 1990, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991,
1991, 1991, 1991, 1991, 1992, 1992, 1992, 1992, 1992, 1992, 1992,
1992, 1992, 1992, 1992, 1993, 1993, 1993, 1993, 1993, 1993, 1993,
1993, 1993, 1993, 1993, 1993, 1994, 1994, 1994, 1994, 1994, 1994,
1994, 1994, 1994, 1994, 1994, 1994, 1994, 1995, 1995, 1995, 1995,
1995, 1995, 1995, 1995, 1995, 1995, 1995, 1995, 1995, 1996, 1996,
1996, 1996, 1996, 1996, 1996, 1996, 1996, 1996, 1996, 1996, 1996,
1997, 1997, 1997, 1997, 1997), DSALENew = c(0.012602500023269,
0.291902040273809, 0.118617033965829, 0.0893175203093097, -0.00852049231260627,
0.0088329859025545, 0.209634378324404, 0.0830958123218592, 0.0738020724667918,
0.109482024510348, -0.0428304666755963, -0.16588866439072, 0.121627138869356,
0.0312269226711679, 0.101225809778869, -0.0275779376498801, 0.237572262729396,
-0.0121992630135952, -0.00510842445824787, 0.0576157552901739,
0.0855443732845379, 0.0872632057071098, -0.156267221848019, 0.0815859699707067,
-0.0430624961441175, 0.153418299584922, -8.85024282853663e-05,
0.133435797726111, -0.0184609333710255, -0.146181230961207, 0.0781112477932131,
0.0442291827447641, 0.00716417910447766, 0.0481930614039844,
0.0541753077810537, 0.0665705612789049, 0.118963433232041, 0.0294059514659054,
-0.632275504735391, -0.0502141257669623, 0.0958285084007296,
-0.0272426945849758, 0.085945755547728, 0.301778708148965, -0.0459045802393442,
0.0169764469498758, -0.0562287270251872, 0.0669855988183644,
0.0547472043521437, -0.067714725413364, 0.0617426162281712, 0.440429133206918,
0.0833729932633978, -0.0280735721200387, 0.0383118213480845,
-0.0194657903500448, 0.0626774121566572, 0.113601675703828, 0.30761369443025,
0.109305701022796, -0.030075679207274, 0.506415816050758, 0.125916995075369,
-0.0196319087485011, 0.0578873173006881, 0.0897072710103872,
-0.719538572458544, -0.478305381558564, 0.173499612656267, -0.0250104170612523,
-0.0119919744060999, -0.139720136759367, 0.0418697622544592,
0.0593189307945807, -0.505190248796772, 0.211265167747981, -0.0227537539511344,
-0.00186915887850474, -1.10693374422188, 0.0756610100079348,
0.0921928012282265, 0.159792224191246, 0.035416442965031, -0.153989830860226,
0.0147928615931956, 0.139226519337017, -0.0114289417966556, -0.0513681750613451,
0.307342194442119, 0.218928016906197, 0.198455485939094, 0.00664589823468326,
0.0398349694124342, 0.115581717451524, 0.584157679434734, 0.00032726458480456,
0.039702980735921, 0.200301694587959, 0.0416528925619835, 0.36358070267058
), PPEGTNew = c(0.953973882854457, 0.467086462417758, 0.638359582619649,
0.265758721056519, 0.689282635504338, 0.512784274929631, 0.500905533546401,
0.302835073280151, 0.392572501564782, 0.173497722981228, 0.969752361742529,
0.24137810910611, 0.684512774616975, 0.339065302247072, 0.70778363944283,
0.509976924121081, 0.570903219759228, 0.30435274734949, 0.355635184458544,
0.129286263007193, 0.895242451967063, 0.949408181259518, 0.336231706570326,
0.744086161679677, 0.305410574372262, 0.680226270692954, 0.513867224231965,
0.556038600187438, 0.256535686631187, 0.230172129041729, 0.205983930988692,
0.918203511012942, 0.82228855721393, 0.940475545033404, 0.784086638101383,
0.285761166391243, 0.66772390313165, 0.533457779729878, 0.584734315365566,
0.261132460991096, 0.322836113207432, 0.978359054565186, 0.794293765410356,
0.712110006643519, 0.914969230419874, 0.784890348299594, 0.540912185159973,
0.648134411028597, 0.551530590216312, 0.67236550759716, 0.30596000552919,
0.299670387008921, 1.10469551228341, 0.748951274604711, 0.447654169227617,
0.541347525306156, 0.887468150139335, 0.791733854056621, 0.647061829330608,
0.680562828917594, 0.598813088691675, 0.736365482650909, 0.388248067037364,
0.348369262297389, 1.16189687609724, 0.77435945860875, 0.534607065609229,
0.690092274533576, 0.254856986617654, 0.868432516383196, 0.499511832537101,
0.631419407141095, 0.71472891353351, 0.660164441929363, 0.612835925592257,
0.454140131190202, 0.396457871496262, 0.747663551401869, 0.663174114021572,
0.760667025007039, 0.269186299967778, 0.73279715312704, 0.868230788503526,
0.409463525993566, 0.663444240909767, 0.725598526703499, 0.691893877198289,
0.674017924092229, 0.598189952656007, 0.373202231080638, 0.775171207926563,
1.22658359293873, 0.805768957177408, 0.220152354570637, 0.591156955236735,
0.832888368327678, 0.431280406066905, 0.670032249971822, 0.687603305785124,
0.705083646971962), ACNew = c(0.0395945606344065, 0.0664252010367515,
-0.0301384111110581, 0.00254137886094096, -0.0265658063511183,
0.0166295681258759, 0.017084585460487, -0.2362842156747, 0.0046734821614855,
0.226591136287904, 0.0173969224465998, 0.17431895770919, 0.0477768543970679,
0.0700759573794704, -0.00381011122461684, 0.0327360752907108,
0.0270528951957744, 0.0692339617421051, 0.145256938943222, -0.012902437321042,
0.0731930466605672, 0.0408395008950757, 0.220439654644541, -0.0044062389767342,
0.275945462397434, 0.0790790446221029, -0.0311086035423097, -0.0284790074946835,
0.0561202541758336, 0.139409843285499, 0.0526540633186986, 0.0137318040290603,
-0.00597014925373134, 0.0544822559172043, -0.00638549410303916,
0.355472733026265, 0.0192527105080905, 0.0449544306577358, 0.06393425639316,
0.000712762608473587, 0.128074844252703, 0.0703969102602978,
0.0250088059175766, 0.0264988655878261, 0.0102073256579694, 0.0162804709314325,
0.848230088495575, 0.0312279981947237, 0.00828979750442554, 0.0349715025339877,
0.100239598212229, 0.187866678544612, 0.0916311961658344, -0.0464666021297193,
0.322474902683876, 0.1259809866102, -0.0111675601060711, 0.000467070106532058,
0.0368932038834952, 0.0957255259448906, 0.00633745304077966,
0.0479452127623754, 0.134842513781169, -0.0118286894983082, 0.0424276408533741,
0.0698772426817753, 0.341384282624369, 0.0355627399474641, 0.216942870596813,
-0.0462706920716694, 0.036495723805627, 0.147967995488351, -0.0337729047054296,
-0.0368829266668886, 0.0225649771950469, 0.0382707722793551,
0.0477004248403114, -0.00872274143302181, 0.551001540832049,
0.0122219662648135, 0.156750507022499, 0.0566504760333187, -0.00385661619957765,
0.0477623445351992, 0.00240292197259525, 0.122283609576427, 0.0188393898803094,
0.0593513325987576, 0.565895582777576, 0.285157733474218, 0.155034241585313,
-0.149117341640706, -0.017095841039503, -0.126246537396122, -0.2505725078781,
-0.0850198942418657, -0.00165680116879791, 0.0528700778192669,
-0.044297520661157, -0.161513388917902), ROANew = c(0.163292659090274,
-0.226041735894198, 0.0452896804759701, -0.064034058820974, 0.0921216374505778,
0.0575910680846553, 0.0444595485158336, -0.114887086165315, 0.0114889769803185,
-0.0696064274871339, 0.0740157108076805, -0.186354769037832,
0.0513349954186111, -0.235757991349298, 0.0756937929151935, 0.0442740147504638,
0.0495950382782889, -0.0935696049702564, -0.0920108683581228,
-0.0784272644737761, 0.070448307410796, 0.0760680791941646, -0.470699895903496,
0.0512729475637624, -0.368708742056882, 0.0996120773321018, 0.0328344008938745,
0.0483339285972892, -0.176730866139797, -0.172129041729365, -0.0340218336790418,
0.0206509300902148, 0.0624875621890547, 0.0590244519545298, 0.0221510307432041,
-0.425699987107814, 0.132221178502936, 0.0329197320742286, -0.0736185481498404,
-0.22114647865573, -0.185582948611794, 0.0310091128169936, 0.0352236703064459,
0.0579615678704388, 0.0335740400118082, 0.0240287571367621, -1.15527569775357,
0.11279120338079, 0.0641595786019602, -0.0181738389010379, -0.290547389761784,
-0.220156025300024, 0.0394543687368033, 0.0100032268473701, -0.58840401557058,
0.0152936519406099, 0.0802071603157223, 0.0187610177761631, -1.2452733776188,
0.171864696070445, 0.0332552948222355, -0.0172610138205074, -0.513469255546958,
0.0195868685219487, 0.0165811580607801, 0.0346238589864652, -0.301730353280461,
0.0856334613053142, -0.533258396245044, 0.0846717678699951, 0.0380945477528242,
-0.990166014592365, 0.1437311990952, 0.00963128169279762, 0.0101235097874349,
-0.365303235587282, -0.0863005431502716, 0.029595015576324, -1.14576271186441,
0.0828918068033479, -0.572186735912356, 0.144467323379801, 0.0397204624360213,
-0.0796039075586653, 0.0361253877308557, 0.16427255985267, 0.0364040962710107,
-0.0108940888920407, -0.601575652723907, -0.243145420678573,
0.0495410170479382, -0.144963655244029, 0.0926992934035187, -0.0573407202216066,
0.119598363703979, 0.096224400158465, -0.0436491798834214, 0.0647035511327484,
0.160661157024793, 0.0546001332589004)), .Names = c("SIC", "FYEAR",
"DSALENew", "PPEGTNew", "ACNew", "ROANew"), row.names = c(NA,
100L), class = "data.frame")

最佳答案

最好重写你的函数,让它返回一个列表,并利用 := 按组添加新列:

library(MASS)
#' Compute raw and studentized residuals of the accrual regression.
#'
#' Regresses ACNew on DSALENew, PPEGTNew and ROANew by ordinary least squares.
#'
#' @param dt A data.frame or data.table subset containing the columns ACNew,
#'   DSALENew, PPEGTNew and ROANew.
#' @return An unnamed list of two numeric vectors, in this order: the raw
#'   residuals and the studentized residuals (MASS::studres). Each has one
#'   element per row of `dt`.
disAccRegFunc <- function(dt) {
  # na.exclude pads the residuals with NA for rows that have a missing model
  # variable, so both vectors always have nrow(dt) elements and can be
  # assigned as columns without a length mismatch.
  model <- lm(ACNew ~ DSALENew + PPEGTNew + ROANew, data = dt,
              na.action = na.exclude)
  raw_res <- residuals(model)
  stud_res <- MASS::studres(model) # studentized residuals
  list(raw_res, stud_res)
}

# Add the Res and StudRes columns to DT in place, fitting one regression per
# SIC/FYEAR group. .SDcols restricts .SD to the four columns the model uses,
# so data.table does not build a subset of every column for each group.
DT[, c("Res", "StudRes") := disAccRegFunc(.SD),
   by = list(SIC, FYEAR),
   .SDcols = c("ACNew", "DSALENew", "PPEGTNew", "ROANew")]

这可以通过使用 lm.fit 并“手动”计算学生化残差来加快速度。

PS:为什么您会认为 ddply 可能比 data.table 更快,这超出了我的理解范围。

关于r - 在 R 中使用 data.table 进行线性回归,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/22588673/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com