隔江千里
现在可以在 data.table 中实现此功能(从版本 1.8.11 开始)。所以我想有一个 data.table 解决方案。应用于此问题:

```r
library(data.table)
set.seed(1234)
DT <- data.table(x=rep(c(1,2,3),each=1e6), y=c("A","B"), v=sample(1:100,12))
out <- DT[,list(SUM=sum(v)),by=list(x,y)]
# edit (mnel) to avoid setNames which creates a copy
# when calling `names<-` inside the function
out[, as.list(setattr(SUM, 'names', y)), by=list(x)]
   x        A        B
1: 1 26499966 28166677
2: 2 26499978 28166673
3: 3 26500056 28166650
```

这与 DWin 的方法具有相同的结果:

```r
tapply(DT$v,list(DT$x, DT$y), FUN=sum)
         A        B
1 26499966 28166677
2 26499978 28166673
3 26500056 28166650
```

而且,它很快:

```r
system.time({
  out <- DT[,list(SUM=sum(v)),by=list(x,y)]
  out[, as.list(setattr(SUM, 'names', y)), by=list(x)]})
##  user  system elapsed
##  0.64    0.05    0.70
system.time(tapply(DT$v,list(DT$x, DT$y), FUN=sum))
##  user  system elapsed
##  7.23    0.16    7.39
```

更新

为了使该解决方案也适用于非平衡数据集(即某些组合不存在),您必须首先把这些缺失的组合补入数据表:

```r
library(data.table)
set.seed(1234)
DT <- data.table(x=c(rep(c(1,2,3),each=4),3,4), y=c("A","B"), v=sample(1:100,14))
out <- DT[,list(SUM=sum(v)),by=list(x,y)]
setkey(out, x, y)
intDT <- expand.grid(unique(out[,x]), unique(out[,y]))
setnames(intDT, c("x", "y"))
out <- out[intDT]
out[, as.list(setattr(SUM, 'names', y)), by=list(x)]
```

摘要

结合上面的评论,这是一个单行解决方案:

```r
DT[, sum(v), keyby = list(x,y)][CJ(unique(x), unique(y)), allow.cartesian = T][, setNames(as.list(V1), paste(y)), by = x]
```

也可以很容易地修改它,使其不仅仅计算总和,例如:

```r
DT[, list(sum(v), mean(v)), keyby = list(x,y)][CJ(unique(x), unique(y)), allow.cartesian = T][, setNames(as.list(c(V1, V2)), c(paste0(y,".sum"), paste0(y,".mean"))), by = x]
#   x A.sum B.sum   A.mean B.mean
#1: 1    72   123 36.00000   61.5
#2: 2    84   119 42.00000   59.5
#3: 3   187    96 62.33333   48.0
#4: 4    NA    81       NA   81.0
```