lapply
https://onesixx.com/lapply/
https://onesixx.com/lapply-vs-docall/
https://onesixx.com/bind/
예제데이터
DD <- data.table(iris) names(DD) <- c("SL","SW","PL","PW","Species") # SL SW PL PW Species # 1: 5.1 3.5 1.4 0.2 setosa # 2: 4.9 3.0 1.4 0.2 setosa # 3: 4.7 3.2 1.3 0.2 setosa # 4: 4.6 3.1 1.5 0.2 setosa # 5: 5.0 3.6 1.4 0.2 setosa # --- # 146: 6.7 3.0 5.2 2.3 virginica # 147: 6.3 2.5 5.0 1.9 virginica # 148: 6.5 3.0 5.2 2.0 virginica # 149: 6.2 3.4 5.4 2.3 virginica # 150: 5.9 3.0 5.1 1.8 virginica DD_list <- DD %>% split(DD$Species) names(DD_list) <- str_c("cycle",1:3) names(DD_list) %>% map( ~mutate(DD_list[[.]], new_column=.) ) # transform # [[1]] # SL SW PL PW Species new_column # 1: 5.1 3.5 1.4 0.2 setosa cycle1 #... # 50: 5.0 3.3 1.4 0.2 setosa cycle1 # SL SW PL PW Species new_column # # [[2]] # SL SW PL PW Species new_column # 1: 7.0 3.2 4.7 1.4 versicolor cycle2 # 2: 6.4 3.2 4.5 1.5 versicolor cycle2 #... # 50: 5.7 2.8 4.1 1.3 versicolor cycle2 # SL SW PL PW Species new_column # # [[3]] # SL SW PL PW Species new_column # 1: 6.3 3.3 6.0 2.5 virginica cycle3 #... # 49: 6.2 3.4 5.4 2.3 virginica cycle3 # 50: 5.9 3.0 5.1 1.8 virginica cycle3 # SL SW PL PW Species new_column
참고 apply – 1 row
> apply(DD[,1:4],1,mean) [1] 2.550 2.350 2.550 2.425 2.225 2.700 ... [150] 3.950
참고 apply – 2 column
> apply(DD[,1:4],2,mean) SL SW PL PW 5.843333 3.057333 3.758000 1.199333 > sapply(DD[,1:4], mean) > sapply(DD[,1:4], mean, simplify=T) #same SL SW PL PW 5.843333 3.057333 3.758000 1.199333
sapply는 lapply를 쉽게 사용할수 있게 wrapping한거
> sapply(DD[,1:4], mean, simplify=F) # sapply는 wrapper of lapply, simple version > lapply(DD[,1:4], mean) #same $SL [1] 5.843333 $SW [1] 3.057333 $PL [1] 3.758 $PW [1] 1.199333
lapply기본원리
> lapply function (X, FUN, ...) { FUN <- match.fun(FUN) if (!is.vector(X) || is.object(X)) X <- as.list(X) .Internal(lapply(X, FUN)) }
- X가 어떤 Object든지 list로 만들어서,
- Column기준으로 Loop하면서 Fun을 적용한다.
- 결과는 항상 List 이다.
lapply(c(1:4), runif) lapply(as.list(c(1:4)), runif) #same
참고: as.list
> c("A","B") %>% as.list() [[1]] [1] "A" [[2]] [1] "B" > c("A","B") %>% as.list() %>% as.list() %>% as.list() %>% as.list() %>% as.list() [[1]] [1] "A" [[2]] [1] "B"
Column별로 List를 만들어서 동시에 함수적용
> lapply(DD, function(x){ x }) $SL [1] 5.1 4.7 5.0 4.6 4.4 5.4 ... [150] 5.9 $SW [1] 3.5 3.2 3.6 3.4 2.9 3.7 ... [150] 3.0 $PL [1] 1.4 1.3 1.4 1.4 1.4 1.5 ... [150] 5.1 $PW [1] 0.2 0.2 0.2 0.3 0.2 0.2 ... [150] 1.8 $Species [1] setosa setosa setosa ... [150] virginica Levels: setosa versicolor virginica $Cycle [1] 1 1 1 1 1 1 1 1 1 ... [150] 2
lapply의 Argument
- vector 자체 또는 data.table
- (vector 자체 대신에) vector의 ColumnName 또는 Index
- Iteration Number
###### vector 자체 또는 data.table dt=DD[,1:4] lapply(dt, function(x){ mean(x) }) # (vector 자체 대신에) vector의 컬럼명이나 인덱스를 넘긴다. ###### vector의 컬럼명 or vector Index vec = colnames(DD[,1:4]) lapply(vec, function(x){ mean(DD[[x]]) }) #DD[,..x] %>% unlist() %>% mean() #result is UN-named list ###### iteration number i or vector Index i= seq_along(DD[,1:4]) lapply(i, function(x){ mean(DD[[x]]) })
split()
활용하여 List만들기 => Group별로 자르기
x object를 f 를 기준으로 row기준으로 데이터를 나눠준다.
> args(split) function (x, f, drop = FALSE, ...) NULL
X <- DD[ ,unique(Cycle)] CycleList <- lapply(X, function(x){x}) [[1]] [1] 1 [[2]] [1] 2 #same using for문 X <- DD[ ,unique(Cycle)] CycleList <- vector("list", length(X)) #empty List 만들기 for(i in seq_along(X)){ CycleList[[i]] <- X[i] } names(CycleList) <- X #naming $`1` [1] 1 $`2` [1] 2
#named list iSpecies <- split(DD, DD$Species) lapply(iSpecies, function(x){ temp <- x nm <- temp[,unique(Species)] names(temp) <- str_c(names(temp),"_",nm) temp }) #Un-named list iSpecies <- split(DD, DD$Species) lapply(seq_along(iSpecies), function(x){ temp <- iSpecies[[x]] nm <- temp[,unique(Species)] names(temp) <- str_c(names(temp),"_",nm) temp }) #same above iSpecies <- DD[, unique(Species)] %>% as.vector() lapply(iSpecies, function(x){ temp <- DD[Species==x,] names(temp) <- str_c(names(temp),"_",x) temp }) $setosa SL_setosa SW_setosa PL_setosa PW_setosa Species_setosa Cycle_setosa 1: 5.1 3.5 1.4 0.2 setosa 1 2: 4.7 3.2 1.3 0.2 setosa 1 ... 50: 5.0 3.3 1.4 0.2 setosa 2 SL_setosa SW_setosa PL_setosa PW_setosa Species_setosa Cycle_setosa $versicolor SL_versicolor SW_versicolor PL_versicolor PW_versicolor Species_versicolor Cycle_versicolor 1: 7.0 3.2 4.7 1.4 versicolor 1 2: 6.9 3.1 4.9 1.5 versicolor 1 ... 50: 5.7 2.8 4.1 1.3 versicolor 2 SL_versicolor SW_versicolor PL_versicolor PW_versicolor Species_versicolor Cycle_versicolor $virginica SL_virginica SW_virginica PL_virginica PW_virginica Species_virginica Cycle_virginica 1: 6.3 3.3 6.0 2.5 virginica 1 2: 7.1 3.0 5.9 2.1 virginica 1 ... 50: 5.9 3.0 5.1 1.8 virginica 2 SL_virginica SW_virginica PL_virginica PW_virginica Species_virginica Cycle_virginica ","showLines":false,"wrapLines":false,"highlightStart":"1","highlightEnd":"2
Example
type1
DDSpStat_Cy <- lapply(unique(DD$Cycle),function(x){ #x=unique(DD$Cycle)[1] DDCy <- DD[DD$Cycle == x,] DDSp <- split(DDCy,DDCy$Species) DDSpStat <- lapply(1:length(DDSp),function(y){ #y=1 Tmp1 <- DDSp[[y]] names(Tmp1) <- str_c(names(Tmp1),names(DDSp)[y],sep="_") STAT <- c("min","max") DDStat <- lapply(STAT, function(z){ #z=STAT[1] varnm <- paste0(c("SL","SW","PL","PW"),"_") vidx <- map_int(varnm, function(v){ #v=varnm[1] str_which(names(Tmp1), v) }) res <- sapply(Tmp1[,..vidx],z) names(res) <- str_c(names(res),"_",z) res }) %>% unlist() }) %>% unlist() }) DDSpStat_Cy result <- do.call("rbind", DDSpStat_Cy) result %>% dim ### SL_setosa_min SW_setosa_min PL_setosa_min PW_setosa_min SL_setosa_max SW_setosa_max PL_setosa_max PW_setosa_max SL_versicolor_min SW_versicolor_min PL_versicolor_min PW_versicolor_min [1,] 4.4 2.9 1.0 0.1 5.8 4.1 1.9 0.4 5.0 2.0 3.0 1 [2,] 4.3 2.3 1.1 0.1 5.7 4.4 1.7 0.6 4.9 2.3 3.3 1 SL_versicolor_max SW_versicolor_max PL_versicolor_max PW_versicolor_max SL_virginica_min SW_virginica_min PL_virginica_min PW_virginica_min SL_virginica_max SW_virginica_max PL_virginica_max [1,] 7.0 3.3 4.9 1.8 4.9 2.5 4.5 1.4 7.7 3.4 6.9 [2,] 6.7 3.4 5.1 1.7 5.6 2.2 4.9 1.5 7.9 3.8 6.7 PW_virginica_max [1,] 2.5 [2,] 2.5
type2
DDCy <- DD %>% split(by="Cycle") names(DDCy) <- NULL DDSpStat_Cy <- DDCy %>% map(function(x){ #x=DDCy[[1]] DDSp <- x %>% split(by="Species") names(DDSp) <- NULL DDSpStat <- DDSp %>% map(function(y){ #y=DDSp[[1]] names(y) <- str_c(names(y), unique(y[,Species]), sep="_") STAT <- c("min","max") DDStat <- map(STAT, function(z){ #z=STAT[1] v_nm <- str_c(c("SL","SW","PL","PW"),"_") v_idx <- map_int(v_nm, function(v){ #v=v_nm[1] str_which(names(y), v) }) res <- sapply(y[,..v_idx], z) names(res) <- str_c(names(res), z, sep="_") res }) %>% unlist() }) %>% unlist() } )
type3
groupCol1 = c("Cycle") groupCol2 = c("Species") varNm = c("SL","SW","PL","PW") STAT = c("min","max") DDCy <- DD %>% split(by=groupCol1) names(DDCy) <- NULL DDSpStat_Cy <- DDCy %>% map(function(x){ #x=DDCy[[1]] DDSp <- x %>% split(by=groupCol2) names(DDSp) <- NULL DDSpStat <- DDSp %>% map(function(y){ #y=DDSp[[1]] names(y) <- str_c(names(y), unlist(unique(y[,..groupCol2])), sep="_") DDStat <- map(STAT, function(z){ #z=STAT[1] v_nm <- str_c(varNm,"_") v_idx <- map_int(v_nm, function(v){ str_which(names(y), v) }) res <- map_dbl(y[,..v_idx], get(z)) names(res) <- str_c(names(res), z, sep="_") res }) %>% unlist() }) %>% unlist() }) do.call(rbind, DDSpStat_Cy)