lapply
https://onesixx.com/lapply/
https://onesixx.com/lapply-vs-docall/
https://onesixx.com/bind/
예제데이터
DD <- data.table(iris)
names(DD) <- c("SL","SW","PL","PW","Species")
# SL SW PL PW Species
# 1: 5.1 3.5 1.4 0.2 setosa
# 2: 4.9 3.0 1.4 0.2 setosa
# 3: 4.7 3.2 1.3 0.2 setosa
# 4: 4.6 3.1 1.5 0.2 setosa
# 5: 5.0 3.6 1.4 0.2 setosa
# ---
# 146: 6.7 3.0 5.2 2.3 virginica
# 147: 6.3 2.5 5.0 1.9 virginica
# 148: 6.5 3.0 5.2 2.0 virginica
# 149: 6.2 3.4 5.4 2.3 virginica
# 150: 5.9 3.0 5.1 1.8 virginica
DD_list <- DD %>% split(DD$Species)
names(DD_list) <- str_c("cycle",1:3)
names(DD_list) %>% map( ~mutate(DD_list[[.]], new_column=.) ) # transform
# [[1]]
# SL SW PL PW Species new_column
# 1: 5.1 3.5 1.4 0.2 setosa cycle1
#...
# 50: 5.0 3.3 1.4 0.2 setosa cycle1
# SL SW PL PW Species new_column
#
# [[2]]
# SL SW PL PW Species new_column
# 1: 7.0 3.2 4.7 1.4 versicolor cycle2
# 2: 6.4 3.2 4.5 1.5 versicolor cycle2
#...
# 50: 5.7 2.8 4.1 1.3 versicolor cycle2
# SL SW PL PW Species new_column
#
# [[3]]
# SL SW PL PW Species new_column
# 1: 6.3 3.3 6.0 2.5 virginica cycle3
#...
# 49: 6.2 3.4 5.4 2.3 virginica cycle3
# 50: 5.9 3.0 5.1 1.8 virginica cycle3
# SL SW PL PW Species new_column
참고 apply – 1 row
> apply(DD[,1:4],1,mean) [1] 2.550 2.350 2.550 2.425 2.225 2.700 ... [150] 3.950
참고 apply – 2 column
> apply(DD[,1:4],2,mean) SL SW PL PW 5.843333 3.057333 3.758000 1.199333 > sapply(DD[,1:4], mean) > sapply(DD[,1:4], mean, simplify=T) #same SL SW PL PW 5.843333 3.057333 3.758000 1.199333
sapply는 lapply를 쉽게 사용할수 있게 wrapping한거
> sapply(DD[,1:4], mean, simplify=F) # sapply는 wrapper of lapply, simple version > lapply(DD[,1:4], mean) #same $SL [1] 5.843333 $SW [1] 3.057333 $PL [1] 3.758 $PW [1] 1.199333
lapply기본원리
> lapply
function (X, FUN, ...)
{
FUN <- match.fun(FUN)
if (!is.vector(X) || is.object(X)) X <- as.list(X)
.Internal(lapply(X, FUN))
}
- X가 어떤 Object든지 list로 만들어서,
- Column기준으로 Loop하면서 Fun을 적용한다.
- 결과는 항상 List 이다.
lapply(c(1:4), runif) lapply(as.list(c(1:4)), runif) #same
참고: as.list
> c("A","B") %>% as.list()
[[1]]
[1] "A"
[[2]]
[1] "B"
> c("A","B") %>% as.list() %>% as.list() %>% as.list() %>% as.list() %>% as.list()
[[1]]
[1] "A"
[[2]]
[1] "B"
Column별로 List를 만들어서 동시에 함수적용
> lapply(DD, function(x){ x })
$SL
[1] 5.1 4.7 5.0 4.6 4.4 5.4 ...
[150] 5.9
$SW
[1] 3.5 3.2 3.6 3.4 2.9 3.7 ...
[150] 3.0
$PL
[1] 1.4 1.3 1.4 1.4 1.4 1.5 ...
[150] 5.1
$PW
[1] 0.2 0.2 0.2 0.3 0.2 0.2 ...
[150] 1.8
$Species
[1] setosa setosa setosa ...
[150] virginica
Levels: setosa versicolor virginica
$Cycle
[1] 1 1 1 1 1 1 1 1 1 ...
[150] 2
lapply의 Argument
- vector 자체 또는 data.table
- (vector 자체 대신에) vector의 ColumnName 또는 Index
- Iteration Number
###### vector 자체 또는 data.table
dt=DD[,1:4]
lapply(dt, function(x){
mean(x)
})
# (vector 자체 대신에) vector의 컬럼명이나 인덱스를 넘긴다.
###### vector의 컬럼명 or vector Index
vec = colnames(DD[,1:4])
lapply(vec, function(x){
mean(DD[[x]])
})
#DD[,..x] %>% unlist() %>% mean()
#result is UN-named list
###### iteration number i or vector Index
i= seq_along(DD[,1:4])
lapply(i, function(x){
mean(DD[[x]])
})
split()활용하여 List만들기 => Group별로 자르기
x object를 f 를 기준으로 row기준으로 데이터를 나눠준다.
> args(split) function (x, f, drop = FALSE, ...) NULL
X <- DD[ ,unique(Cycle)]
CycleList <- lapply(X, function(x){x})
[[1]]
[1] 1
[[2]]
[1] 2
#same using for문
X <- DD[ ,unique(Cycle)]
CycleList <- vector("list", length(X)) #empty List 만들기
for(i in seq_along(X)){
CycleList[[i]] <- X[i]
}
names(CycleList) <- X #naming
$`1`
[1] 1
$`2`
[1] 2
#named list
iSpecies <- split(DD, DD$Species)
lapply(iSpecies, function(x){
temp <- x
nm <- temp[,unique(Species)]
names(temp) <- str_c(names(temp),"_",nm)
temp
})
#Un-named list
iSpecies <- split(DD, DD$Species)
lapply(seq_along(iSpecies), function(x){
temp <- iSpecies[[x]]
nm <- temp[,unique(Species)]
names(temp) <- str_c(names(temp),"_",nm)
temp
})
#same above
iSpecies <- DD[, unique(Species)] %>% as.vector()
lapply(iSpecies, function(x){
temp <- DD[Species==x,]
names(temp) <- str_c(names(temp),"_",x)
temp
})
$setosa
SL_setosa SW_setosa PL_setosa PW_setosa Species_setosa Cycle_setosa
1: 5.1 3.5 1.4 0.2 setosa 1
2: 4.7 3.2 1.3 0.2 setosa 1
...
50: 5.0 3.3 1.4 0.2 setosa 2
SL_setosa SW_setosa PL_setosa PW_setosa Species_setosa Cycle_setosa
$versicolor
SL_versicolor SW_versicolor PL_versicolor PW_versicolor Species_versicolor Cycle_versicolor
1: 7.0 3.2 4.7 1.4 versicolor 1
2: 6.9 3.1 4.9 1.5 versicolor 1
...
50: 5.7 2.8 4.1 1.3 versicolor 2
SL_versicolor SW_versicolor PL_versicolor PW_versicolor Species_versicolor Cycle_versicolor
$virginica
SL_virginica SW_virginica PL_virginica PW_virginica Species_virginica Cycle_virginica
1: 6.3 3.3 6.0 2.5 virginica 1
2: 7.1 3.0 5.9 2.1 virginica 1
...
50: 5.9 3.0 5.1 1.8 virginica 2
SL_virginica SW_virginica PL_virginica PW_virginica Species_virginica Cycle_virginica
","showLines":false,"wrapLines":false,"highlightStart":"1","highlightEnd":"2
Example
type1
DDSpStat_Cy <- lapply(unique(DD$Cycle),function(x){ #x=unique(DD$Cycle)[1]
DDCy <- DD[DD$Cycle == x,]
DDSp <- split(DDCy,DDCy$Species)
DDSpStat <- lapply(1:length(DDSp),function(y){ #y=1
Tmp1 <- DDSp[[y]]
names(Tmp1) <- str_c(names(Tmp1),names(DDSp)[y],sep="_")
STAT <- c("min","max")
DDStat <- lapply(STAT, function(z){ #z=STAT[1]
varnm <- paste0(c("SL","SW","PL","PW"),"_")
vidx <- map_int(varnm, function(v){ #v=varnm[1]
str_which(names(Tmp1), v)
})
res <- sapply(Tmp1[,..vidx],z)
names(res) <- str_c(names(res),"_",z)
res
}) %>% unlist()
}) %>% unlist()
})
DDSpStat_Cy
result <- do.call("rbind", DDSpStat_Cy)
result %>% dim
###
SL_setosa_min SW_setosa_min PL_setosa_min PW_setosa_min SL_setosa_max SW_setosa_max PL_setosa_max PW_setosa_max SL_versicolor_min SW_versicolor_min PL_versicolor_min PW_versicolor_min
[1,] 4.4 2.9 1.0 0.1 5.8 4.1 1.9 0.4 5.0 2.0 3.0 1
[2,] 4.3 2.3 1.1 0.1 5.7 4.4 1.7 0.6 4.9 2.3 3.3 1
SL_versicolor_max SW_versicolor_max PL_versicolor_max PW_versicolor_max SL_virginica_min SW_virginica_min PL_virginica_min PW_virginica_min SL_virginica_max SW_virginica_max PL_virginica_max
[1,] 7.0 3.3 4.9 1.8 4.9 2.5 4.5 1.4 7.7 3.4 6.9
[2,] 6.7 3.4 5.1 1.7 5.6 2.2 4.9 1.5 7.9 3.8 6.7
PW_virginica_max
[1,] 2.5
[2,] 2.5
type2
DDCy <- DD %>% split(by="Cycle")
names(DDCy) <- NULL
DDSpStat_Cy <- DDCy %>% map(function(x){ #x=DDCy[[1]]
DDSp <- x %>% split(by="Species")
names(DDSp) <- NULL
DDSpStat <- DDSp %>% map(function(y){ #y=DDSp[[1]]
names(y) <- str_c(names(y), unique(y[,Species]), sep="_")
STAT <- c("min","max")
DDStat <- map(STAT, function(z){ #z=STAT[1]
v_nm <- str_c(c("SL","SW","PL","PW"),"_")
v_idx <- map_int(v_nm, function(v){ #v=v_nm[1]
str_which(names(y), v)
})
res <- sapply(y[,..v_idx], z)
names(res) <- str_c(names(res), z, sep="_")
res
}) %>% unlist()
}) %>% unlist()
} )
type3
groupCol1 = c("Cycle")
groupCol2 = c("Species")
varNm = c("SL","SW","PL","PW")
STAT = c("min","max")
DDCy <- DD %>% split(by=groupCol1)
names(DDCy) <- NULL
DDSpStat_Cy <- DDCy %>% map(function(x){ #x=DDCy[[1]]
DDSp <- x %>% split(by=groupCol2)
names(DDSp) <- NULL
DDSpStat <- DDSp %>% map(function(y){ #y=DDSp[[1]]
names(y) <- str_c(names(y), unlist(unique(y[,..groupCol2])), sep="_")
DDStat <- map(STAT, function(z){ #z=STAT[1]
v_nm <- str_c(varNm,"_")
v_idx <- map_int(v_nm, function(v){ str_which(names(y), v) })
res <- map_dbl(y[,..v_idx], get(z))
names(res) <- str_c(names(res), z, sep="_")
res
}) %>% unlist()
}) %>% unlist()
})
do.call(rbind, DDSpStat_Cy)