lapply

Published onesixx on

https://onesixx.com/lapply/
https://onesixx.com/lapply-vs-docall/
https://onesixx.com/bind/
예제데이터
DD <- data.table(iris)
names(DD) <- c("SL","SW","PL","PW","Species")
#       SL  SW  PL  PW   Species
#   1: 5.1 3.5 1.4 0.2    setosa
#   2: 4.9 3.0 1.4 0.2    setosa
#   3: 4.7 3.2 1.3 0.2    setosa
#   4: 4.6 3.1 1.5 0.2    setosa
#   5: 5.0 3.6 1.4 0.2    setosa
#  ---                          
# 146: 6.7 3.0 5.2 2.3 virginica
# 147: 6.3 2.5 5.0 1.9 virginica
# 148: 6.5 3.0 5.2 2.0 virginica
# 149: 6.2 3.4 5.4 2.3 virginica
# 150: 5.9 3.0 5.1 1.8 virginica

DD_list <- DD %>% split(DD$Species)
names(DD_list) <- str_c("cycle",1:3)
names(DD_list) %>% map( ~mutate(DD_list[[.]], new_column=.) ) # transform
# [[1]]
#      SL  SW  PL  PW Species new_column
#  1: 5.1 3.5 1.4 0.2  setosa     cycle1
#...
# 50: 5.0 3.3 1.4 0.2  setosa     cycle1
#      SL  SW  PL  PW Species new_column
# 
# [[2]]
#      SL  SW  PL  PW    Species new_column
#  1: 7.0 3.2 4.7 1.4 versicolor     cycle2
#  2: 6.4 3.2 4.5 1.5 versicolor     cycle2
#...
# 50: 5.7 2.8 4.1 1.3 versicolor     cycle2
#      SL  SW  PL  PW    Species new_column
# 
# [[3]]
#      SL  SW  PL  PW   Species new_column
#  1: 6.3 3.3 6.0 2.5 virginica     cycle3
#...
# 49: 6.2 3.4 5.4 2.3 virginica     cycle3
# 50: 5.9 3.0 5.1 1.8 virginica     cycle3
#      SL  SW  PL  PW   Species new_column
참고 apply – 1 row
> apply(DD[,1:4],1,mean)
  [1] 2.550 2.350 2.550 2.425 2.225 2.700  
      ...
[150] 3.950
참고 apply – 2 column
> apply(DD[,1:4],2,mean)
      SL       SW       PL       PW 
5.843333 3.057333 3.758000 1.199333

> sapply(DD[,1:4], mean)
> sapply(DD[,1:4], mean, simplify=T) #same
      SL       SW       PL       PW 
5.843333 3.057333 3.758000 1.199333

sapply는 lapply를 쉽게 사용할수 있게 wrapping한거

> sapply(DD[,1:4], mean, simplify=F) # sapply는 wrapper of lapply, simple version
> lapply(DD[,1:4], mean)  #same
$SL
[1] 5.843333
$SW
[1] 3.057333
$PL
[1] 3.758
$PW
[1] 1.199333

lapply기본원리

> lapply
function (X, FUN, ...) 
{
    FUN <- match.fun(FUN)
    if (!is.vector(X) || is.object(X)) X <- as.list(X)
    .Internal(lapply(X, FUN))
}
  • X가 어떤 Object든지 list로 만들어서,
  • Column기준으로 Loop하면서 Fun을 적용한다.
  • 결과는 항상 List 이다.
lapply(c(1:4),          runif)
lapply(as.list(c(1:4)), runif)  #same
참고: as.list
> c("A","B") %>% as.list()
[[1]]
[1] "A"

[[2]]
[1] "B"

> c("A","B") %>% as.list() %>% as.list() %>% as.list() %>% as.list() %>% as.list()
[[1]]
[1] "A"

[[2]]
[1] "B"

Column별로 List를 만들어서 동시에 함수적용

> lapply(DD, function(x){ x })
$SL
  [1] 5.1 4.7 5.0 4.6 4.4 5.4 ...
[150] 5.9

$SW
  [1] 3.5 3.2 3.6 3.4 2.9 3.7 ...
[150] 3.0

$PL
  [1] 1.4 1.3 1.4 1.4 1.4 1.5 ...
[150] 5.1

$PW
  [1] 0.2 0.2 0.2 0.3 0.2 0.2 ...
[150] 1.8

$Species
  [1] setosa     setosa     setosa     ...
[150]  virginica 
Levels: setosa versicolor virginica

$Cycle
  [1] 1 1 1 1 1 1 1 1 1 ...
[150] 2

lapply의 Argument

  • vector 자체 또는 data.table
  • (vector 자체 대신에) vector의 ColumnName 또는 Index
  • Iteration Number
###### vector 자체 또는 data.table
dt=DD[,1:4]
lapply(dt, function(x){ 
   mean(x)
})

# (vector 자체 대신에) vector의 컬럼명이나 인덱스를 넘긴다.
###### vector의 컬럼명 or vector Index
vec = colnames(DD[,1:4])
lapply(vec, function(x){ 
   mean(DD[[x]])
})
#DD[,..x] %>% unlist() %>% mean()
#result is UN-named list

###### iteration number i or vector Index
i= seq_along(DD[,1:4])
lapply(i, function(x){ 
   mean(DD[[x]])
})

split()활용하여 List만들기 => Group별로 자르기

x object를 f 를 기준으로 row기준으로 데이터를 나눠준다. 

> args(split)
function (x, f, drop = FALSE, ...) 
NULL
X <- DD[ ,unique(Cycle)]
CycleList <- lapply(X, function(x){x})
[[1]]
[1] 1

[[2]]
[1] 2

#same using for문
X <- DD[ ,unique(Cycle)]
CycleList <- vector("list", length(X))   #empty List 만들기
for(i in seq_along(X)){
  CycleList[[i]] <- X[i]
}
names(CycleList) <- X                    #naming
$`1`
[1] 1

$`2`
[1] 2
#named list
iSpecies <- split(DD, DD$Species)
lapply(iSpecies, function(x){
  temp <- x
  nm <- temp[,unique(Species)]
  names(temp) <- str_c(names(temp),"_",nm)
  temp
})
#Un-named list
iSpecies <- split(DD, DD$Species)
lapply(seq_along(iSpecies), function(x){
  temp <- iSpecies[[x]]
  nm <- temp[,unique(Species)]
  names(temp) <- str_c(names(temp),"_",nm)
  temp
})
#same above
iSpecies <- DD[, unique(Species)] %>% as.vector()
lapply(iSpecies, function(x){
  temp <- DD[Species==x,]
  names(temp) <- str_c(names(temp),"_",x)
  temp
})

$setosa
    SL_setosa SW_setosa PL_setosa PW_setosa Species_setosa Cycle_setosa
 1:       5.1       3.5       1.4       0.2         setosa           1
 2:       4.7       3.2       1.3       0.2         setosa           1
 ...
50:       5.0       3.3       1.4       0.2         setosa           2
    SL_setosa SW_setosa PL_setosa PW_setosa Species_setosa Cycle_setosa

$versicolor
    SL_versicolor SW_versicolor PL_versicolor PW_versicolor Species_versicolor Cycle_versicolor
 1:           7.0           3.2           4.7           1.4         versicolor               1
 2:           6.9           3.1           4.9           1.5         versicolor               1
 ...
50:           5.7           2.8           4.1           1.3         versicolor               2
    SL_versicolor SW_versicolor PL_versicolor PW_versicolor Species_versicolor Cycle_versicolor

$virginica
    SL_virginica SW_virginica PL_virginica PW_virginica Species_virginica Cycle_virginica
 1:          6.3          3.3          6.0          2.5         virginica              1
 2:          7.1          3.0          5.9          2.1         virginica              1
 ...
50:          5.9          3.0          5.1          1.8         virginica              2
    SL_virginica SW_virginica PL_virginica PW_virginica Species_virginica Cycle_virginica
","showLines":false,"wrapLines":false,"highlightStart":"1","highlightEnd":"2

Example

type1

DDSpStat_Cy <- lapply(unique(DD$Cycle),function(x){  #x=unique(DD$Cycle)[1]
  DDCy <- DD[DD$Cycle == x,]
  
  DDSp <- split(DDCy,DDCy$Species)
  DDSpStat <- lapply(1:length(DDSp),function(y){  #y=1
    Tmp1 <- DDSp[[y]]
    names(Tmp1) <- str_c(names(Tmp1),names(DDSp)[y],sep="_")
    
    STAT <- c("min","max")
    DDStat <- lapply(STAT, function(z){ #z=STAT[1]
      varnm <- paste0(c("SL","SW","PL","PW"),"_")
      vidx <- map_int(varnm, function(v){ #v=varnm[1]
        str_which(names(Tmp1), v)
      })
      res <- sapply(Tmp1[,..vidx],z)
      names(res) <- str_c(names(res),"_",z)
      res
    }) %>% unlist()
  }) %>% unlist()
})  

DDSpStat_Cy
result <- do.call("rbind", DDSpStat_Cy)
result %>% dim

###
     SL_setosa_min SW_setosa_min PL_setosa_min PW_setosa_min SL_setosa_max SW_setosa_max PL_setosa_max PW_setosa_max SL_versicolor_min SW_versicolor_min PL_versicolor_min PW_versicolor_min
[1,]           4.4           2.9           1.0           0.1           5.8           4.1           1.9           0.4               5.0               2.0               3.0                 1
[2,]           4.3           2.3           1.1           0.1           5.7           4.4           1.7           0.6               4.9               2.3               3.3                 1
     SL_versicolor_max SW_versicolor_max PL_versicolor_max PW_versicolor_max SL_virginica_min SW_virginica_min PL_virginica_min PW_virginica_min SL_virginica_max SW_virginica_max PL_virginica_max
[1,]               7.0               3.3               4.9               1.8              4.9              2.5              4.5              1.4              7.7              3.4              6.9
[2,]               6.7               3.4               5.1               1.7              5.6              2.2              4.9              1.5              7.9              3.8              6.7
     PW_virginica_max
[1,]              2.5
[2,]              2.5

type2

DDCy <- DD %>% split(by="Cycle")
names(DDCy) <- NULL
DDSpStat_Cy <- DDCy %>% map(function(x){ #x=DDCy[[1]]
  DDSp <- x %>% split(by="Species")
  names(DDSp) <- NULL
  DDSpStat <- DDSp %>% map(function(y){  #y=DDSp[[1]]
    names(y) <- str_c(names(y), unique(y[,Species]), sep="_")
    
    STAT <- c("min","max")
    DDStat <- map(STAT, function(z){ #z=STAT[1]
      v_nm <- str_c(c("SL","SW","PL","PW"),"_")
      v_idx <- map_int(v_nm, function(v){ #v=v_nm[1]
        str_which(names(y), v)
      })
      res <- sapply(y[,..v_idx], z)
      names(res) <- str_c(names(res), z, sep="_")
      res
    }) %>% unlist()
  }) %>% unlist()
} )

type3

groupCol1 = c("Cycle")
groupCol2 = c("Species")
varNm = c("SL","SW","PL","PW")
STAT  = c("min","max")

DDCy <- DD %>% split(by=groupCol1)
names(DDCy) <- NULL
DDSpStat_Cy <- DDCy %>% map(function(x){ #x=DDCy[[1]]
  DDSp <- x %>% split(by=groupCol2)
  names(DDSp) <- NULL
  DDSpStat <- DDSp %>% map(function(y){  #y=DDSp[[1]]
    names(y) <- str_c(names(y), unlist(unique(y[,..groupCol2])), sep="_")
    
    DDStat <- map(STAT, function(z){ #z=STAT[1]
      v_nm <- str_c(varNm,"_")
      v_idx <- map_int(v_nm, function(v){ str_which(names(y), v) })
      res <- map_dbl(y[,..v_idx], get(z))
      
      names(res) <- str_c(names(res), z, sep="_")
      res
    }) %>% unlist()
  }) %>% unlist()
})

do.call(rbind, DDSpStat_Cy)
Categories: R Reshaping

onesixx

Blog Owner

Subscribe
Notify of
guest

0 Comments
Inline Feedbacks
View all comments
0
Would love your thoughts, please comment.x
()
x