Missing Value : Amelia
https://cran.r-project.org/web/packages/Amelia/index.html
http://gking.harvard.edu/amelia/
https://www.r-bloggers.com/ggplot-your-missing-data-2/
> install.packages('Amelia')
also installing the dependency ‘RcppArmadillo’

trying URL 'https://cran.rstudio.com/bin/macosx/el-capitan/contrib/3.5/RcppArmadillo_0.9.500.2.0.tgz'
==================================================
trying URL 'https://cran.rstudio.com/bin/macosx/el-capitan/contrib/3.5/Amelia_1.7.5.tgz'
==================================================
data: wakefield (random 데이터셋 생성)
# Simulate a small demo data set with wakefield, then punch holes in it.
library(wakefield)

# 30 random rows built from wakefield's variable generators; the result is
# handed to r_na() with prob = .4 to introduce missing values.
df <- r_na(
  r_data_frame(
    n = 30,
    id, race, age, sex, hour, iq, height,
    Scoring = rnorm,
    Smoker = valid
  ),
  prob = .4
)

# The same data as a data.table, used by the data.table-based plot helpers.
dt <- data.table(df)
# Draw Amelia's built-in missingness map for both representations of the data.
library(Amelia)

missmap(df)
missmap(dt)
#' Plot a missingness map of a data set with ggplot2.
#'
#' Every cell of `x` becomes one tile: the columns of `x` run along the x axis,
#' its rows along the y axis, and the fill colour shows whether the cell is
#' missing.
#'
#' @param x A data.frame-like object (data.frame, tibble, data.table) or a
#'   matrix.
#' @return A ggplot object.
ggplot_missing <- function(x) {
  # is.na() on a data frame yields a logical matrix. It must be melted by
  # reshape2 (which produces the Var1 / Var2 / value columns used below):
  # a bare `melt` resolves to data.table::melt once data.table is attached,
  # and that generic only has a method for data.tables.
  na_long <- reshape2::melt(is.na(x))

  # Label the fill explicitly so colours and legend text cannot be swapped
  # (or fail to match) when only one of TRUE / FALSE occurs in the data.
  na_long$value <- factor(
    na_long$value,
    levels = c(FALSE, TRUE),
    labels = c("Present", "Missing")
  )

  ggplot(na_long, aes(x = Var2, y = Var1)) +
    geom_raster(aes(fill = value)) +
    scale_fill_manual(
      name = "",
      values = c(Present = "skyblue", Missing = "tomato")
    ) +
    labs(x = "Variables", y = "Rows") +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, vjust = 0.5))
}

# Demo: rows sorted by descending ID before plotting.
dt[order(-ID)] %>% ggplot_missing()
#' Plot a missingness map of a data.table, labelling rows by identifier columns.
#'
#' The values of the `ID` columns are joined with "_" into one row label
#' (a missing identifier is shown as "."). Every remaining column is plotted
#' along the x axis, with the fill colour showing whether the cell is missing.
#' Rows that end up with the same label are drawn on the same y position.
#'
#' @param x A data.table (any data.frame works as well).
#' @param ID Character vector naming the columns of `x` that identify a row.
#' @return A ggplot object.
dt_gg_missing <- function(x, ID) {
  stopifnot(
    is.data.frame(x),
    is.character(ID),
    length(ID) > 0,
    all(ID %in% names(x))
  )

  # Build the row label from local vectors instead of `x[, key := ...]`, so
  # the caller's data.table is not modified by reference.
  # NA is replaced per column *before* pasting, so a genuine value that
  # happens to contain the letters "NA" is left intact.
  id_parts <- lapply(ID, function(col) {
    txt <- as.character(x[[col]])
    txt[is.na(txt)] <- "."
    txt
  })
  # paste(), not paste0(): paste0() has no `sep` argument and would append
  # "_" to the end of each label instead of separating the parts.
  row_key <- do.call(paste, c(id_parts, sep = "_"))

  # `key` is reserved for the row label, so a column of that name is never
  # plotted as a variable (same exclusion as the original implementation).
  MM <- setdiff(names(x), c("key", ID))
  stopifnot(length(MM) > 0)

  # One logical column per plotted variable, plus the row label.
  na_flags <- data.table(is.na(x)[, MM, drop = FALSE])
  set(na_flags, j = "key", value = row_key)

  na_long <- data.table::melt(na_flags, id.vars = "key", measure.vars = MM)

  # Label the fill explicitly so colours and legend text cannot be swapped
  # (or fail to match) when only one of TRUE / FALSE occurs in the data.
  set(
    na_long,
    j = "value",
    value = factor(
      na_long[["value"]],
      levels = c(FALSE, TRUE),
      labels = c("Present", "Missing")
    )
  )

  ggplot(na_long, aes(x = variable, y = key)) +
    geom_raster(aes(fill = value)) +
    scale_fill_manual(
      name = "",
      values = c(Present = "skyblue", Missing = "tomato")
    ) +
    labs(x = "Variables", y = "Rows") +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, vjust = 0.5))
}

# Demo: label each row by its ID and Sex values.
dt %>% dt_gg_missing(ID = c("ID", "Sex"))