Missing Value : Amelia
https://cran.r-project.org/web/packages/Amelia/index.html
http://gking.harvard.edu/amelia/
https://www.r-bloggers.com/ggplot-your-missing-data-2/
> install.packages('Amelia')
also installing the dependency ‘RcppArmadillo’
trying URL 'https://cran.rstudio.com/bin/macosx/el-capitan/contrib/3.5/RcppArmadillo_0.9.500.2.0.tgz'
==================================================
trying URL 'https://cran.rstudio.com/bin/macosx/el-capitan/contrib/3.5/Amelia_1.7.5.tgz'
==================================================
Data: wakefield (generates a random dataset)
# Build a 30-row random demo data set with wakefield, then inject missing
# values with r_na(prob = .4). `df` keeps the wakefield result; `dt` is the
# same data converted to a data.table.
# NOTE(review): `%>%` and `data.table()` are used here, but no library() call
# that provides them is visible in this file -- attach magrittr (or dplyr) and
# data.table before running.
library(wakefield)

df <- r_data_frame(
  n = 30,
  id, race, age, sex, hour, iq, height,
  Scoring = rnorm,
  Smoker = valid
) %>%
  r_na(prob = .4)

dt <- df %>% data.table
# Draw Amelia's missingness map twice: once for `df` and once for `dt`.
# Each statement sits on its own line; fused on one line they do not parse.
library(Amelia)

df %>% missmap()
dt %>% missmap()
# Plot a missing-value heatmap: variables on the x axis, row positions on the
# y axis, each cell coloured by whether the value is present or missing.
#
# Args:
#   x: a data-frame-like object that is.na() can turn into a logical matrix.
#
# Returns:
#   A ggplot object (drawn when printed).
ggplot_missing <- function(x) {
  # is.na() yields a logical matrix; melt() reshapes it to long form with the
  # columns Var1 (row), Var2 (variable) and value (TRUE when missing).
  # NOTE(review): the Var1/Var2 column names are presumably those produced by
  # reshape2's matrix method for melt() -- confirm reshape2 is attached.
  long <- melt(is.na(x))

  # Map the logical flag through a factor that always carries both levels.
  # With unnamed colours/labels on the raw logical, data containing only
  # FALSE (or only TRUE) gets mismatched colours and labels.
  long$value <- factor(
    long$value,
    levels = c(FALSE, TRUE),
    labels = c("Present", "Missing")
  )

  ggplot(long, aes(x = Var2, y = Var1)) +
    # show.legend = TRUE keeps the legend key for a level absent from the data.
    geom_raster(aes(fill = value), show.legend = TRUE) +
    scale_fill_manual(
      name = "",
      values = c(Present = "skyblue", Missing = "tomato"),
      drop = FALSE
    ) +
    labs(x = "Variables", y = "Rows") +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, vjust = 0.5))
}
# Heatmap of missing values for `dt`, with rows sorted by descending ID.
ggplot_missing(dt[order(-ID)])
# Plot a missing-value heatmap whose rows are labelled by a key built from
# one or more identifier columns.
#
# Args:
#   x:  a data.table (a plain data.frame also works).
#   ID: character vector naming the identifier columns of `x`. Their values
#       are joined with "_" to form the row label; a missing identifier
#       value is shown as ".".
#
# Returns:
#   A ggplot object with the non-ID variables on the x axis and the row keys
#   on the y axis.
#
# Unlike a `:=`-based implementation, `x` is never modified: the key is built
# in a local table, so the caller's data.table does not gain a `key` column.
dt_gg_missing <- function(x, ID) {
  stopifnot(is.character(ID), length(ID) > 0L, all(ID %in% names(x)))

  # Replace NA identifiers per column *before* pasting. Substituting the text
  # "NA" in the pasted key would also rewrite real values that merely
  # contain those two letters.
  id_parts <- lapply(unname(ID), function(col) {
    part <- as.character(x[[col]])
    part[is.na(part)] <- "."
    part
  })
  # paste(), not paste0(): paste0() has no `sep` argument and would treat
  # "_" as one more vector to append to every key.
  row_key <- do.call(paste, c(id_parts, sep = "_"))

  # Every column that is neither an identifier nor named "key" (the name
  # used for the row label in the long table) is plotted.
  MM <- names(x)[!names(x) %in% c("key", ID)]
  if (length(MM) == 0L) {
    stop("no variables left to plot after removing the ID columns",
         call. = FALSE)
  }

  flags <- lapply(MM, function(col) is.na(x[[col]]))
  names(flags) <- MM
  # Build from a list: data.table(key = ...) would be taken as the `key`
  # argument of data.table() rather than as a column.
  wide <- as.data.table(c(list(key = row_key), flags))

  long <- melt.data.table(wide, id.vars = "key", measure.vars = MM)
  # Two-level factor so colours and labels stay aligned even when the data
  # contain only present or only missing cells. `long` is local, so updating
  # it by reference is safe.
  long[, value := factor(value, levels = c(FALSE, TRUE),
                         labels = c("Present", "Missing"))]

  ggplot(long, aes(x = variable, y = key)) +
    # show.legend = TRUE keeps the legend key for a level absent from the data.
    geom_raster(aes(fill = value), show.legend = TRUE) +
    scale_fill_manual(
      name = "",
      values = c(Present = "skyblue", Missing = "tomato"),
      drop = FALSE
    ) +
    labs(x = "Variables", y = "Rows") +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, vjust = 0.5))
}
# Missing-value heatmap for `dt`, with rows keyed by the ID and Sex columns.
dt_gg_missing(dt, ID = c("ID", "Sex"))