# Boston housing NN model
# https://onesixx.com/global/
# Regression: predict a continuous value (instead of a discrete label).
source(file.path(getwd(), "00.global.R"))

##1# DATA Source : INPUT LAYER -------------------------------------------------
# The dataset is small.
# The features are on different scales.
# y : median home price (in units of $1,000)
dataset <- dataset_boston_housing()
c(c(trnData, trnTarget), c(tstData, tstTarget)) %<-% dataset

# Box plot of every raw training feature to show the differing scales.
# Column names are derived from the data instead of hard-coding 13 features.
feature_cols <- str_c("V", seq_len(ncol(dataset$train$x)))
melt_trnX <- data.table(dataset$train$x) %>%
  melt.data.table(measure.vars = feature_cols)
melt_trnX %>% ggplot(aes(variable, value)) + geom_boxplot()

##2# Preprocess ---------------------------------------------------------------
trnData %>% str()
tstData %>% str()
trnTarget %>% summary()

# *Normalize ----
trn_mean <- apply(trnData, 2, mean)
trn_stdev <- apply(trnData, 2, sd)
trnData <- scale(trnData, center = trn_mean, scale = trn_stdev)
# The test data is centred/scaled with the TRAINING mean and sd on purpose.
tstData <- scale(tstData, center = trn_mean, scale = trn_stdev)

# Same box plot after normalisation.
melt_trnData <- data.table(trnData) %>%
  melt.data.table(measure.vars = str_c("V", seq_len(ncol(trnData))))
melt_trnData %>% ggplot(aes(variable, value)) + geom_boxplot()

##3# Build the model -----------------------------------------------------------
#' Build and compile the regression network.
#'
#' Two dense layers of 64 relu units followed by a single linear output unit,
#' compiled with the rmsprop optimizer, mse loss and the mae metric.
#'
#' @param input_dim Number of input features. Defaults to the column count of
#'   the global `trnData`, which is what the original zero-argument version
#'   always used.
#' @return The compiled model.
uF_buildModel <- function(input_dim = dim(trnData)[[2]]) {
  model <- keras_model_sequential() %>%
    layer_dense(units = 64, activation = "relu", input_shape = input_dim) %>%
    layer_dense(units = 64, activation = "relu") %>%
    layer_dense(units = 1) # no activation: pure linear output

  model %>% compile(
    optimizer = "rmsprop",
    loss = "mse",
    metrics = c("mae")
  )

  # Return the model explicitly rather than relying on whatever compile()
  # happens to return.
  model
}

##4# Train the model -----------------------------------------------------------
# The model has to be built before it can be fitted; `epochs` (previously
# misspelled `epoches`) is the argument that sets the number of passes.
model <- uF_buildModel()
history <- model %>% fit(
  trnData, trnTarget,
  batch_size = 10, epochs = 30, validation_split = 0.2
)
#history %>% uF_histPlot()
#history %>% plot()

##5# Evaluate accuracy ---------------------------------------------------------
eval <- model %>% evaluate(tstData, tstTarget)

##6# Make predictions ----------------------------------------------------------
rlt <- model %>% predict(tstData)
#data.table(cbind(tstTarget, rlt)) %>% ggplot(aes(tstTarget, rlt)) + geom_point() + geom_abline()

# K-fold Cross Validation
# ------------------------------------------------------
# Trains a fresh model from uF_buildModel() on k-1 folds of trnData/trnTarget
# and records the mean absolute error on the held-out fold, for each of the
# k folds.
k <- 4
indices <- sample(seq_len(nrow(trnData)))
folds <- cut(seq_along(indices), breaks = k, labels = FALSE)

num_epochs <- 100
# Preallocated; a fold that never finishes stays NA instead of being missing.
scores <- rep(NA_real_, k)
for (i in seq_len(k)) {
  # i <- 1  # uncomment to step through a single fold interactively
  cat("processing fold #", i, "\n")

  # Fold i: prepare the validation data.
  # Rows are picked through the shuffled `indices`; indexing by fold position
  # alone would ignore the shuffle and yield contiguous, unshuffled folds.
  val_indices <- indices[folds == i]
  val_trnData <- trnData[val_indices, , drop = FALSE]
  val_trnTarget <- trnTarget[val_indices]

  # Remaining data: prepare the training data.
  partial_trnData <- trnData[-val_indices, , drop = FALSE]
  partial_trnTarget <- trnTarget[-val_indices]

  # Training
  model <- uF_buildModel()
  model %>% fit(
    partial_trnData, partial_trnTarget,
    epochs = num_epochs, batch_size = 1, verbose = 0
  )

  # Evaluate. as.list() makes the lookup work whether evaluate() returns a
  # named list or a named numeric vector; the metric is looked up under both
  # names ("mean_absolute_error" and "mae").
  eval <- as.list(model %>% evaluate(val_trnData, val_trnTarget, verbose = 0))
  fold_mae <- eval[["mean_absolute_error"]]
  if (is.null(fold_mae)) {
    fold_mae <- eval[["mae"]]
  }
  if (is.null(fold_mae)) {
    stop("evaluate() did not return an MAE metric", call. = FALSE)
  }
  scores[i] <- fold_mae
}

scores
scores %>% mean()