Tutorial: Basic Regression
https://keras.rstudio.com/articles/tutorial_basic_regression.html
discrete label을 predict하는 classification과 달리, Regression에서는 continuous value의 결과를 predict한다.
이 예에서는 1970년대 중반 Boston 교외 지역 집값의 median을 predict하는 Model을 만든다.
Medv(주택의 가격 변수)와 여러 요건들(13개 변수) 간의 관계 분석
데이터는 Boston Housing Prices dataset
https://onesixx.com/data-boston/
### Title: Basic regression on the Boston Housing Prices dataset -------------
## reference:
# Trains a small dense network to predict median house prices from 13
# numeric features, then evaluates it and produces predictions on the test set.
#
# NOTE(review): data.table(), str_c(), coord_cartesian() and theme_ipsum() are
# used below but only keras is attached here -- presumably data.table,
# stringr, ggplot2 and hrbrthemes are attached elsewhere; confirm.
library(keras)

##1# DATA Source : INPUT LAYER -------------------------------------------------
boston_housing <- dataset_boston_housing()
# boston_housing %>% str
# List of 2 - train:test = 8:2
#  $ train:List of 2
#   ..$ x: num [1:404, 1:13] 1.2325 0.0218 4.8982 0.0396 3.6931 ...
#   ..$ y: num [1:404(1d)] 15.2 42.3 50 21.1 17.7 18.5 11.3 15.6 15.6 14.4 ...
#  $ test :List of 2
#   ..$ x: num [1:102, 1:13] 18.0846 0.1233 0.055 1.2735 0.0715 ...
#   ..$ y: num [1:102(1d)] 7.2 18.8 19 27 22.2 24.5 31.2 22.9 20.5 23.2 ...

# Features are a matrix, labels an array.
c(train_data, train_labels) %<-% boston_housing$train
c(test_data, test_labels) %<-% boston_housing$test
# str_c("Training entries: ", train_data %>% length(), ",",
#       "labels: ", train_labels %>% length())
# "Training entries: 5252, labels: 404"

# Named copy of the raw (unscaled) training features, for inspection only.
train_dt <- data.table(train_data)
column_nm <- c(
  "CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
  "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT"
)
colnames(train_dt) <- column_nm

##2# Preprocess ----------------------------------------------------------------
# The original referenced an undefined `train_df`; the table built above is
# `train_dt`.
train_dt[1, ]        # features are on different scales
train_labels[1:10]   # prices (the MAE is multiplied by 1000 below to report $)

## ` ` rescale Normalize -------------------------------------------------------
# Standardise with statistics computed on the training set only, and reuse
# those same statistics for the test set.
train_data <- scale(train_data)
train_data_colmean <- attr(train_data, "scaled:center")
train_data_colstd <- attr(train_data, "scaled:scale")
test_data <- scale(
  test_data,
  center = train_data_colmean,
  scale = train_data_colstd
)

## ` ` Plotting ----------------------------------------------------------------

##3# Build the model -----------------------------------------------------------
# Sequential model with two densely connected hidden layers.

#' Build and compile a fresh regression network.
#'
#' Wrapping construction in a function lets each training run start from
#' newly initialised weights instead of continuing a previous run.
#'
#' @param n_features Number of input columns.
#' @return A compiled keras model with a single linear output unit.
build_model <- function(n_features) {
  ## ` ` Setup the layers ------------------------------------------------------
  net <- keras_model_sequential() %>%
    layer_dense(units = 64, activation = "relu", input_shape = n_features) %>%
    layer_dense(units = 64, activation = "relu") %>%
    layer_dense(units = 1)

  ## ` ` reshape - Image Flatten (nothing to do: input is already 2-D) ---------

  ## ` ` Compile the model -----------------------------------------------------
  net %>% compile(
    loss = "mse",
    optimizer = optimizer_rmsprop(),
    metrics = list("mean_absolute_error")
  )
  net
}

model <- build_model(ncol(train_data))
summary(model)

##4# Train the model -----------------------------------------------------------
epochs <- 500

# First run: train for the full number of epochs.
history <- model %>% fit(
  train_data,
  train_labels,
  epochs = epochs,
  validation_split = 0.2,
  verbose = 0
)

# Visualize the model's training progress to determine when progress stops.
plot(history, metrics = "mean_absolute_error", smooth = FALSE) +
  coord_cartesian(ylim = c(0, 5)) +
  theme_ipsum(base_size = 9)

# Display training progress by printing a single dot for each completed epoch,
# starting a new line every 80 epochs.
print_dot_callback <- callback_lambda(
  on_epoch_end = function(epoch, logs) {
    if (epoch %% 80 == 0) {
      cat("\n")
    }
    cat(".")
  }
)

# The patience parameter is the amount of epochs to check for improvement.
early_stop <- callback_early_stopping(monitor = "val_loss", patience = 20)

# Second run: rebuild the model so early stopping is applied to a fresh
# network rather than to the weights left over from the 500-epoch run above.
model <- build_model(ncol(train_data))
history <- model %>% fit(
  train_data,
  train_labels,
  epochs = epochs,
  validation_split = 0.2,
  verbose = 0,
  callbacks = list(early_stop, print_dot_callback)
)

plot(history, metrics = "mean_absolute_error", smooth = FALSE) +
  coord_cartesian(ylim = c(0, 5), xlim = c(0, 100))

##5# Evaluate accuracy ---------------------------------------------------------
score <- model %>% evaluate(test_data, test_labels, verbose = 0)

# `[[` works whether evaluate() returns a named list or a named numeric
# vector; `$` fails on the latter.
test_loss <- score[["loss"]]
test_mae <- score[["mean_absolute_error"]]

str_c("Test loss:", round(test_loss, 3))
str_c(
  "Test accuracy(Mean absolute error on test set):",
  "$", round(test_mae * 1000, 2)
)

##6# Make predictions ----------------------------------------------------------
test_pred <- model %>% predict(test_data)
test_pred[, 1]