# Tutorial: Basic Regression
# https://keras.rstudio.com/articles/tutorial_basic_regression.html
# discrete label을 predict하는 classification과 달리, regression에서는 continuous value를 predict한다.
# 이 예에서는 1970년대 중반 Boston 교외 집값의 median을 predict하는 model을 만든다.
# MEDV(주택 가격 변수)와 여러 요인들(13개 변수) 간의 관계를 분석한다.
# 데이터는 Boston Housing Prices dataset을 사용한다.
# https://onesixx.com/data-boston/
### Title: --- --- --- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --
## reference:
library(keras)
library(data.table)
library(ggplot2)
library(hrbrthemes)
library(stringr)
##1# DATA Source : INPUT LAYER -------------------------------------------------
# Fetch the Boston Housing Prices dataset bundled with keras.
boston_housing <- dataset_boston_housing()
# str(boston_housing)
# List of 2 - train:test = 8:2
#  $ train:List of 2
#   ..$ x: num [1:404, 1:13] 1.2325 0.0218 4.8982 0.0396 3.6931 ...
#   ..$ y: num [1:404(1d)] 15.2 42.3 50 21.1 17.7 18.5 11.3 15.6 15.6 14.4 ...
#  $ test :List of 2
#   ..$ x: num [1:102, 1:13] 18.0846 0.1233 0.055 1.2735 0.0715 ...
#   ..$ y: num [1:102(1d)] 7.2 18.8 19 27 22.2 24.5 31.2 22.9 20.5 23.2 ...

# Split each partition into its feature matrix (x) and its label array (y).
train_data   <- boston_housing$train$x
train_labels <- boston_housing$train$y
test_data    <- boston_housing$test$x
test_labels  <- boston_housing$test$y
# str_c("Training entries: ", length(train_data), ",",
#       "labels: ", length(train_labels))
# "Training entries: 5252, labels: 404"

# Labelled data.table copy of the training features; `train_data` itself stays
# an unnamed matrix.
column_nm <- c(
  "CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
  "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT"
)
train_dt <- data.table(train_data)
setnames(train_dt, column_nm)
##2# Preprocess ---------------------------------------------------------------
# Peek at the raw data before rescaling.
train_dt[1, ]       # first sample: the features are on very different scales
train_labels[1:10]  # first ten labels, in thousands of dollars
## ` ` rescale Normalize -------------------------------------------------------
# Standardise every feature column of the training set (subtract the column
# mean, divide by the column standard deviation). scale() records the values it
# used as attributes on its result.
train_data <- scale(train_data)
train_data_colmean <- attr(train_data, "scaled:center")
train_data_colstd <- attr(train_data, "scaled:scale")
# Apply the *training* means and standard deviations to the test set, so that
# no statistic computed from the test data is used in preprocessing.
test_data <- scale(
  test_data,
  center = train_data_colmean,
  scale = train_data_colstd
)
## ` ` Plotting ----------------------------------------------------------------
##3# Build the model -----------------------------------------------------------
## ` ` reshape - Image Flatten -------------------------------------------------
# (nothing to reshape: the input is already a samples x features matrix)

# Build and compile a fresh, untrained regression network.
#
# The network is a sequential model with two densely connected hidden layers
# (64 ReLU units each) followed by a single output unit with no activation,
# which emits the predicted continuous value. It is compiled with
# mean-squared-error loss, the RMSprop optimizer and mean absolute error as the
# reported metric.
#
# n_features: number of input columns; defaults to the width of `train_data`.
# Returns the compiled keras model. Each call creates new layers, so training
# one returned model does not affect another.
build_model <- function(n_features = ncol(train_data)) {
  ## ` ` Setup the layers
  model <- keras_model_sequential() %>%
    layer_dense(units = 64, activation = "relu", input_shape = n_features) %>%
    layer_dense(units = 64, activation = "relu") %>%
    layer_dense(units = 1)

  ## ` ` Compile the model
  model %>% compile(
    loss = "mse",
    optimizer = optimizer_rmsprop(),
    metrics = list("mean_absolute_error")
  )

  model
}

model <- build_model()
model %>% summary()

##4# Train the model -----------------------------------------------------------
epochs <- 500

# First run: train for the full number of epochs, holding out 20% of the
# training samples for validation.
history <- model %>% fit(
  train_data,
  train_labels,
  epochs = epochs,
  validation_split = 0.2,
  verbose = 0
)
# Visualize the model's training progress to determine when it stops improving.
plot(history, metrics = "mean_absolute_error", smooth = FALSE) +
  coord_cartesian(ylim = c(0, 5)) +
  theme_ipsum(base_size = 9)

# Display training progress by printing a single dot for each completed epoch,
# starting a new line whenever the epoch index is a multiple of 80.
print_dot_callback <- callback_lambda(
  on_epoch_end = function(epoch, logs) {
    if (epoch %% 80 == 0) {
      cat("\n")
    }
    cat(".")
  }
)
# The patience parameter is the number of epochs to wait for an improvement in
# the monitored quantity before training is stopped.
early_stop <- callback_early_stopping(monitor = "val_loss", patience = 20)

# Second run: start again from a freshly built model. Calling fit() on the
# model trained above would continue from its already-fitted weights, so the
# early-stopping curve would not show training from scratch.
model <- build_model()
history <- model %>% fit(
  train_data,
  train_labels,
  epochs = epochs,
  validation_split = 0.2,
  verbose = 0,
  callbacks = list(early_stop, print_dot_callback)
)
plot(history, metrics = "mean_absolute_error", smooth = FALSE) +
  coord_cartesian(xlim = c(0, 100), ylim = c(0, 5))
##5# Evaluate accuracy ---------------------------------------------------------
# Score the trained model on the held-out test set. `verbose = 0` silences the
# progress output.
score <- model %>% evaluate(test_data, test_labels, verbose = 0)
# Index by name with `[[`: it works whether evaluate() returned a named list or
# a named numeric vector, whereas `$` errors on an atomic vector.
str_c('Test loss:', round(score[["loss"]], 3))
# Labels are in thousands of dollars, so multiply the MAE by 1000 to report it
# in dollars.
str_c('Test accuracy(Mean absolute error on test set):',
      "$", round(score[["mean_absolute_error"]] * 1000, 2))
##6# Make predictions ----------------------------------------------------------
# Predict a value for every test sample, then show the first column of the
# result (the model has a single output unit).
test_pred <- predict(model, test_data)
test_pred[, 1]