# Scree Plot for Hierarchical Clustering
# Tutorial: https://www.youtube.com/watch?v=aMYCFtoBrdA by Gopal Malakar
# Scree plot for hierarchical clustering, ggplot2 version of the tutorial.
library(ggplot2)

### 1. Data & EDA ----
# (Hierarchical clustering with the sample data)

# Way 1: read a whitespace-delimited table from an inline string
temp_str <- "Name physics math
P 15 20
Q 20 15
R 26 21
X 44 52
Y 50 45
Z 57 38
A 80 85
B 90 88
C 98 98"
base_data <- read.table(textConnection(temp_str), header = TRUE)
closeAllConnections()

# Way 2: build the same data frame directly (overwrites Way 1 on purpose)
base_data <- data.frame(
  c("P", "Q", "R", "X", "Y", "Z", "A", "B", "C"),
  c(15, 20, 26, 44, 50, 57, 80, 90, 98),
  c(20, 15, 21, 52, 45, 38, 85, 88, 98)
)
names(base_data) <- c("Name", "physics", "math")

# Check structure and size of the data
# (str()/dim() called directly: `%>%` is not available — only ggplot2 is
#  loaded and it does not export the magrittr pipe)
str(base_data)
dim(base_data)

# Plot data with point labels
ggplot(base_data, aes(x = physics, y = math, label = Name)) +
  geom_point() +
  geom_text(hjust = 0, nudge_x = 1) +
  theme_bw() +
  ggtitle("Base Data")

### 2. Obtain distance matrix ----
# Euclidean distances over the two numeric columns (physics, math)
my_dist <- dist(base_data[c(2, 3)], method = "euclidean")

### 3. Apply hierarchical clustering ----
fit <- hclust(my_dist, method = "ward.D2")
str(fit)

### 4. Decide number of clusters ----
# 4.1 Scree plot
# Way 1: merge heights vs. remaining cluster count (8 merges -> 8..1 clusters)
ggplot(NULL, aes(x = length(fit$height):1, y = fit$height)) +
  geom_point() +
  geom_line() +
  theme_bw() +
  labs(title = "Scree Plot of HCluster(euclidean, ward.D2)",
       x = "# of clusters", y = "Height")

# Way 2: base graphics; prepend 0 so the 9-cluster solution has height 0
plot(9:1, append(0, fit$height), type = "b",
     xlab = "# of clusters", ylab = "Dendrogram Height")

# Way 3: explicit loop filling a height vector (index 1 stays 0)
Dendogram_Height <- 0
fit$height
for (i in 2:9) {
  Dendogram_Height[i] <- fit$height[i - 1]
  print(paste0(i, " : ", Dendogram_Height[i]))
}
plot(9:1, Dendogram_Height, type = "b",
     xlab = "# of clusters", ylab = "Dendrogram Height")

# 4.2 Dendrogram with coloured borders at candidate cuts
# (rect.hclust draws on the active plot, so re-plot the dendrogram each time)
plot(fit, labels = base_data$Name)
rect.hclust(fit, k = 8, border = "red")
plot(fit, labels = base_data$Name)
rect.hclust(fit, k = 7, border = "red")
plot(fit, labels = base_data$Name)
rect.hclust(fit, k = 6, border = "red")

# Draw colour borders around the required clusters
plot(fit, labels = base_data$Name)
rect.hclust(fit, k = 3, border = "blue")

### 5. Cut tree (into 3 clusters) ----
my_groups <- cutree(fit, k = 3)
# (Original source) Scree_Plot_for_Hierarchical_clustering_Using_R.R
#-----------------------------------------------
# Hierarchical clustering with the sample data
#------------------------------------------------
# Reading data into R similar to CARDS
temp_str <- "Name physics math
P 15 20
Q 20 15
R 26 21
X 44 52
Y 50 45
Z 57 38
A 80 85
B 90 88
C 98 98"
base_data <- read.table(textConnection(temp_str), header = TRUE)
closeAllConnections()

# Check distinct categories of variables using str()
str(base_data)

# Plot data, colouring points by factor code of Name.
# Since R 4.0 read.table() returns character columns (stringsAsFactors
# defaults to FALSE), so Name must be converted to a factor before
# unclass() can yield the 1..9 integer colour indices the original
# (pre-4.0) code relied on.
plot(base_data$physics, base_data$math, pch = 21,
     bg = c("red", "green3", "blue",
            "red", "green3", "blue",
            "red", "green3", "blue")[unclass(factor(base_data$Name))],
     main = "Base Data")

# Step 01 - obtain distance matrix (right way):
# Euclidean distance over the two numeric columns
my_dist <- dist(base_data[c(2, 3)], method = "euclidean")
print(my_dist)

# Step 02 - apply hierarchical clustering (Ward's criterion)
fit <- hclust(my_dist, method = "ward.D2")

# Step 03 - display dendrogram and scree plots
plot(fit, labels = base_data$Name)

# Build the height vector; index 1 stays 0 (no merge yet)
Dendogram_Height <- 0
for (i in 2:9) {
  Dendogram_Height[i] <- fit$height[i - 1]
}
plot(1:9, Dendogram_Height, type = "b",
     xlab = "Sequence of merging", ylab = "Dendrogram Height")
plot(9:1, Dendogram_Height, type = "b",
     xlab = "# of clusters", ylab = "Dendrogram Height")

# Step 04 - draw dendrogram with colour borders.
# rect.hclust() draws on the active plot, which at this point is the
# scree plot, so the dendrogram must be re-plotted before each call.
plot(fit, labels = base_data$Name)
rect.hclust(fit, k = 8, border = "red")
plot(fit, labels = base_data$Name)
rect.hclust(fit, k = 7, border = "red")
plot(fit, labels = base_data$Name)
rect.hclust(fit, k = 6, border = "red")

# Draw colour borders around the required clusters
plot(fit, labels = base_data$Name)
rect.hclust(fit, k = 3, border = "blue")

# Cut tree into 3 clusters
my_groups <- cutree(fit, k = 3)