# Scree Plot for Hierarchical clustering

library(ggplot2)

### 1. Data & EDA ----
# (Hierarchical clustering with the sample data)

# Way 1: parse the sample scores from an inline, whitespace-delimited table.
# Each row is one student: a name plus physics and math scores.
temp_str <- "Name physics math
                P      15   20
                Q      20   15
                R      26   21
                X      44   52
                Y      50   45
                Z      57   38
                A      80   85
                B      90   88
                C      98   98"
# `text =` lets read.table() open and close its own connection, so no
# textConnection is left dangling. stringsAsFactors = FALSE keeps Name as
# character regardless of the R version.
base_data <- read.table(text = temp_str, header = TRUE,
                        stringsAsFactors = FALSE)

# Way 2: build the sample data directly as a data frame.
# NOTE(review): the original data.frame() call was truncated; the values
# below mirror the table used in Way 1 -- confirm this was the intent.
base_data <- data.frame(
    Name    = c("P", "Q", "R", "X", "Y", "Z", "A", "B", "C"),
    physics = c(15L, 20L, 26L, 44L, 50L, 57L, 80L, 90L, 98L),
    math    = c(20L, 15L, 21L, 52L, 45L, 38L, 85L, 88L, 98L),
    stringsAsFactors = FALSE
)
names(base_data) <- c("Name", "physics", "math")

# Inspect the column types and the dimensions of the data.
# Plain calls are used instead of `%>%` so no pipe package has to be attached.
str(base_data)
dim(base_data)

# Scatter plot of the raw scores (physics on x, math on y), with each point
# labelled by the student's name just to its right.
ggplot(data = base_data,
       mapping = aes(x = physics, y = math, label = Name)) +
    geom_point() +
    geom_text(hjust = 0, nudge_x = 1) +
    theme_bw() +
    labs(title = "Base Data")

### 2. Obtain distance matrix ----
# Pairwise Euclidean distances computed on the two score columns only;
# selecting by name keeps this correct even if the column order changes.
my_dist <- dist(base_data[c("physics", "math")], method = "euclidean")

### 3. Apply Hierarchical Clustering ----
# Agglomerative clustering on the distance matrix using the "ward.D2" linkage.
fit <- hclust(my_dist, method = "ward.D2")
# Inspect the fitted object (merge order, heights, ...). str() is called
# directly so no pipe package has to be attached.
str(fit)

### 4. Decide # of Cluster ----
# 4.1. Scree Plot
# Way 1 (ggplot2): the i-th merge height is plotted against the cluster
# count running from length(fit$height) down to 1.
ggplot(mapping = aes(x = rev(seq_along(fit$height)), y = fit$height)) +
    geom_point() +
    geom_line() +
    theme_bw() +
    ggtitle("Scree Plot of HCluster(euclidean, ward.D2)") +
    xlab("# of clusters") +
    ylab("Height")
# Way 2 (base graphics)
# fit$height holds one height per merge, so there is one more observation
# than there are heights. A leading 0 stands for the state before any merge
# (every point is its own cluster).
n_obs <- length(fit$height) + 1L
plot(n_obs:1, c(0, fit$height), type = "b",
     xlab = "# of clusters", ylab = "Dendogram Height")

# Same curve built step by step, printing each merge height on the way.
# Position i of Dendogram_Height is drawn at (n_obs + 1 - i) clusters.
# The vector is allocated up front; slot 1 stays 0 (no merge yet).
Dendogram_Height <- numeric(n_obs)
for (i in seq_len(n_obs)[-1]) {
    Dendogram_Height[i] <- fit$height[i - 1]
    print(paste0(i, " : ", Dendogram_Height[i]))
}
plot(n_obs:1, Dendogram_Height, type = "b",
     xlab = "# of clusters", ylab = "Dendogram Height")

### Way 2 - Dendrogram
# Redraw the dendrogram once per candidate k and box the resulting clusters
# in red. Stepping through k = 8, 7, 6 shows how the groups merge.
for (n_clusters in c(8, 7, 6)) {
    plot(fit, labels = base_data$Name)
    rect.hclust(fit, k = n_clusters, border = "red")
}

# Draw blue borders around the required clusters (k = 3)
plot(fit, labels = base_data$Name)
rect.hclust(fit, k = 3, border = "blue")

### 5. Cut tree (into 3 clusters) ----
# Assign each observation to one of 3 groups by cutting the fitted tree.
my_groups <- cutree(tree = fit, k = 3)


