# PCA example 3
# Data source: https://www.kaggle.com/sagarnildass/red-wine-analysis-by-r/data
library(tidyverse)
library(data.table)
# Load the red-wine data set and keep only the two variables used in this
# PCA walkthrough.
wine <- fread("~/Downloads/wineQualityReds.csv")
dd <- wine[, list(alcohol, pH)]
# Scatter plot of the raw (uncentered, unscaled) data.
ggplot(dd, aes(x = alcohol, y = pH)) +
  geom_point()
# Centering ---------------------------------------------------------------
# Subtract each column's mean so that both variables are centered at zero.
dd_centering <- data.frame(
  lapply(dd, function(column) {
    column - mean(column)
  })
)
# Same scatter plot as before, now on the centered data.
ggplot(dd_centering, aes(x = alcohol, y = pH)) +
  geom_point()
# Covariance matrix of the centered data.
sigma1 <- cov(dd_centering)
# prcomp in R -------------------------------------------------------------
# PCA on the raw two-column data; prcomp() centers the columns itself
# (center = TRUE), so this corresponds to the centered matrix above.
pr_c <- prcomp(dd, center = TRUE)

# Variance explained by each component; compare with eigen(sigma1)$values.
pr_c$sdev^2

# New axes (loadings); compare with eigen(sigma1)$vectors. The sign of each
# column is arbitrary, so the two results may differ by a factor of -1.
pr_c$rotation

# New coordinates (scores); compare with predict(pr_c),
# as.matrix(dd_centering) %*% pr_c$rotation, and
# as.matrix(dd_centering) %*% eigen(sigma1)$vectors * -1.
pr_c$x

# Overlay the scores on the centered data. The colour is a fixed setting, so
# it belongs outside aes(): inside aes() the string "red" is mapped as a data
# value, which draws the points in the default palette colour and adds a
# legend entry named "red" instead of colouring the points red.
ggplot(dd_centering, aes(alcohol, pH)) +
  geom_point() +
  geom_point(data = as.data.frame(pr_c$x), aes(PC1, PC2), color = "red")

kk <- pr_c$x
str(pr_c$x)
# Normalizing -------------------------------------------------------------
# Standardize each column: subtract its mean and divide by its standard
# deviation.
dd_normalizing <- data.frame(
  lapply(dd, function(x) {
    (x - mean(x)) / sd(x)
  })
)
dd_normalizing %>%
  ggplot(aes(alcohol, pH)) +
  geom_point()
# Covariance matrix of the standardized data.
sigma2 <- cov(dd_normalizing)

# PCA with centering and scaling done by prcomp() on the raw data `dd`.
# The original call referenced `Xmatrix0`, which is not defined in this
# script; `dd` is the raw input that matches the sigma2 comparisons below.
# `scale.` is spelled out in full instead of relying on partial matching.
pr_cs <- prcomp(dd, center = TRUE, scale. = TRUE)

# Variance explained by each component; compare with eigen(sigma2)$values.
pr_cs$sdev^2

# New axes (loadings); compare with eigen(sigma2)$vectors. The sign of each
# column is arbitrary, so the two results may differ by a factor of -1.
pr_cs$rotation

# New coordinates (scores); compare with
# as.matrix(dd_normalizing) %*% eigen(sigma2)$vectors (up to sign).
pr_cs$x