PCA eigen 관계
# PCA via prcomp() compared with an eigen decomposition of the
# covariance / correlation matrix.
#
# Data from https://ratsgo.github.io/machine%20learning/2017/04/24/PCA/
#      n1    n2    n3    n4    n5
# p1  0.2   0.45  0.33  0.54  0.77
# p2  5.6   5.89  6.37  7.9   7.87
# p3  3.56  2.4   1.95  1.32  0.98
Xorigin <- data.frame(
  p1 = c(0.2, 0.45, 0.33, 0.54, 0.77),
  p2 = c(5.6, 5.89, 6.37, 7.9, 7.87),
  p3 = c(3.56, 2.4, 1.95, 1.32, 0.98)
)
Xmatrix0 <- data.matrix(Xorigin)

# centering ----------------------------------------------------------------
# Subtract each column's mean; the covariance of the centered data is sigma1.
Xorigin_centering <- data.frame(lapply(Xorigin, function(x) x - mean(x)))
Xmatrix1 <- data.matrix(Xorigin_centering)
sigma1 <- cov(Xmatrix1)

# normalizing ----------------------------------------------------------------
# Center and divide by the standard deviation; the covariance of the
# standardized data (sigma2) is the correlation matrix of the original data.
Xorigin_centering_scale <- data.frame(
  lapply(Xorigin, function(x) (x - mean(x)) / sd(x))
)
Xmatrix2 <- data.matrix(Xorigin_centering_scale)
sigma2 <- cov(Xmatrix2)

# prcomp in R ----------------------------------------------------------------
# Centered PCA: corresponds to working on Xmatrix1 / sigma1.
pr_c <- prcomp(Xmatrix0, center = TRUE)
eig1 <- eigen(sigma1)

pr_c$sdev^2    # explained variance per component
pr_c$rotation  # new axes (loadings)
pr_c$x         # new coordinates (scores)
predict(pr_c)

# Without newdata, predict() returns the stored scores.
identical(predict(pr_c), pr_c$x)

# Variances equal the eigenvalues of sigma1.
all.equal(unname(pr_c$sdev^2), eig1$values)
# The sign of each eigenvector is arbitrary, so axes and scores are compared
# by absolute value rather than assuming a fixed factor of -1.
all.equal(abs(unname(pr_c$rotation)), abs(eig1$vectors))
all.equal(abs(unname(pr_c$x)), abs(unname(Xmatrix1 %*% eig1$vectors)))

# Centered and scaled PCA: corresponds to working on Xmatrix2 / sigma2.
# `scale.` is spelled out in full instead of relying on partial matching.
pr_cs <- prcomp(Xmatrix0, center = TRUE, scale. = TRUE)
eig2 <- eigen(sigma2)

pr_cs$sdev^2    # explained variance per component
pr_cs$rotation  # new axes (loadings)
pr_cs$x         # new coordinates (scores)

all.equal(unname(pr_cs$sdev^2), eig2$values)
all.equal(abs(unname(pr_cs$rotation)), abs(eig2$vectors))
all.equal(abs(unname(pr_cs$x)), abs(unname(Xmatrix2 %*% eig2$vectors)))
선형변환을 통해 분산이 최대가 되고 서로 직교하는 새로운 축을 찾아, 그 축을 기준으로 데이터를 Projection(사영)한다.
이를 통해 고차원(여기서는 2차원) → 저차원으로 축소할 수 있고, 기존 변수 V1, V2로부터 새로운 변수 PC1, PC2를 추출할 수 있다.
# Two-variable PCA example: plot the raw data, the standardized data, and the
# principal-component scores on top of the standardized data.
library(ggplot2)

Xorigin <- data.frame(
  v1 = c(22, 45, 33, 54, 77, 56, 40),
  v2 = c(56, 58, 63, 79, 78, 70, 68)
)

# Raw data. The colour is a fixed setting, so it goes outside aes(); inside
# aes() the string would be treated as a data value and add a legend.
(p <- ggplot(Xorigin) +
  geom_point(aes(x = v1, y = v2), color = "skyblue") +
  theme_bw())

# Standardize each column (center, then divide by the standard deviation).
Xorigin_centering_scale <- data.frame(
  lapply(Xorigin, function(x) (x - mean(x)) / sd(x))
)
Xmatrix2 <- data.matrix(Xorigin_centering_scale)
# Recomputed here so the comparisons below refer to this 2-variable data set.
sigma2 <- cov(Xmatrix2)

# Standardized data with the default geom_abline() reference line.
(p_n <- ggplot(Xorigin_centering_scale, aes(x = v1, y = v2)) +
  geom_point(color = "darkblue") +
  geom_abline(color = "darkblue"))

# Centered and scaled PCA: corresponds to working on Xmatrix2 / sigma2.
# `scale.` is spelled out in full instead of relying on partial matching.
pr_cs <- prcomp(Xorigin, center = TRUE, scale. = TRUE)
pr_cs$sdev^2    # compare with eigen(sigma2)$values
pr_cs$rotation  # compare with eigen(sigma2)$vectors (column signs may differ)
pr_cs$x         # compare with Xmatrix2 %*% eigen(sigma2)$vectors (up to sign)

# Overlay the PC scores. `data` must be passed by name because the first
# positional argument of geom_point() is `mapping`.
(p_pc <- p_n +
  geom_point(
    data = data.frame(pr_cs$x),
    mapping = aes(x = PC1, y = PC2),
    color = "red",
    shape = 6
  ) +
  geom_hline(yintercept = 0, color = "red") +
  theme_bw())