SAS_codes_multivariate_5(hw5)

TEMP·2021년 11월 22일
0

SAS

목록 보기
11/13
post-custom-banner

주성분 분석

Principal component analysis; PCA

NLP Embedding 인가?

2.

correlation matrix given

위와 같이 상관계수 행렬이 주어졌을 때 주성분 분석

data 입력

/* Q2: 19 x 19 correlation matrix entered row by row.
   Each data line is one matrix row; columns are read into y1..y19. */
data Q2;
    input y1-y19;
    datalines;
1.000 .934 .927 .909 .524 .799 .854 .789 .835 .845 -.458 .917 .939 .953 .895 .691 .327 -.676 .702
.934 1.000 .941 .944 .487 .821 .865 .834 .863 .878 -.496 .942 .961 .954 .899 .652 .305 -.712 .729
.927 .941 1.000 .933 .543 .856 .886 .846 .862 .863 -.522 .940 .956 .946 .882 .694 .356 -.667 .746
.909 .944 .933 1.000 .499 .833 .889 .885 .850 .881 -.488 .945 .952 .949 .908 .623 .272 -.736 .777
.524 .487 .543 .499 1.000 .703 .719 .253 .462 .567 -.174 .516 .494 .452 .551 .815 .746 -.233 .285
.799 .821 .856 .833 .703 1.000 .923 .699 .752 .836 -.317 .846 .849 .823 .831 .812 .553 -.504 .499
.854 .865 .886 .889 .719 .923 1.000 .751 .793 .913 -.383 .907 .914 .886 .891 .855 .567 -.502 .592
.789 .834 .846 .885 .253 .699 .751 1.000 .745 .787 -.497 .861 .876 .878 .794 .410 .067 -.758 .793
.835 .863 .862 .850 .462 .752 .793 .745 1.000 .805 -.356 .848 .877 .883 .818 .620 .300 -.666 .671
.845 .878 .863 .881 .567 .836 .913 .787 .805 1.000 -.371 .902 .901 .891 .848 .712 .384 -.629 .668
-.458 -.496 -.522 -.488 -.174 -.317 -.383 -.497 -.356 -.371 1.000 -.465 -.447 -.439 -.405 -.198 -.032 .492 -.425
.917 .942 .940 .945 .516 .846 .907 .861 .848 .902 -.465 1.000 .981 .971 .908 .725 .396 -.657 .696
.939 .961 .956 .952 .494 .849 .914 .876 .877 .901 -.447 .981 1.000 .991 .920 .714 .360 -.655 .724
.953 .954 .946 .949 .452 .823 .886 .878 .883 .891 -.439 .971 .991 1.000 .921 .676 .298 -.678 .731
.895 .899 .882 .908 .551 .831 .891 .794 .818 .848 -.405 .908 .920 .921 1.000 .720 .378 -.633 .694
.691 .652 .694 .623 .815 .812 .855 .410 .620 .712 -.198 .725 .714 .676 .720 1.000 .781 -.186 .287
.327 .305 .356 .272 .746 .553 .567 .067 .300 .384 -.032 .396 .360 .298 .378 .781 1.000 .169 .026
-.676 -.712 -.667 -.736 -.233 -.504 -.502 -.758 -.666 -.629 .492 -.657 -.655 -.678 -.633 -.186 .169 1.000 -.775
.702 .729 .746 .777 .285 .499 .592 .793 .671 .668 -.425 .696 .724 .731 .694 .287 .026 -.775 1.000
;
run;

고유치들로만 이루어진 데이터셋 만들기

/* Eigen decomposition of the correlation matrix stored in Q2.
   Prints the matrix, its transpose, their difference (a symmetry check),
   the eigenvalues and the eigenvectors, then writes the eigenvalues to the
   data set VALUES (single variable: val) for the following steps. */
proc iml;
    use Q2;
    read all into A;
    close Q2;               /* release the input data set once it has been read */
    print A;

    /* Symmetry check: every element of test should be 0 for a valid
       correlation matrix. */
    T = T(A);
    print T;
    test = T - A;
    print test;

    val = eigval(A);
    vec = eigvec(A);
    print val;
    print vec;

    /* Name the variable on CREATE so the layout of VALUES is pinned to val;
       the original CREATE had no VAR clause and left the variable list to
       the IML default. */
    create values var {val};
    append;
    close values;
/* No RUN statement here: inside PROC IML, RUN executes a module (MAIN when
   no name is given) rather than marking a step boundary. QUIT ends the
   procedure. */
quit;

고유치를 이용하여 누적률 계산하기

/* Proportion and cumulative proportion of total variance carried by each
   eigenvalue in VALUES (variable val). */

/* Listing of the eigenvalue total, kept for visual reference
   (19.0000000 for the 19-variable correlation matrix). */
proc means data = values sum;
    var val;
run;

data h;
    /* First pass, executed once: total the eigenvalues so the denominator
       comes from the data instead of being hand-copied from the PROC MEANS
       listing (the original hard-coded 19). */
    if _n_ = 1 then do until (done);
        set values (keep = val) end = done;
        total + val;
    end;

    /* Second pass: one output row per eigenvalue. */
    set values;
    percent = val / total;
    cumul + percent;        /* sum statement: starts at 0 and is retained across rows */
    keep val percent cumul;
run;

proc print data = h;
run;

4.

data가 주어졌을 때 주성분 분석하기

data 입력

/* Q4: raw data, one observation per data line, 11 numeric variables y1..y11. */
data Q4;
    input y1-y11;
    datalines;
84 65 147 85 59 151 95 40 398 273 30
84 65 149 86 61 159 94 28 345 140 34
79 66 142 83 64 152 94 41 368 318 33
81 67 147 83 65 158 94 50 406 282 26
84 68 167 88 69 180 93 46 379 311 41
74 66 131 77 67 147 96 73 478 446 4
73 66 131 78 69 159 96 72 462 294 5
75 67 134 84 68 159 95 70 464 313 20
84 68 161 89 71 195 95 63 430 455 31
86 72 169 91 76 206 93 56 406 604 36
88 73 176 91 76 206 94 55 393 610 43
90 74 187 94 76 211 94 51 385 520 47
88 72 171 94 75 211 96 54 405 663 45
58 72 171 92 70 201 95 51 392 467 45
81 69 154 87 68 167 95 61 448 184 11
79 68 149 83 68 162 95 59 436 177 10
84 69 160 87 66 173 95 42 392 173 30
84 70 160 87 68 177 94 44 392 76 29
84 70 168 88 70 169 95 48 396 72 23
77 67 147 83 66 170 97 60 431 183 16
87 67 166 92 67 196 96 44 379 76 37
89 69 171 92 72 199 94 48 393 230 50
89 72 180 94 72 204 95 48 394 193 36
93 72 186 92 73 201 94 47 386 400 54
93 74 188 93 72 206 95 47 389 339 44
94 75 199 94 72 208 96 45 370 172 41
93 74 193 95 73 214 95 50 396 238 45
93 74 196 95 70 210 96 45 380 118 42
96 75 198 95 71 207 93 40 365 93 50
95 76 202 95 69 202 93 39 357 269 48
84 73 173 96 69 173 94 58 418 128 17
91 71 170 91 69 168 94 44 420 423 20
88 72 179 89 70 189 93 50 399 415 15
89 72 179 95 71 210 98 46 389 300 42
91 72 182 96 73 208 95 43 384 193 44
92 74 196 97 75 215 96 46 389 195 41
94 75 192 96 69 198 95 36 380 215 49
96 75 195 95 67 196 97 24 354 185 53
93 76 198 94 75 211 93 43 364 466 53
88 74 188 92 73 198 95 52 405 399 21
88 74 178 90 74 197 95 61 447 232 1
91 72 175 94 70 205 94 42 380 275 44
92 72 190 95 71 209 96 44 379 166 44
92 73 189 96 72 208 93 42 372 189 46
94 75 194 95 71 208 93 43 373 164 47
96 76 202 96 71 208 94 40 368 139 50
;
run;

주성분 분석
공분산 행렬로 하고 싶으면

/* PCA of Q4 based on the covariance matrix (COV option).
   OUT= writes the input data together with the component scores to cov_out. */
proc princomp
        data = Q4
        cov
        out  = cov_out;
    var y1 -- y11;
run;

상관계수 행렬로 하고 싶으면 (default가 corr)

/* PCA of Q4 based on the correlation matrix (no COV option given).
   Scores are written to cov_out, the same data set name the covariance run uses. */
proc princomp
        out  = cov_out
        data = Q4;
    var y1 -- y11;
run;

정규성 검토
대우명제 이용: 원자료가 다변량 정규분포를 따르면 (원변수의 선형결합인) 각 주성분도 정규분포를 따른다. 따라서 주성분 중 하나라도 정규성을 만족하지 않으면 원자료는 다변량 정규분포를 따르지 않는다.

/* Normality check on the first four principal component scores in cov_out:
   univariate summaries plus one Q-Q plot per component. */
proc univariate data = cov_out;
    var    prin1-prin4;
    qqplot prin1-prin4;
run;
post-custom-banner

0개의 댓글