[R] vector, factor, matrix

JONGYOON JEON·2024년 2월 4일

R

목록 보기

2/4

단축키 TIPs

ctrl enter 실행 후 다음줄
alt enter 실행 후 현재줄

ctrl shift c 주석
ctrl d 삭제
ctrl l 콘솔 삭제
ctrl shift f10 R 세션 다시 시작
alt - make <-
alt 화살표 방향대로 움직임
alt shift k 단축키보여줌

벡터 함수

# Numeric helpers: tolerant comparison, logs/exponentials, rounding,
# and the special values Inf / NaN / NA.

# all.equal() compares within a numeric tolerance and isTRUE() collapses the
# result to a single TRUE/FALSE. sqrt(2)^2 is about 2, so this is FALSE.
isTRUE(all.equal(sqrt(2)^2, 3))

log(1:5, base = exp(10))  # logarithm with base e^10
log2(1:5)
log10(1:10)
exp(1:5)

y <- exp(1:5)
log(y)                    # natural log undoes exp()
options("digits")         # number of significant digits printed (7 here)
signif(454334, 2)         # keep 2 significant digits
round(123123, -3)         # negative digits round left of the decimal point
round(12.5)               # goes to the nearest even number, which is surprising
floor(13.4)
ceiling(13.4)
trunc(345.63)             # integer closest to 0 (fraction dropped)
Inf                       # infinity; R is case-sensitive, `inf` is not defined
is.infinite(1 / 0)        # is it infinite?
is.infinite(10^(305:310)) # values become Inf from about 1.8e308
NaN                       # not a number: the result cannot be determined
log(-2)                   # NaN
is.nan(NaN + 3)           # TRUE
NA                        # not available: a missing value, itself one value
is.na(NA + 5)             # TRUE

# Summary statistics on a vector that contains one missing value.
z <- c(1, 2, 3, 5, 7, NA)
sum(z)
prod(z)
max(z)
min(z)
mean(z)
median(z)
range(z)
var(z, na.rm = TRUE)  # NA excluded
sd(na.omit(z))        # NA excluded

# A vector made only of missing values: with na.rm = TRUE no error is raised,
# so the result may differ from what you expected.
v <- rep(NA, 5)
sum(v, na.rm = TRUE)

# Cumulative summaries and successive differences of a numeric vector.
traffic.death <- c(842, 848, 441, 326, 789, 237, 997)
cumsum(traffic.death)   # running sum
cumprod(traffic.death)  # running product
cummax(traffic.death)   # running maximum
cummin(traffic.death)   # running minimum; cum* cannot exclude missing values

diff(traffic.death)         # difference to the next element; NAs not removed
diff(seq_len(5), lag = 2)   # lag = distance between the compared elements

# Set operations on two overlapping integer ranges.
p <- seq_len(10)
q <- 6:15
union(p, q)      # union
intersect(p, q)  # intersection
setdiff(p, q)    # difference (in p but not in q)
setequal(p, q)   # are the two sets equal?
setdiff(p, q) %in% p  # membership test against p
setdiff(p, q) %in% q  # membership test against q

벡터 인덱싱

# Vector indexing by position, negative position, condition and logical mask.
num <- 0:30
num[1]

prime <- c(2, 3, 5, 7, 11, 13, 17, 19)
prime[c(7, 6, 5, 7)]

indices <- seq(1, 7, by = 2)
prime[indices]
tail(prime, -1)  # print without the first element

head(prime, -1)  # print without the last element

prime[11] <- 23  # assigning past the end fills the gap with NA
prime[2] <- 4
prime[prime < 10]
prime[prime %% 2 == 0]

prime[seq_along(prime) %% 2 == 0]  # every 2nd element
prime[c(FALSE, FALSE, TRUE)]       # every 3rd element (mask is recycled)

# Locating elements by value with which(), which.max() and which.min().
rainfall <- c(
  21.6, 23.6, 45.8, 77.0, 102, 133.3,
  327.9, 348.0, 137.6, 49.3, 53.0, 24.9
)
which.max(rainfall)                  # index of the largest value
which(rainfall > 100)                # indices that satisfy the condition
month.name[which.max(rainfall)]
month.abb[which.min(rainfall)]
rainfall[rainfall > 100]             # the rainfall values above 100

# Give each position of traffic.death a name with names().
names(traffic.death) <- c("월", "화", "수", "목", "금", "토", "일")
traffic.death["월"]
traffic.death[traffic.death > 800]         # names and values together
names(traffic.death)[traffic.death > 800]  # names only

factor()

벡터의 "" 마크 사라지고 levels 이 생김

# Build a factor from character reviews and inspect or rename its levels.
review <- c("good", "good", "indifferent", "bad", "good", "bad")
review.factor <- factor(review)

str(review.factor)         # look inside the factor
as.numeric(review.factor)  # integer codes, in level order
# Without `levels` the order is alphabetical; pass `levels` to set it yourself.
factor(review, levels = c("good", "indifferent", "bad"))
# Rename the existing (alphabetical) levels: bad, good, indifferent.
levels(review.factor) <- c("B", "G", "I")
nlevels(review.factor)         # number of levels
length(levels(review.factor))  # number of levels, computed by hand

# Ordered factor: categorical data whose levels have a rank.
eval <- c("Medium", "Low", "High", "Medium", "High")
eval.factor <- factor(eval)
# `ordered = TRUE` is an argument of factor(), not an element of `levels`;
# inside c() it would become a fourth level named "TRUE".
eval.ordered <- factor(
  eval,
  levels = c("Low", "Medium", "High"),
  ordered = TRUE
)
table(eval.ordered)  # count per level

# Map numeric codes to readable labels; codes not listed in `levels`
# (the 0 here) become NA.
sex <- c(2, 1, 2, 2, 1, 0)
sex.factor <- factor(sex, levels = c(1, 2), labels = c("Male", "Female"))
table(sex.factor)  # counts per assigned label

forcats 라이브러리

# Reordering and recoding factor levels with forcats.
library(forcats)
food <- factor(c(
  "Vegetables", "Fruits", "Vegetables", "Grains", "Fruits",
  "Vegetables", "Dairy", "Fruits", "Proteins", "Fruits"
))
fct_inorder(food)                           # levels in order of first appearance (default is alphabetical)
fct_infreq(food)                            # levels by descending frequency
fct_relevel(food, "Proteins")               # move the given level to the front
fct_relevel(food, "Proteins", after = 2)    # place it after the 2nd level
fct_relevel(food, "Proteins", after = Inf)  # move it to the end

value <- c(100, 1500, 1200, 700, 2000, 2000, 1350, 2500, 15000, 3000)
# Order levels by unit price (ascending by default); when a level has several
# values, their median is used.
fct_reorder(food, .x = value)
# Order levels by the mean of their values, descending.
fct_reorder(food, .x = value, .fun = mean, .desc = TRUE)

fct_recode(food, Fats = "Proteins", Fats = "Dairy")  # rename levels

행렬 matrix

! 2차원 벡터
ex. matrix(data = NA, nrow = 1, ncol = 1, byrow = FALSE, dimnames = NULL)

# Creating matrices from a vector and inspecting their shape.
v <- 1:12
dim(v) <- c(3, 4)                              # give the vector dimensions
matrix(v, nrow = 3, ncol = 4)                  # filled column by column (default)
matrix(v, nrow = 3, ncol = 4, byrow = TRUE)    # filled row by row
rname <- paste0("R", 1:3)
cname <- paste0("C", 1:4)
matrix(v, nrow = 3, ncol = 4, dimnames = list(rname, cname))  # row/column names

mat <- matrix(v, ncol = 3)  # row count is derived from the column count
str(mat)     # structure
dim(mat)[1]  # first element of the dimensions (rows)
nrow(mat)    # number of rows
ncol(mat)    # number of columns
length(mat)  # number of cells

# Build matrices by binding vectors (or matrices) together.
v1 <- as.numeric(1:5)
v2 <- as.numeric(6:10)
rbind(v1, v2)  # each vector becomes a row
cbind(v1, v2)  # each vector becomes a column
rbind(matrix(1:12, nrow = 3, ncol = 4), matrix(10:21, nrow = 3, ncol = 4))

paste("a", 10)   # joined with a space
paste0("a", 10)  # joined with no separator

# Matrix operations: element-wise arithmetic, matrix products, recycling,
# row/column summaries and transpose.
mtx <- matrix(1:6, 2, 3)
mtx + 1   # scalar operations apply to every cell
mtx * 3
mtx %% 2  # remainder of every cell

a <- matrix(1:6, 2, 3)
b <- matrix(6:1, 2, 3)
a + b
# (2 x 3) %*% (2 x 3) does not conform, so this raises an error; try() shows
# the message without stopping the rest of the script.
try(a %*% b)

c <- matrix(6:1, 3, 2)
a * b    # element-wise product of corresponding cells
a %*% c  # matrix product: (2 x 3) %*% (3 x 2)

mtx %*% 1:3
a %*% 1:3  # the vector is used as a row or column, whichever conforms

# A length-2 vector conforms neither as a row nor as a column: error.
try(a %*% 2:3)

a %*% 3:1  # length-3 vector is accepted

1:2 %*% a  # on the left the vector works as a 1 x 2 row

mtx <- matrix(1:6, 2, 3)
mtx + 1:3  # recycling rule: 1:3 is reused along the columns

# Row sums come from rowSums(); there is no rowSum() function.
rowSums(a)   # row sums
colSums(a)   # column sums
rowMeans(a)  # row means
colMeans(a)  # column means

t(a)  # transpose
mtx[2, ]
mtx[1, ]
t(mtx[2, ])  # a plain vector is transposed into a 1 x 3 matrix

행렬 인덱싱

# Matrix indexing by row, column, range and negative index, plus assignment.
mat <- matrix(1:12, nrow = 3, ncol = 4)
str(mat)
mat[1, ]
mat[, 1]
mat[, 3]

# With drop = TRUE a single row comes back as a plain vector, not a matrix,
# so later matrix code such as t() may not behave as expected.
mat[1, , drop = TRUE]

# drop = FALSE keeps the result a matrix.
mat[1, , drop = FALSE]

mat[2:3, ]
mat[1:2, 2:3]
mat[c(1, 3), ]
mat[-1, ]
mat[1, 3] <- 99
mat[2, ] <- c(22, 55)  # two values recycled across the four columns

# Pairwise distances between five cities, stored as a named 5 x 5 matrix.
city.distance <- c(
  0, 331, 238, 269, 195,
  331, 0, 95, 194, 189,
  238, 95, 0, 171, 130,
  269, 194, 171, 0, 77,
  195, 189, 130, 77, 0
)

city.distance.mat <- matrix(city.distance, 5, 5, byrow = TRUE)

colnames(city.distance.mat) <- c("Seoul", "Busan", "Daegu", "Gwangju", "Jeonju")
rownames(city.distance.mat) <- c("Seoul", "Busan", "Daegu", "Gwangju", "Jeonju")

# Column names are read with colnames(); `cownames` is not a function.
rownames(city.distance.mat)
colnames(city.distance.mat)
city.distance.mat["Seoul", "Busan"]           # a single cell by name
city.distance.mat["Seoul", ]                  # one row by name
city.distance.mat[c("Seoul", "Gwangju"), ]    # several rows by name

JONGYOON JEON

효율적인 걸 좋아해요

이전 포스트

[R] 기초문법

다음 포스트