R - 기본 문법

구너탱·2023년 11월 30일
0

R

목록 보기
1/3
post-thumbnail

📖 기본 문법

📌 여러 개의 값을 변수에 넣는 법

v1 <- c(1, 2, 3)
v1
[1] 1 2 3

v2 <- c(4:6)
v2
[1] 4 5 6

v3 <- seq(1, 7)
v3
[1] 1 2 3 4 5 6 7

v4 <- seq(1, 18, by = 2) # 2씩 증가하는 숫자
v4
[1]  1  3  5  7  9 11 13 15 17

📌 날짜형

var_date1 <- as.Date("2023-05-05")
var_date1
[1] "2023-05-05"

var_date2 <- as.Date("23/05/05")
var_date2
[1] "0023-05-05"	# 포맷 미지정 시 기본 형식 "%Y/%m/%d"로 해석되어 연도가 0023이 됨

# 🪄 포맷을 지정해줘야 함!
var_date2 <- as.Date("23/05/05", format = "%y/%m/%d")
var_date2
[1] "2023-05-05"

📌 행렬

  • 통계 계산이나 선형대수 계산에서 많이 사용
# matrix(입력 값, 행, 열)

m1 <- matrix(1:12, 3, 4)
m1
     [,1] [,2] [,3] [,4]
[1,]    1    4    7   10
[2,]    2    5    8   11
[3,]    3    6    9   12

m2 <- matrix(c("a", "b", "c", "d"), 2, 2)
m2
     [,1] [,2]
[1,] "a"  "c" 
[2,] "b"  "d" 

📌 배열

a1 <- array(1:12, c(2, 3, 2))
a1
, , 1

     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6

, , 2

     [,1] [,2] [,3]
[1,]    7    9   11
[2,]    8   10   12


a2 <- array(1:12, c(1, 3, 4))
a2
, , 1

     [,1] [,2] [,3]
[1,]    1    2    3

, , 2

     [,1] [,2] [,3]
[1,]    4    5    6

, , 3

     [,1] [,2] [,3]
[1,]    7    8    9

, , 4

     [,1] [,2] [,3]
[1,]   10   11   12

📌 데이터 프레임

df1 <- data.frame(id = 1:3,
+                   first_name = c("Minji", "Sara", "Jahee"),
+                   last_name = c("Kim", "Lee", "Park"))
df1
  id first_name last_name
1  1      Minji       Kim
2  2       Sara       Lee
3  3      Jahee      Park

df2 <- data.frame(v1, v2)
df2
  v1 v2
1  1  4
2  2  5
3  3  6

df4 <- data.frame(v1, v2, v3)
Error in data.frame(v1, v2, v3) : 
  arguments imply differing number of rows: 3, 7
# 데이터는 모두 같은 길이를 가져야 함
# 길이가 다른 데이터를 한 데이터프레임 안에 넣으면 에러 발생

📌 리스트

l1 <- list(v1, a2, m1, df1)
l1
[[1]]
[1] 1 2 3

[[2]]
, , 1

     [,1] [,2] [,3]
[1,]    1    2    3

, , 2

     [,1] [,2] [,3]
[1,]    4    5    6

, , 3

     [,1] [,2] [,3]
[1,]    7    8    9

, , 4

     [,1] [,2] [,3]
[1,]   10   11   12


[[3]]
     [,1] [,2] [,3] [,4]
[1,]    1    4    7   10
[2,]    2    5    8   11
[3,]    3    6    9   12

[[4]]
  id first_name last_name
1  1      Minji       Kim
2  2       Sara       Lee
3  3      Jahee      Park

📖 데이터 프레임 조작

📌 csv 호출

csv1 <- read.csv("/Users/hsty9/Documents/R/datas/emp.csv")
head(csv1)
    id ename dept_no job_level  join_date gender base bonus
1 6353  skim      10         1 2022/07/06      F 4000   400
2 6477 skim2      10         1 2020/06/01      M 3900   400
3 6302 jpark      10         2 2021/05/01      M 3700   300
4 6163  jlee      10         3 2022/09/03      M 4500   500
5 6409   msa      10         3 2020/03/18      F 4000   500
6 6018  rnoh      10         3 2021/11/20      M 3500   300

csv2 <- read.csv("/Users/hsty9/documents/R/datas/emp2.csv", header = F)
head(csv2)
    V1    V2 V3 V4         V5 V6   V7  V8
1 6353  skim 10  1 2022/07/06  F 4000 400
2 6477 skim2 10  1 2020/06/01  M 3900 390
3 6302 jpark 10  1 2021/05/01  M 3700 370
4 6162  ylee 20  1 2020/01/02  M 4400 440
5 6003  dkoh 30  1 2021/11/11  F 3600 360
6 6081  hryu 40  1 2022/05/05  F 3200 320

📌 컬럼명 변경

names(csv2)
[1] "V1" "V2" "V3" "V4" "V5" "V6" "V7" "V8"

names(csv2) = c("id", "ename", "dept_no", "level", "join_date",
+                 "gender", "base", "bonus")
head(csv2)
    id ename dept_no level  join_date gender base bonus
1 6353  skim      10     1 2022/07/06      F 4000   400
2 6477 skim2      10     1 2020/06/01      M 3900   390
3 6302 jpark      10     1 2021/05/01      M 3700   370
4 6162  ylee      20     1 2020/01/02      M 4400   440
5 6003  dkoh      30     1 2021/11/11      F 3600   360
6 6081  hryu      40     1 2022/05/05      F 3200   320

names(csv2)[1]
[1] "id"

names(csv2)[4] <- "job_level"
head(csv2)
    id ename dept_no job_level  join_date gender base bonus
1 6353  skim      10         1 2022/07/06      F 4000   400
2 6477 skim2      10         1 2020/06/01      M 3900   390
3 6302 jpark      10         1 2021/05/01      M 3700   370
4 6162  ylee      20         1 2020/01/02      M 4400   440
5 6003  dkoh      30         1 2021/11/11      F 3600   360
6 6081  hryu      40         1 2022/05/05      F 3200   320

📌 파생 변수

> emp
     id ename dept_no job_level  join_date gender base bonus
1  6353  skim      10         1 2022/07/06      F 4000   400
2  6477 skim2      10         1 2020/06/01      M 3900   400
3  6302 jpark      10         2 2021/05/01      M 3700   300
4  6163  jlee      10         3 2022/09/03      M 4500   500
5  6409   msa      10         3 2020/03/18      F 4000   500
6  6018  rnoh      10         3 2021/11/20      M 3500   300
7  6681  slee      10         4 2021/09/24      M 5000   500
8  6531 jpark      10         5 2020/09/22      F 4000   400
9  6162  ylee      20         1 2020/01/02      M 4400   400
10 6018  jlee      20         3 2022/06/20      F 3800   300
11 6335 jlee2      20         3 2023/01/05      F 4700   500
12 6700 mkwon      20         4 2023/03/18      M 7000   600
13 6252 hpark      20         5 2020/06/01      M 5600   400
14 6003  dkoh      30         2 2021/11/11      F 3600   400
15 6224   yma      30         5 2021/10/10      F 5200   500
16 6081  hryu      40         2 2022/05/05      F 3200   300
17 6484 jchae      40         3 2022/12/01      M 3400   500
18 6195 mkwon      40         3 2022/05/09      M 3300   400
19 6228   noh      40         4 2020/06/01      F 5000   500
20 6670  bjin      40         5 2021/04/08      M 6000   600

> emp$total <- emp$base + emp$bonus
> head(emp)
    id ename dept_no job_level  join_date gender base bonus total
1 6353  skim      10         1 2022/07/06      F 4000   400  4400
2 6477 skim2      10         1 2020/06/01      M 3900   400  4300
3 6302 jpark      10         2 2021/05/01      M 3700   300  4000
4 6163  jlee      10         3 2022/09/03      M 4500   500  5000
5 6409   msa      10         3 2020/03/18      F 4000   500  4500
6 6018  rnoh      10         3 2021/11/20      M 3500   300  3800

> emp$country <- "Korea"
> head(emp)
    id ename dept_no job_level  join_date gender base bonus total country
1 6353  skim      10         1 2022/07/06      F 4000   400  4400   Korea
2 6477 skim2      10         1 2020/06/01      M 3900   400  4300   Korea
3 6302 jpark      10         2 2021/05/01      M 3700   300  4000   Korea
4 6163  jlee      10         3 2022/09/03      M 4500   500  5000   Korea
5 6409   msa      10         3 2020/03/18      F 4000   500  4500   Korea
6 6018  rnoh      10         3 2021/11/20      M 3500   300  3800   Korea

📌 중첩 조건문

> emp$pos <- ifelse(emp$job_level < 3, "junior",
+                   ifelse(emp$job_level < 5, "intermediate", "senior"))
> emp$pos <- ifelse(emp$job_level %in% c(1,2), "junior",
+                       ifelse(emp$job_level %in% c(3,4), "intermediate", "senior"))
> emp$pos <- ifelse(emp$job_level == 1 | emp$job_level == 2, "junior",
+                   ifelse(emp$job_level == 3 | emp$job_level == 4, "intermediate", "senior"))
> emp
     id ename dept_no job_level  join_date gender base bonus total country    city special_bonus
1  6353  skim      10         1 2022/07/06      F 4000   400  4400   Korea   Seoul             0
2  6477 skim2      10         1 2020/06/01      M 3900   400  4300   Korea   Seoul             0
3  6302 jpark      10         2 2021/05/01      M 3700   300  4000   Korea   Seoul             0
4  6163  jlee      10         3 2022/09/03      M 4500   500  5000   Korea   Seoul          1000
5  6409   msa      10         3 2020/03/18      F 4000   500  4500   Korea   Seoul          1000
6  6018  rnoh      10         3 2021/11/20      M 3500   300  3800   Korea   Seoul          1000
7  6681  slee      10         4 2021/09/24      M 5000   500  5500   Korea   Seoul             0
8  6531 jpark      10         5 2020/09/22      F 4000   400  4400   Korea   Seoul             0
9  6162  ylee      20         1 2020/01/02      M 4400   400  4800   Korea Incheon             0
10 6018  jlee      20         3 2022/06/20      F 3800   300  4100   Korea Incheon          1000
11 6335 jlee2      20         3 2023/01/05      F 4700   500  5200   Korea Incheon          1000
12 6700 mkwon      20         4 2023/03/18      M 7000   600  7600   Korea Incheon             0
13 6252 hpark      20         5 2020/06/01      M 5600   400  6000   Korea Incheon             0
14 6003  dkoh      30         2 2021/11/11      F 3600   400  4000   Korea   Seoul             0
15 6224   yma      30         5 2021/10/10      F 5200   500  5700   Korea   Seoul             0
16 6081  hryu      40         2 2022/05/05      F 3200   300  3500   Korea   Seoul          1000
17 6484 jchae      40         3 2022/12/01      M 3400   500  3900   Korea   Seoul          1000
18 6195 mkwon      40         3 2022/05/09      M 3300   400  3700   Korea   Seoul          1000
19 6228   noh      40         4 2020/06/01      F 5000   500  5500   Korea   Seoul          1000
20 6670  bjin      40         5 2021/04/08      M 6000   600  6600   Korea   Seoul          1000
            pos
1        junior
2        junior
3        junior
4  intermediate
5  intermediate
6  intermediate
7  intermediate
8        senior
9        junior
10 intermediate
11 intermediate
12 intermediate
13       senior
14       junior
15       senior
16       junior
17 intermediate
18 intermediate
19 intermediate
20       senior

🔹 비교연산자

  • <=, <, >=, >, ==, !=

🔹 산술연산자

  • +, -, *, /, ^, %/% (몫), %% (나머지)

📌 csv 저장

write.csv(emp, file = "/Users/hsty9/Documents/R/datas/csv1_dup.csv")

이 글은 패스트캠퍼스 데이터 분석 Master Class의 강의자료 일부를 발췌하여 작성되었습니다.

profile
데이터 꿈나물

0개의 댓글