v1 <- c(1, 2, 3)
v1
[1] 1 2 3
v2 <- c(4:6)
v2
[1] 4 5 6
v3 <- seq(1, 7)
v3
[1] 1 2 3 4 5 6 7
v4 <- seq(1, 18, by = 2) # 2씩 증가하는 숫자
v4
[1] 1 3 5 7 9 11 13 15 17
var_date1 <- as.Date("2023-05-05")
var_date1
[1] "2023-05-05"
var_date2 <- as.Date("23/05/05")
var_date2
[1] "0023-05-05" # ???
# 🪄 포맷을 지정해줘야 함!
var_date2 <- as.Date("23/05/05", format = "%y/%m/%d")
var_date2
[1] "2023-05-05"
# matrix(입력 값, 행 수, 열 수)
m1 <- matrix(1:12, 3, 4)
m1
[,1] [,2] [,3] [,4]
[1,] 1 4 7 10
[2,] 2 5 8 11
[3,] 3 6 9 12
m2 <- matrix(c("a", "b", "c", "d"), 2, 2)
m2
[,1] [,2]
[1,] "a" "c"
[2,] "b" "d"
a1 <- array(1:12, c(2, 3, 2))
a1
, , 1
[,1] [,2] [,3]
[1,] 1 3 5
[2,] 2 4 6
, , 2
[,1] [,2] [,3]
[1,] 7 9 11
[2,] 8 10 12
a2 <- array(1:12, c(1, 3, 4))
a2
, , 1
[,1] [,2] [,3]
[1,] 1 2 3
, , 2
[,1] [,2] [,3]
[1,] 4 5 6
, , 3
[,1] [,2] [,3]
[1,] 7 8 9
, , 4
[,1] [,2] [,3]
[1,] 10 11 12
# Build a three-row example data frame: an id column (1 to 3) plus
# first-name and last-name columns. The console continuation prompts ("+")
# are left out so the call can be pasted and run as-is.
df1 <- data.frame(
  id = 1:3,
  first_name = c("Minji", "Sara", "Jahee"),
  last_name = c("Kim", "Lee", "Park")
)
df1
id first_name last_name
1 1 Minji Kim
2 2 Sara Lee
3 3 Jahee Park
df2 <- data.frame(v1, v2)
df2
v1 v2
1 1 4
2 2 5
3 3 6
df4 <- data.frame(v1, v2, v3)
Error in data.frame(v1, v2, v3) :
arguments imply differing number of rows: 3, 7
# 데이터는 모두 같은 길이를 가져야 함
# 길이가 다른 데이터를 한 데이터프레임 안에 넣으면 에러 발생
l1 <- list(v1, a2, m1, df1)
l1
[[1]]
[1] 1 2 3
[[2]]
, , 1
[,1] [,2] [,3]
[1,] 1 2 3
, , 2
[,1] [,2] [,3]
[1,] 4 5 6
, , 3
[,1] [,2] [,3]
[1,] 7 8 9
, , 4
[,1] [,2] [,3]
[1,] 10 11 12
[[3]]
[,1] [,2] [,3] [,4]
[1,] 1 4 7 10
[2,] 2 5 8 11
[3,] 3 6 9 12
[[4]]
id first_name last_name
1 1 Minji Kim
2 2 Sara Lee
3 3 Jahee Park
csv1 <- read.csv("/Users/hsty9/Documents/R/datas/emp.csv")
head(csv1)
id ename dept_no job_level join_date gender base bonus
1 6353 skim 10 1 2022/07/06 F 4000 400
2 6477 skim2 10 1 2020/06/01 M 3900 400
3 6302 jpark 10 2 2021/05/01 M 3700 300
4 6163 jlee 10 3 2022/09/03 M 4500 500
5 6409 msa 10 3 2020/03/18 F 4000 500
6 6018 rnoh 10 3 2021/11/20 M 3500 300
# Read emp2.csv treating its first line as data rather than as a header, so
# the columns receive the default names V1, V2, ...
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
# NOTE(review): this path uses lowercase "documents" while the other paths in
# this file use "Documents" - confirm on a case-sensitive filesystem.
csv2 <- read.csv("/Users/hsty9/documents/R/datas/emp2.csv", header = FALSE)
head(csv2)
V1 V2 V3 V4 V5 V6 V7 V8
1 6353 skim 10 1 2022/07/06 F 4000 400
2 6477 skim2 10 1 2020/06/01 M 3900 390
3 6302 jpark 10 1 2021/05/01 M 3700 370
4 6162 ylee 20 1 2020/01/02 M 4400 440
5 6003 dkoh 30 1 2021/11/11 F 3600 360
6 6081 hryu 40 1 2022/05/05 F 3200 320
names(csv2)
[1] "V1" "V2" "V3" "V4" "V5" "V6" "V7" "V8"
# Replace the auto-generated column names (V1 to V8) of csv2 with meaningful
# ones. Uses `<-` for assignment and omits the console continuation prompt
# ("+") so the statement can be run as-is.
names(csv2) <- c(
  "id", "ename", "dept_no", "level", "join_date",
  "gender", "base", "bonus"
)
head(csv2)
id ename dept_no level join_date gender base bonus
1 6353 skim 10 1 2022/07/06 F 4000 400
2 6477 skim2 10 1 2020/06/01 M 3900 390
3 6302 jpark 10 1 2021/05/01 M 3700 370
4 6162 ylee 20 1 2020/01/02 M 4400 440
5 6003 dkoh 30 1 2021/11/11 F 3600 360
6 6081 hryu 40 1 2022/05/05 F 3200 320
names(csv2)[1]
[1] "id"
names(csv2)[4] <- "job_level"
head(csv2)
id ename dept_no job_level join_date gender base bonus
1 6353 skim 10 1 2022/07/06 F 4000 400
2 6477 skim2 10 1 2020/06/01 M 3900 390
3 6302 jpark 10 1 2021/05/01 M 3700 370
4 6162 ylee 20 1 2020/01/02 M 4400 440
5 6003 dkoh 30 1 2021/11/11 F 3600 360
6 6081 hryu 40 1 2022/05/05 F 3200 320
> emp
id ename dept_no job_level join_date gender base bonus
1 6353 skim 10 1 2022/07/06 F 4000 400
2 6477 skim2 10 1 2020/06/01 M 3900 400
3 6302 jpark 10 2 2021/05/01 M 3700 300
4 6163 jlee 10 3 2022/09/03 M 4500 500
5 6409 msa 10 3 2020/03/18 F 4000 500
6 6018 rnoh 10 3 2021/11/20 M 3500 300
7 6681 slee 10 4 2021/09/24 M 5000 500
8 6531 jpark 10 5 2020/09/22 F 4000 400
9 6162 ylee 20 1 2020/01/02 M 4400 400
10 6018 jlee 20 3 2022/06/20 F 3800 300
11 6335 jlee2 20 3 2023/01/05 F 4700 500
12 6700 mkwon 20 4 2023/03/18 M 7000 600
13 6252 hpark 20 5 2020/06/01 M 5600 400
14 6003 dkoh 30 2 2021/11/11 F 3600 400
15 6224 yma 30 5 2021/10/10 F 5200 500
16 6081 hryu 40 2 2022/05/05 F 3200 300
17 6484 jchae 40 3 2022/12/01 M 3400 500
18 6195 mkwon 40 3 2022/05/09 M 3300 400
19 6228 noh 40 4 2020/06/01 F 5000 500
20 6670 bjin 40 5 2021/04/08 M 6000 600
> emp$total <- emp$base + emp$bonus
> head(emp)
id ename dept_no job_level join_date gender base bonus total
1 6353 skim 10 1 2022/07/06 F 4000 400 4400
2 6477 skim2 10 1 2020/06/01 M 3900 400 4300
3 6302 jpark 10 2 2021/05/01 M 3700 300 4000
4 6163 jlee 10 3 2022/09/03 M 4500 500 5000
5 6409 msa 10 3 2020/03/18 F 4000 500 4500
6 6018 rnoh 10 3 2021/11/20 M 3500 300 3800
> emp$country <- "Korea"
> head(emp)
id ename dept_no job_level join_date gender base bonus total country
1 6353 skim 10 1 2022/07/06 F 4000 400 4400 Korea
2 6477 skim2 10 1 2020/06/01 M 3900 400 4300 Korea
3 6302 jpark 10 2 2021/05/01 M 3700 300 4000 Korea
4 6163 jlee 10 3 2022/09/03 M 4500 500 5000 Korea
5 6409 msa 10 3 2020/03/18 F 4000 500 4500 Korea
6 6018 rnoh 10 3 2021/11/20 M 3500 300 3800 Korea
# Derive a position label from job_level. The three assignments below are
# alternative spellings of the same rule; each one overwrites emp$pos, so only
# one of them is needed. For the integer levels 1-5 shown in this data they
# agree: levels 1-2 -> "junior", 3-4 -> "intermediate", otherwise "senior".
# Note: they differ for missing values. `%in%` never returns NA, so variant 2
# labels an NA job_level "senior", while variants 1 and 3 yield NA.
# Variant 1: nested ifelse() with range comparisons.
> emp$pos <- ifelse(emp$job_level < 3, "junior",
+ ifelse(emp$job_level < 5, "intermediate", "senior"))
# Variant 2: set membership with %in%.
> emp$pos <- ifelse(emp$job_level %in% c(1,2), "junior",
+ ifelse(emp$job_level %in% c(3,4), "intermediate", "senior"))
# Variant 3: explicit equality tests combined with the elementwise `|`.
> emp$pos <- ifelse(emp$job_level == 1 | emp$job_level == 2, "junior",
+ ifelse(emp$job_level == 3 | emp$job_level == 4, "intermediate", "senior"))
> emp
id ename dept_no job_level join_date gender base bonus total country city special_bonus
1 6353 skim 10 1 2022/07/06 F 4000 400 4400 Korea Seoul 0
2 6477 skim2 10 1 2020/06/01 M 3900 400 4300 Korea Seoul 0
3 6302 jpark 10 2 2021/05/01 M 3700 300 4000 Korea Seoul 0
4 6163 jlee 10 3 2022/09/03 M 4500 500 5000 Korea Seoul 1000
5 6409 msa 10 3 2020/03/18 F 4000 500 4500 Korea Seoul 1000
6 6018 rnoh 10 3 2021/11/20 M 3500 300 3800 Korea Seoul 1000
7 6681 slee 10 4 2021/09/24 M 5000 500 5500 Korea Seoul 0
8 6531 jpark 10 5 2020/09/22 F 4000 400 4400 Korea Seoul 0
9 6162 ylee 20 1 2020/01/02 M 4400 400 4800 Korea Incheon 0
10 6018 jlee 20 3 2022/06/20 F 3800 300 4100 Korea Incheon 1000
11 6335 jlee2 20 3 2023/01/05 F 4700 500 5200 Korea Incheon 1000
12 6700 mkwon 20 4 2023/03/18 M 7000 600 7600 Korea Incheon 0
13 6252 hpark 20 5 2020/06/01 M 5600 400 6000 Korea Incheon 0
14 6003 dkoh 30 2 2021/11/11 F 3600 400 4000 Korea Seoul 0
15 6224 yma 30 5 2021/10/10 F 5200 500 5700 Korea Seoul 0
16 6081 hryu 40 2 2022/05/05 F 3200 300 3500 Korea Seoul 1000
17 6484 jchae 40 3 2022/12/01 M 3400 500 3900 Korea Seoul 1000
18 6195 mkwon 40 3 2022/05/09 M 3300 400 3700 Korea Seoul 1000
19 6228 noh 40 4 2020/06/01 F 5000 500 5500 Korea Seoul 1000
20 6670 bjin 40 5 2021/04/08 M 6000 600 6600 Korea Seoul 1000
pos
1 junior
2 junior
3 junior
4 intermediate
5 intermediate
6 intermediate
7 intermediate
8 senior
9 junior
10 intermediate
11 intermediate
12 intermediate
13 senior
14 junior
15 senior
16 junior
17 intermediate
18 intermediate
19 intermediate
20 senior
# Save the emp data frame to a CSV file. By default write.csv() also writes
# the row names as an unnamed first column; pass row.names = FALSE to omit it.
write.csv(emp, file = "/Users/hsty9/Documents/R/datas/csv1_dup.csv")
이 글은 패스트캠퍼스 데이터 분석 Master Class의 강의자료 일부를 발췌하여 작성되었습니다.