링크 : https://cafe.naver.com/yjbooks/42520
data url = https://raw.githubusercontent.com/Datamanim/datarepo/main/nba/nba.csv
# Load the NBA player statistics from the public GitHub copy of the data set.
# The file is semicolon-separated and has a header row; string columns are
# converted to factors, so later steps call as.character() where plain
# strings are needed.
main.ds <- read.csv(
  file = "https://raw.githubusercontent.com/Datamanim/datarepo/main/nba/nba.csv",
  # Spelled out in full: the previous `stringsAsFactor` only worked through
  # partial argument matching.
  stringsAsFactors = TRUE,
  header = TRUE,
  # NOTE(review): "UTF-8-BOM" is normally a value for `fileEncoding`, not
  # `encoding` -- confirm the file's real encoding before changing this.
  encoding = "UTF-8-BOM",
  sep = ";"
)
# Quick look at the first rows to confirm the columns were parsed correctly.
head(main.ds)
> head(main.ds)
Rk Player Pos Age Tm G GS MP FG FGA FG. X3P X3PA
1 1 Precious Achiuwa C 22 TOR 73 28 23.6 3.6 8.3 0.439 0.8 2.1
2 2 Steven Adams C 28 MEM 76 75 26.3 2.8 5.1 0.547 0.0 0.0
3 3 Bam Adebayo C 24 MIA 56 56 32.6 7.3 13.0 0.557 0.0 0.1
4 4 Santi Aldama PF 21 MEM 32 0 11.3 1.7 4.1 0.402 0.2 1.5
5 5 LaMarcus Aldridge C 36 BRK 47 12 22.3 5.4 9.7 0.550 0.3 1.0
6 6 Nickeil Alexander-Walker SG 23 TOT 65 21 22.6 3.9 10.5 0.372 1.6 5.2
X3P. X2P X2PA X2P. eFG. FT FTA FT. ORB DRB TRB AST STL BLK TOV PF
1 0.359 2.9 6.1 0.468 0.486 1.1 1.8 0.595 2.0 4.5 6.5 1.1 0.5 0.6 1.2 2.1
2 0.000 2.8 5.0 0.548 0.547 1.4 2.6 0.543 4.6 5.4 10.0 3.4 0.9 0.8 1.5 2.0
3 0.000 7.3 12.9 0.562 0.557 4.6 6.1 0.753 2.4 7.6 10.1 3.4 1.4 0.8 2.6 3.1
4 0.125 1.5 2.6 0.560 0.424 0.6 1.0 0.625 1.0 1.7 2.7 0.7 0.2 0.3 0.5 1.1
5 0.304 5.1 8.8 0.578 0.566 1.9 2.2 0.873 1.6 3.9 5.5 0.9 0.3 1.0 0.9 1.7
6 0.311 2.3 5.3 0.433 0.449 1.2 1.7 0.743 0.6 2.3 2.9 2.4 0.7 0.4 1.4 1.6
PTS
1 9.1
2 6.9
3 19.1
4 4.1
5 12.9
6 10.6
# 1. Tm 컬럼은 각 팀의 이름을 의미한다. TOR팀의 평균나이를 소수 4째 자리까지 구하여라
library(dplyr)
# Average age of the players whose team (Tm) is "TOR", rounded to four
# decimal places. The result is a one-row data frame with a column `avg`.
ds1 <- main.ds
ds1.TOR <- ds1 %>%
  filter(Tm == "TOR") %>%
  summarise(avg = round(mean(Age, na.rm = TRUE), 4))
result1 <- ds1.TOR
# The original script stored this under the misspelled name `resutl1`;
# keep it as an alias so any code still using that name keeps working.
resutl1 <- result1
print(result1)
> print(resutl1)
avg
1 24.8696
# 2. 전체 선수 중 나이가 가장 어린 선수들을 필터하고, 그들 중 가장 많은 포지션은 무엇인지 확인하라
# Among the players whose Age equals the minimum Age in the data, find the
# position (Pos) that occurs most often. Ties are all returned, in
# alphabetical order.
ds2 <- main.ds
# Pos is a factor after loading; work with plain strings so the result is a
# character vector.
ds2$Pos <- as.character(ds2$Pos)
# Keep only the youngest players. No sorting is needed for this: the filter
# compares every row against the overall minimum.
ds2.youngest <- ds2 %>% filter(Age == min(Age))
# Count players per position; table() replaces the manual "count = 1"
# column followed by aggregate(sum).
pos.count <- table(ds2.youngest$Pos)
# Every position whose count equals the highest count.
result2 <- names(pos.count)[pos.count == max(pos.count)]
print(result2)
> print(result2)
[1] "SG"
# 3. 선수들의 이름은 first_name+ 공백 + last_name으로 이루어져 있다.
# 가장 많은 first_name은 무엇이며 몇 회 발생하는지 확인하라
library(stringr)
# Find the most frequent first name among the players and how many times it
# occurs. A name is "first_name<space>last_name".
ds3 <- main.ds
# Split each name at the FIRST space only (n = 2): column 1 is the first
# name and column 2 is everything after it. This keeps multi-word last names
# intact and always yields two columns, even if no name contains a space.
temp <- str_split(
  as.character(ds3$Player),
  pattern = " ",
  n = 2,
  simplify = TRUE
)
head(temp)
ds3$first_name <- as.factor(temp[, 1])
ds3$last_name <- as.factor(temp[, 2])
# Frequency of each first name, most frequent first.
tb <- sort(table(ds3$first_name), decreasing = TRUE)
result3 <- tb[1]
print(names(result3))
# The exercise also asks for the number of occurrences, so print the count.
print(as.integer(result3))
> print( names(result3) )
[1] "Justin"
# 4. PTS 컬럼은 경기당 평균 득점 수이다. 각 포지션별로 경기당 평균 득점 수의 평균을 구하여라
# Mean of the per-game points (PTS) for each position (Pos).
ds4 <- main.ds
# Group by Pos and average PTS; the aggregated table replaces the raw copy.
ds4 <- aggregate(PTS ~ Pos, data = ds4, FUN = mean)
result4 <- ds4
# Displayed twice, as in the original: once by auto-printing, once explicitly.
result4
print(result4)
> print(result4)
Pos PTS
1 C 7.690769
2 C-PF 12.850000
3 PF 7.737500
4 PF-SF 8.500000
5 PG 8.325161
6 PG-SG 22.000000
7 SF 7.270253
8 SF-SG 8.660000
9 SG 7.810553
10 SG-PG 9.525000
11 SG-SF 7.360000
# For every team (Tm) take the largest games-played value (G), then average
# those per-team maxima.
ds5 <- main.ds
# Inspect column types before aggregating.
str(ds5)
# One row per team holding that team's maximum G.
ds5.agg <- aggregate(G ~ Tm, data = ds5, FUN = max)
result5 <- mean(ds5.agg[["G"]])
print(result5)
> print(result5)
[1] 16.7875   # (주의) 6번 문제의 출력이 잘못 복사된 값으로 보임 — 실제 result5 출력으로 교체 필요
# Mean minutes played (MP) of the "MIA" players whose position is "C" or "PF".
ds6 <- main.ds
# Both conditions must hold: team is MIA and position is one of C / PF.
ds6.temp <- ds6 %>%
  filter(Tm == "MIA", Pos %in% c("C", "PF"))
head(ds6.temp)
result6 <- mean(ds6.temp[["MP"]])
print(result6)
> print(result6)
[1] 16.7875
# Mean of X3P for the players whose games played (G) is at least 1.5 times
# the overall mean of G.
ds7 <- main.ds
str(ds7)
# Threshold: 1.5 times the average number of games played.
m <- 1.5 * mean(ds7$G)
# Keep the rows at or above the threshold and only the X3P column.
ds7.temp <- ds7 %>%
  filter(G >= m) %>%
  select(X3P)
ds7.temp
result7 <- mean(ds7.temp[["X3P"]])
print(result7)
> print(result7)
[1] 1.385366
# Absolute difference in mean games played (G) between players at or above
# the mean Age and players below it.
ds8 <- main.ds
m <- mean(ds8$Age)
# Split the players around the mean age.
older <- ds8 %>% filter(Age >= m)
younger <- ds8 %>% filter(Age < m)
# Average games played within each group.
g1 <- mean(older$G)
g2 <- mean(younger$G)
result8 <- abs(g1 - g2)
print(result8)
> print(result8)
[1] 3.787675
# Name of the team (Tm) with the lowest mean Age.
ds9 <- main.ds
# Mean age per team, sorted so the youngest team comes first.
ds9.agg <- aggregate(Age ~ Tm, data = ds9, FUN = mean)
ds9.agg <- ds9.agg[order(ds9.agg$Age), ]
# Tm is a factor; convert the first (youngest) team to a plain string.
youngest.team <- as.character(ds9.agg$Tm[1])
# Auto-printed here and printed again below, matching the original output.
youngest.team
result9 <- youngest.team
print(result9)
> print(result9)
[1] "MEM"
# Mean minutes played (MP) for each position (Pos).
ds10 <- main.ds
# One row per position with the average MP of its players.
ds10.agg <- aggregate(MP ~ Pos, data = ds10, FUN = mean)
result10 <- ds10.agg
print(result10)
> print(result10)
Pos MP
1 C 16.99000
2 C-PF 25.35000
3 PF 17.93750
4 PF-SF 27.30000
5 PG 19.54774
6 PG-SG 37.20000
7 SF 17.51456
8 SF-SG 20.34000
9 SG 18.55427
10 SG-PG 22.95000
11 SG-SF 19.62000