Decision Tree

hottogi · November 9, 2022
library(party)

library(datasets)
str(airquality)

# model Temp as a function of Solar.R, Wind, and Ozone
formula <- Temp ~ Solar.R + Wind + Ozone

air_ctree <- ctree(formula, data = airquality)
air_ctree

plot(air_ctree)
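A quick way to gauge how well this regression tree fits is to compare its predictions with the observed temperatures. The lines below are a small sketch added for illustration (aq and pred_temp are new names, not part of the original code); complete cases are used to sidestep the missing Ozone and Solar.R values.

aq <- na.omit(airquality)                        # keep only rows without missing values
pred_temp <- predict(air_ctree, newdata = aq)    # predicted Temp for those rows
cor(as.numeric(pred_temp), aq$Temp)              # correlation between predicted and observed Temp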

set.seed(1234)
idx <- sample(1:nrow(iris), nrow(iris) * 0.7)   # 70% of rows for training
train <- iris[idx, ]
test <- iris[-idx, ]

formula <- Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width

iris_ctree <- ctree(formula, data = train)
iris_ctree

plot(iris_ctree, type = "simple")

pred <- predict(iris_ctree, test)
table(pred, test$Species)
(14 + 16 + 13) / nrow(test)
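The accuracy above is typed in from the printed cell counts. A version that reads the correct counts directly off the confusion table (a small sketch; tab is a new name) gives the same number without retyping:

tab <- table(pred, test$Species)   # confusion matrix: predictions vs. actual species
sum(diag(tab)) / sum(tab)          # accuracy = correctly classified / total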

library(cvTools)
cross <- cvFolds(nrow(iris), K = 3, R = 2)   # 3 folds, repeated 2 times

str(cross)
cross
length(cross$which)
dim(cross$subsets)
table(cross$which)

R <- 1:2          # replication indices
K <- 1:3          # fold indices
CNT <- 0
ACC <- numeric()  # accuracy collected for each fold

for(r in R) {
  cat('\n R = ', r, '\n')
  for(k in K) {
    
    # rows assigned to fold k in replication r form the test set
    datas_ids <- cross$subsets[cross$which == k, r]
    test <- iris[datas_ids, ]
    cat('test : ', nrow(test), '\n')
    
    # all remaining rows form the training set
    formula <- Species ~ .
    train <- iris[-datas_ids, ]
    cat('train : ', nrow(train), '\n')
    
    model <- ctree(formula, data = train)
    pred <- predict(model, test)
    t <- table(pred, test$Species)
    print(t)
    
    # accuracy for this fold = correct predictions / total predictions
    CNT <- CNT + 1
    ACC[CNT] <- sum(diag(t)) / sum(t)
  }
  
}
CNT

ACC
length(ACC)
result_acc <- mean(ACC, na.rm = TRUE)
result_acc

library(ggplot2)
data(mpg)

t <- sample(1:nrow(mpg), 120)   # indices of 120 rows reserved for the test split
train <- mpg[-t, ]
test <- mpg[t, ]
dim(train)
dim(test)

train$drv <- factor(train$drv)   # drv is a character column in mpg; ctree needs a factor
test$drv <- factor(test$drv)
formula <- hwy ~ displ + cyl + drv
tree_model <- ctree(formula, data = train)   # fit on the training split
plot(tree_model)
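To see how the tree generalizes, its predictions on the held-out test split can be compared with the actual hwy values. This is a sketch added for illustration (pred_hwy is a new name):

pred_hwy <- predict(tree_model, newdata = test)   # predicted hwy for the test split
cor(as.numeric(pred_hwy), test$hwy)               # correlation between predicted and actual hwy
mean(abs(as.numeric(pred_hwy) - test$hwy))        # mean absolute error on the test split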

library(arules)
data(AdultUCI)
str(AdultUCI)
names(AdultUCI)

set.seed(1234)
choice <- sample(1:nrow(AdultUCI), 10000)
choice
adult.df <- AdultUCI[choice, ]
str(adult.df)

# the original column names contain '-' and must be quoted with backticks
capital <- adult.df$`capital-gain`
hours <- adult.df$`hours-per-week`
education <- adult.df$`education-num`
race <- adult.df$race
age <- adult.df$age
income <- adult.df$income

adult_df <- data.frame(capital = capital, age = age, race = race, 
                       hours = hours, education = education, income = income)
str(adult_df)

formula <- capital ~ income + education + hours + race + age
adult_ctree <- ctree(formula, data = adult_df)
adult_ctree

plot(adult_ctree)
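As with the airquality tree, the fit of this regression tree can be checked by comparing its fitted values with the observed capital gains. A small sketch (pred_cap is a new name):

pred_cap <- predict(adult_ctree)              # fitted capital values on the training data
cor(as.numeric(pred_cap), adult_df$capital)   # correlation between fitted and observed capital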

adultResult <- subset(adult_df, 
                      adult_df$income == 'large' &
                        adult_df$education > 14)
length(adultResult$education)
summary(adultResult$capital)
boxplot(adultResult$capital)

library(party)
# sampling
str(iris)
set.seed(1000)
sampnum <- sample(2, nrow(iris), replace = TRUE, prob = c(0.7, 0.3))   # assign each row to group 1 (70%) or group 2 (30%)
sampnum
# split into training & testing data
trData <- iris[sampnum == 1, ]
head(trData)
teData <- iris[sampnum == 2, ]
head(teData)
shortvar <- Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width
# train the model
citreeResult <- ctree(shortvar, data = trData)
# compare predicted and actual values on the training data
table(predict(citreeResult), trData$Species)
citreeResult2 <- ctree(shortvar, data = teData)
# classify using the test data
forcasted2 <- predict(citreeResult2, newdata = teData)
# forcasted2
# teData$Species
# compare the predictions with the actual values
table(forcasted2, teData$Species)
# visualization
plot(citreeResult2)
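For reference, the more common workflow is to keep the single model trained on trData and evaluate it on the unseen teData, rather than fitting a second tree on the test set. A minimal sketch (pred_te is a new name):

pred_te <- predict(citreeResult, newdata = teData)   # apply the trData model to teData
table(pred_te, teData$Species)                       # confusion matrix on the test set
sum(pred_te == teData$Species) / nrow(teData)        # test-set accuracy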

library(rpart)
library(rpart.plot)

data(iris)

rpart_model <- rpart(Species ~ ., data = iris)
rpart_model

rpart.plot(rpart_model)
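rpart also reports a cross-validated error for each value of the complexity parameter, which can guide pruning. The sketch below uses the standard printcp(), plotcp(), and prune() functions; the cp value of 0.1 is only illustrative.

printcp(rpart_model)                           # complexity parameter table with cross-validated error
plotcp(rpart_model)                            # plot error against cp
pruned_model <- prune(rpart_model, cp = 0.1)   # prune at an illustrative cp value
rpart.plot(pruned_model)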

weather = read.csv("C:/Rwork/dataset4/weather.csv", header = TRUE)
str(weather)
head(weather)
# exclude the 1st and 14th columns from the predictors
weather.df <- rpart(RainTomorrow ~ ., data = weather[ , c(-1, -14)], cp = 0.01)
rpart.plot(weather.df)
weather_pred <- predict(weather.df, weather)   # class probabilities for each day
weather_pred
weather_pred2 <- ifelse(weather_pred[ , 2] >= 0.5, 'Yes', 'No')   # column 2 = P('Yes')

table(weather_pred2, weather$RainTomorrow)
(278 + 53) / nrow(weather)
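Note that the confusion matrix above evaluates the model on the same data it was trained on, so the accuracy is optimistic. A sketch of a held-out evaluation (the names w_idx, w_train, w_test, w_model, etc. are new here):

set.seed(42)
w_idx <- sample(1:nrow(weather), nrow(weather) * 0.7)
w_train <- weather[w_idx, ]
w_test <- weather[-w_idx, ]
w_model <- rpart(RainTomorrow ~ ., data = w_train[ , c(-1, -14)], cp = 0.01)
w_prob <- predict(w_model, w_test)                     # class probabilities on the held-out rows
w_pred <- ifelse(w_prob[ , 2] >= 0.5, 'Yes', 'No')
tab_w <- table(w_pred, w_test$RainTomorrow)
sum(diag(tab_w)) / sum(tab_w)                          # held-out accuracy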

library(rpart)
CARTTree <- rpart(Species~., data=iris) 
CARTTree
plot(CARTTree, margin=0.2)
text(CARTTree, cex=1)
predict(CARTTree, newdata=iris, type="class")
predicted <- predict(CARTTree, newdata=iris, type="class")
sum(predicted == iris$Species) / NROW(predicted)
real <- iris$Species
table(real, predicted)
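Beyond overall accuracy, the same confusion matrix can be turned into per-class proportions with base R; the diagonal then shows how often each actual species is classified correctly.

prop.table(table(real, predicted), margin = 1)   # row-wise: distribution of predictions per actual class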
