import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score
# Baseline experiment: fit a depth-2 decision tree that predicts the 'color'
# column from every other column, evaluated on a plain random 80/20 split.
df = pd.read_csv('/dir/to/data.csv')

# Features are all columns except the target; the target is 'color'.
X = df.drop('color', axis=1)
y = df['color']

# Fixed random_state so the same rows land in train/test on every run.
train_X, test_X, train_y, test_y = train_test_split(
    X, y, test_size=0.2, random_state=77
)

clf = DecisionTreeClassifier(max_depth=2)
clf.fit(train_X, train_y)

# Score the fitted tree on the held-out 20%.
pred_y = clf.predict(test_X)
print(accuracy_score(test_y, pred_y))

# Pass a plain list: some scikit-learn releases validate feature_names as a
# list and reject a pandas Index.
plot_tree(clf, feature_names=list(X.columns))
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score
# Stratified experiment: same model and data as the baseline, but the 80/20
# split is stratified on the target.
df = pd.read_csv('/dir/to/data.csv')

# Features are all columns except the target; the target is 'color'.
X = df.drop('color', axis=1)
y = df['color']

# stratify=y asks train_test_split to split in a stratified fashion using the
# target as class labels; random_state is fixed for reproducibility.
train_X, test_X, train_y, test_y = train_test_split(
    X, y, test_size=0.2, random_state=77, stratify=y
)

# A new classifier must be fitted on THIS training split. Reusing a model
# fitted on a different split would evaluate on rows it may have been trained
# on, inflating the reported accuracy.
clf = DecisionTreeClassifier(max_depth=2)
clf.fit(train_X, train_y)

# Score the freshly fitted tree on the stratified held-out 20%.
pred_y = clf.predict(test_X)
print(accuracy_score(test_y, pred_y))

# Pass a plain list: some scikit-learn releases validate feature_names as a
# list and reject a pandas Index.
plot_tree(clf, feature_names=list(X.columns))