from sklearn.cluster import KMeans
import pandas as pd
# Sample data: twelve 2-D points given as x / y coordinate columns
data = {
    'x': [1, 2, 1, 4, 5, 4, 6, 5, 8, 9, 8, 10],
    'y': [1, 1, 2, 4, 4, 5, 7, 8, 1, 2, 0, 1],
}
df = pd.DataFrame(data)
# Build the K-means model (k = 3 clusters, fixed seed) and fit it in one
# chained call; fit() returns the fitted estimator itself
kmeans = KMeans(n_clusters=3, random_state=42).fit(df)
# Attach each row's assigned cluster label as a new column
df['cluster'] = kmeans.labels_
print(df)
→ 출력 결과
| index | x | y | cluster |
|---|---|---|---|
| 0 | 1 | 1 | 2 |
| 1 | 2 | 1 | 2 |
| 2 | 1 | 2 | 2 |
| 3 | 4 | 4 | 0 |
| 4 | 5 | 4 | 0 |
| 5 | 4 | 5 | 0 |
| 6 | 6 | 7 | 0 |
| 7 | 5 | 8 | 0 |
| 8 | 8 | 1 | 1 |
| 9 | 9 | 2 | 1 |
| 10 | 8 | 0 | 1 |
| 11 | 10 | 1 | 1 |
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
# Example data: six points with pre-assigned cluster labels
data = {
    'x': [1, 2, 3, 8, 9, 10],
    'y': [1, 2, 3, 8, 9, 10],
    'cluster': [0, 0, 0, 1, 1, 1],
}
# Wrap the dict in a DataFrame
df = pd.DataFrame(data)
# Scatter plot colored by cluster; fit_reg=False suppresses the regression line
sns.lmplot(data=df, x='x', y='y', hue='cluster', fit_reg=False, legend=True)
# Title the current axes (the single facet drawn by lmplot)
plt.gca().set_title('K-means Clustering')
plt.show()

from sklearn.cluster import KMeans
from sklearn.datasets import load_iris
import pandas as pd
# Load the Iris dataset into a DataFrame, one column per feature
iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
# K-means model with k = 3 clusters and a fixed seed
kmeans = KMeans(n_clusters=3, random_state=42)
# Fit and label in a single step; the right-hand side is evaluated before
# the 'cluster' column exists, so only the feature columns are clustered
df['cluster'] = kmeans.fit_predict(df)
print(df.head())
→ 출력 결과
sepal length (cm) sepal width (cm) ... petal width (cm) cluster
0 5.1 3.5 ... 0.2 1
1 4.9 3.0 ... 0.2 1
2 4.7 3.2 ... 0.2 1
3 4.6 3.1 ... 0.2 1
4 5.0 3.6 ... 0.2 1
[5 rows x 5 columns]
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import load_iris
import pandas as pd
# Load the Iris dataset into a DataFrame, one column per feature
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
# K-means model (k = 3 clusters, fixed seed)
kmeans = KMeans(n_clusters=3, random_state=42)
# fit_predict is evaluated before the 'cluster' column is assigned, so the
# model is fitted on the feature columns only
df['cluster'] = kmeans.fit_predict(df)
# Visualize petal length vs. petal width, colored by assigned cluster
plt.scatter(df['petal length (cm)'], df['petal width (cm)'], c=df['cluster'], cmap='viridis')
plt.xlabel('petal length (cm)')
plt.ylabel('petal width (cm)')
plt.title('K-means Clustering of Iris Dataset')
# Cluster labels are the integers 0..k-1; pin the colorbar ticks to them so
# it does not display fractional values that correspond to no cluster
plt.colorbar(label='Cluster', ticks=list(range(kmeans.n_clusters)))
plt.show()

✅ K-means 주요 특징
✅ 활용 분야
✅ 장단점