Pandas와 NumPy를 활용하여 Rating Matrix와 Frequent Matrix를 만드는 과제 (아래 링크에서 파일 다운로드 가능)
https://github.com/TEAMLAB-Lecture/AI-python-connect/blob/master/lab_assignments/lab_3/README.md
import numpy as np
import pandas as pd
def get_rating_matrix(filename, dtype=np.float32):
    """Build a source-by-target rating matrix from a CSV file.

    The CSV is read with ``pd.read_csv`` and must contain the columns
    ``source``, ``target`` and ``rating``. Ratings that share the same
    (source, target) pair are summed; pairs that never occur are filled
    with 0.

    Args:
        filename: Path (or any input accepted by ``pd.read_csv``) of the CSV.
        dtype: Element type of the returned matrix.

    Returns:
        A DataFrame indexed by ``source`` with one column per ``target``,
        whose values are cast to ``dtype``.
    """
    ratings = pd.read_csv(filename)
    summed = ratings.groupby(["source", "target"])["rating"].sum()
    # unstack() leaves NaN for (source, target) pairs absent from the file.
    matrix = summed.unstack().fillna(0)
    # Honour the requested dtype; previously the parameter was ignored.
    return matrix.astype(dtype)
# Notebook cell: build the rating matrix from "movie_rating.csv"; the rendered result follows.
get_rating_matrix("movie_rating.csv")
target | Just My Luck | Lady in the Water | Snakes on a Plane | Superman Returns | The Night Listener | You Me and Dupree |
---|---|---|---|---|---|---|
source | ||||||
Claudia Puig | 3.0 | 0.0 | 3.5 | 0.0 | 4.5 | 0.0 |
Gene Seymour | 0.0 | 3.0 | 3.5 | 0.0 | 3.0 | 3.5 |
Jack Matthews | 0.0 | 3.0 | 4.0 | 5.0 | 3.0 | 3.5 |
Lisa Rose | 3.0 | 2.5 | 3.5 | 3.5 | 3.0 | 2.5 |
Mick LaSalle | 2.0 | 3.0 | 4.0 | 3.0 | 3.0 | 0.0 |
Toby | 0.0 | 0.0 | 4.5 | 4.0 | 0.0 | 0.0 |
def get_frequent_matrix(filename, dtype=np.float32):
    """Build a source-by-target frequency (co-occurrence count) matrix.

    The CSV is read with ``pd.read_csv`` and must contain the columns
    ``source`` and ``target``. Each cell holds the number of rows in the
    file with that (source, target) pair; pairs that never occur are 0.

    Args:
        filename: Path (or any input accepted by ``pd.read_csv``) of the CSV.
        dtype: Element type of the returned matrix.

    Returns:
        A DataFrame indexed by ``source`` with one column per ``target``,
        whose values are cast to ``dtype``.
    """
    pairs = pd.read_csv(filename)
    # size() counts rows per group directly, so no dummy all-ones "rating"
    # column is needed (and an existing "rating" column is not overwritten).
    counts = pairs.groupby(["source", "target"]).size()
    # fill_value=0 marks pairs that never appear in the file.
    matrix = counts.unstack(fill_value=0)
    # Honour the requested dtype; previously the parameter was ignored.
    return matrix.astype(dtype)
# Notebook cell: build the frequency matrix from "1000i.csv"; the rendered result follows.
get_frequent_matrix("1000i.csv")
target | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
source | |||||||||||||||||||||
1 | 19 | 17 | 14 | 11 | 17 | 25 | 7 | 22 | 5 | 18 | ... | 15 | 14 | 20 | 9 | 12 | 16 | 11 | 9 | 11 | 12 |
2 | 20 | 16 | 10 | 15 | 17 | 18 | 10 | 13 | 5 | 19 | ... | 13 | 12 | 15 | 9 | 13 | 16 | 16 | 10 | 16 | 9 |
3 | 12 | 16 | 13 | 19 | 23 | 19 | 5 | 14 | 5 | 18 | ... | 10 | 14 | 10 | 17 | 15 | 16 | 11 | 17 | 9 | 11 |
4 | 14 | 14 | 19 | 11 | 11 | 18 | 7 | 16 | 7 | 17 | ... | 9 | 16 | 18 | 12 | 16 | 16 | 26 | 16 | 12 | 20 |
5 | 13 | 7 | 8 | 15 | 13 | 16 | 3 | 19 | 11 | 12 | ... | 11 | 10 | 16 | 8 | 13 | 20 | 14 | 18 | 21 | 3 |
5 rows × 50 columns