인덱스 리인덱싱 작업
# Build a DataFrame from `enroll_detail`, move its index into regular
# columns, and rename the 'user_id' column to 'count'.
lecture_count = (
    pd.DataFrame(enroll_detail)
    .reset_index()
    .rename(columns={'user_id': 'count'})
)
# Notebook-style preview of the last five rows.
lecture_count.tail(5)
Join
# Re-index `lectures` by 'lecture_id' so that the join below can match the
# 'lecture_id' column of `lecture_count` against that index.
# Note that `lectures` itself is rebound to the re-indexed frame.
lectures = lectures.set_index(keys='lecture_id')
full_lecture = lecture_count.join(other=lectures, on='lecture_id')
# Notebook-style preview of the last five joined rows.
full_lecture.tail(5)
conda install -c conda-forge wordcloud
import numpy as np
from PIL import Image
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# Read Sequence_01.txt ... Sequence_14.txt from ./data and concatenate their
# contents into `result`, with every newline replaced by a single space.
pieces = []
for number in range(1, 15):
    index = '{:02d}'.format(number)
    filename = "Sequence_" + index + ".txt"
    print(filename)
    # The context manager closes each handle as soon as it has been read;
    # previously all 14 files were opened and never closed.
    # 'utf-8-sig' drops a leading byte-order mark if the file has one.
    with open("./data/" + filename, 'r', encoding='utf-8-sig') as handle:
        pieces.append(handle.read().replace("\n", " "))
# Join once at the end instead of growing the string inside the loop.
result = "".join(pieces)
import re
# Remove special symbols: every character that is neither a word character
# nor whitespace is deleted from `result`.
# Raw string so that `\w` and `\s` reach the regex engine untouched instead of
# being parsed as (invalid) string escape sequences, which newer Python
# versions warn about.
pattern = r'[^\w\s]'
text = re.sub(pattern=pattern, repl='', string=result)
# Notebook-style preview of the cleaned text.
text
import matplotlib.font_manager as fm
- 이용 가능한 폰트 중 '고딕'만 선별
# Collect the file paths of all registered fonts whose name contains
# 'Gothic', then print them one per line in registry order.
gothic_font_paths = [
    font.fname
    for font in fm.fontManager.ttflist
    if 'Gothic' in font.name
]
for font_file in gothic_font_paths:
    print(font_file)
# Draw a word cloud of `text` on a white background using the AppleGothic font.
# NOTE(review): the font path is a macOS system location; presumably it must
# be changed on other platforms.
font_path = '/System/Library/Fonts/Supplemental/AppleGothic.ttf'
wc = WordCloud(
    font_path=font_path,
    background_color="white",
).generate(text)

canvas_size = (50, 50)
plt.figure(figsize=canvas_size)
plt.axis("off")  # hide the axes so only the cloud image is shown
plt.imshow(wc)
plt.show()
font_path: 한글 폰트 설정 경로
background_color: 배경색상
- Generate a word cloud image
# Build a word cloud constrained by the image in ./data/sparta.png, show the
# mask and the cloud side by side, and save the figure to ./wordcloud_ex1.png.
mask = np.array(Image.open('./data/sparta.png'))
wc = WordCloud(
    font_path=font_path,
    background_color="white",
    mask=mask,
).generate(text)

f = plt.figure(figsize=(50, 50))
# (subplot position, image, extra imshow options, panel title)
panels = (
    (1, mask, {'cmap': plt.cm.gray}, 'Original Stencil'),
    (2, wc, {'interpolation': 'bilinear'}, 'Sparta Cloud'),
)
for position, image, imshow_options, panel_title in panels:
    f.add_subplot(1, 2, position)
    plt.imshow(image, **imshow_options)
    plt.title(panel_title, size=40)
    plt.axis("off")
plt.show()
f.savefig('./wordcloud_ex1.png')
# Parse the 'done_date' strings into datetimes and store them in a new
# 'done_date_time' column. The expected layout is
# year-month-day 'T' hour:minute:second.fraction.
# NOTE(review): the module-level name `format` shadows the builtin `format()`;
# it is left as is because later cells may read it — consider renaming.
format='%Y-%m-%dT%H:%M:%S.%f'
sparta_data['done_date_time'] = pd.to_datetime(sparta_data['done_date'], format=format)
# Notebook-style preview of the last five rows.
sparta_data.tail(5)
# Derive the English weekday name of each completion timestamp, count the
# 'user_id' entries per weekday, and order the result Monday through Sunday.
sparta_data['done_date_time_weekday'] = sparta_data['done_date_time'].dt.day_name()
weeks = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
weekdata = sparta_data.groupby('done_date_time_weekday')['user_id'].count()
# `reindex` is the documented way to put rows in a given label order.
# The earlier `.agg(weeks)` only worked because each weekday string was
# resolved through attribute access on the index labels, and it raised when a
# weekday had no rows; with `reindex` such a weekday appears as NaN instead.
weekdata = weekdata.reindex(weeks)
# Notebook-style preview of the ordered counts.
weekdata
# Bar chart of `weekdata`: one bar per index label, with the x tick labels
# rotated 90 degrees.
plt.figure(figsize=(10, 5))
plt.bar(x=weekdata.index, height=weekdata)
plt.xticks(rotation=90)
# Title and axis labels (Korean, user-facing).
plt.xlabel('요일')
plt.ylabel('수강생(명)')
plt.title('요일별 수강 완료 수강생 수')
plt.show()
# Extract the hour of each completion timestamp, then count the 'user_id'
# entries per hour and sort the counts by hour.
sparta_data['done_date_time_hour'] = sparta_data['done_date_time'].dt.hour
user_ids_by_hour = sparta_data.groupby('done_date_time_hour')['user_id']
hourdata = user_ids_by_hour.count().sort_index()
# Notebook-style preview of the hourly counts.
hourdata
# Line chart of `hourdata` against its index, with an x tick at every
# integer from 0 to 23.
hour_ticks = np.arange(24)
plt.figure(figsize=(10, 5))
plt.plot(hourdata.index, hourdata)
plt.xticks(hour_ticks)
# Title and axis labels (Korean, user-facing).
plt.xlabel('시간')
plt.ylabel('사용자(명)')
plt.title('시간별 수강 완료 사용자 수')
plt.show()
pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', fill_value=None, margins=False, margins_name='All')
data: 분석할 데이터
values: 분석할 값
index: 행 인덱스
columns: 열 인덱스
aggfunc: 분석 방법
# Pivot the completions into a weekday-by-hour table of 'user_id' counts,
# with the rows ordered according to `weeks`.
sparta_data_pivot_table = pd.pivot_table(
    sparta_data,
    values='user_id',
    index=['done_date_time_weekday'],
    columns=['done_date_time_hour'],
    aggfunc="count",
# `reindex` is the documented way to order rows by label. The earlier
# `.agg(weeks)` relied on each weekday string being resolved through
# attribute access on the index labels and raised when a weekday was absent;
# with `reindex` an absent weekday becomes a row of NaN instead.
).reindex(weeks)
# Notebook-style preview of the pivot table.
sparta_data_pivot_table
# Heatmap of the pivot table: columns along x, rows along y, plus a colour bar.
hour_labels = sparta_data_pivot_table.columns
weekday_labels = sparta_data_pivot_table.index

plt.figure(figsize=(14, 5))
plt.pcolor(sparta_data_pivot_table)
# Ticks sit at cell centres (0.5, 1.5, ...) so each label lines up with its cell.
plt.xticks(np.arange(len(hour_labels)) + 0.5, hour_labels)
plt.yticks(np.arange(len(weekday_labels)) + 0.5, weekday_labels)
# Title and axis labels (Korean, user-facing).
plt.xlabel('시간')
plt.ylabel('요일')
plt.title('요일별 종료 시간 히트맵')
plt.colorbar()
plt.show()