import pandas as pd
import matplotlib.pyplot as plt

# Load the enrollment detail rows and count the non-null user_id values
# recorded under each lecture_id.
enroll = pd.read_csv('./data/enrolleds_detail.csv')
enroll_detail = enroll.groupby('lecture_id')['user_id'].count()

# Use AppleGothic for all figure text (presumably so the Korean labels
# render; confirm the font is installed on the target machine).
plt.rcParams['font.family'] = 'AppleGothic'

# Wide bar chart with one bar per lecture_id; tick labels are rotated
# vertically.
plt.figure(figsize=(22, 5))
plt.bar(x=enroll_detail.index, height=enroll_detail)
plt.title('강의에 따른 수강완료 수의 합계')
plt.xticks(rotation=90)
plt.show()
# Read the lecture table and key it by lecture_id so it can be joined
# against the per-lecture counts.
lectures = pd.read_csv('./data/lectures.csv').set_index('lecture_id')

# Turn the per-lecture count Series into a two-column frame
# (lecture_id, count).
lecture_count = (
    pd.DataFrame(enroll_detail)
    .reset_index()
    .rename(columns={'user_id': 'count'})
)

# Attach the lecture columns (including 'title') to every count row.
full_lecture = lecture_count.join(lectures, on='lecture_id')

# Same bar chart as before, but labelled with the lecture title.
plt.figure(figsize=(22, 5))
plt.bar(x=full_lecture['title'], height=full_lecture['count'])
plt.title('강의에 따른 수강완료 수의 합계')
plt.xlabel('강의명')
plt.xticks(rotation=90)
plt.show()
# Concatenate Sequence_01.txt ... Sequence_14.txt into one string, with
# every newline replaced by a space.
chunks = []
for number in range(1, 15):
    index = '{:02}'.format(number)  # zero-padded: 01, 02, ..., 14
    filename = "Sequence_" + index + ".txt"
    print(filename)
    # The context manager closes each file as soon as it has been read;
    # utf-8-sig drops a leading byte-order mark if the file has one.
    with open('./data/' + filename, 'r', encoding='utf-8-sig') as sequence_file:
        chunks.append(sequence_file.read().replace("\n", " "))
# Join once at the end instead of growing the string inside the loop.
result = "".join(chunks)
result  # notebook-style display of the combined text
# Read the first two sequence files individually, flattening newlines to
# spaces. The files are opened with the same utf-8-sig encoding used when
# all sequence files are read in the loop, and are closed right after
# reading.
with open('./data/Sequence_01.txt', encoding='utf-8-sig') as sequence_file:
    text = sequence_file.read()
text = text.replace('\n', " ")
text  # notebook-style display

with open('./data/Sequence_02.txt', encoding='utf-8-sig') as sequence_file:
    text2 = sequence_file.read()
text2 = text2.replace('\n', " ")
text2  # notebook-style display
import re

# Remove every character that is neither a word character nor whitespace
# (i.e. punctuation and symbols) from the combined text.
# The pattern is a raw string so that \w and \s reach the regex engine
# unchanged instead of being treated as (invalid) string escapes.
pattern = r'[^\w\s]'
text = re.sub(pattern=pattern, repl='', string=result)
text  # notebook-style display
import numpy as np
from PIL import Image
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm

# Print the file path of every font known to matplotlib whose name
# contains 'Gothic'.
gothic_font_files = [
    entry.fname for entry in fm.fontManager.ttflist if 'Gothic' in entry.name
]
for font_file in gothic_font_files:
    print(font_file)

# Font file handed to WordCloud for rendering the words.
font_path = '/Library/Fonts/AppleGothic.ttf'

# Build a word cloud on a white background from the cleaned text.
wc = WordCloud(background_color='white', font_path=font_path)
wc.generate(text)

# Show the cloud on a large figure without axes.
plt.figure(figsize=(50, 50))
plt.axis("off")
plt.imshow(wc)
plt.show()
# Example: generate a word cloud image
# Load the stencil image as an array and use it as the word cloud mask.
mask = np.array(Image.open('./data/sparta.png'))
wc = WordCloud(font_path=font_path, background_color="white", mask=mask)
wc.generate(text)

# Draw the stencil and the resulting cloud side by side (1 row, 2 columns).
f = plt.figure(figsize=(50, 50))
panels = (
    (mask, {'cmap': plt.cm.gray}, 'Original Stencil'),
    (wc, {'interpolation': 'bilinear'}, 'Sparta Cloud'),
)
for position, (image, imshow_options, caption) in enumerate(panels, start=1):
    f.add_subplot(1, 2, position)
    plt.imshow(image, **imshow_options)
    plt.title(caption, size=40)
    plt.axis("off")
plt.show()
# Rebuild the masked word cloud from the stencil image.
stencil_image = Image.open('./data/sparta.png')
mask = np.array(stencil_image)
cloud_options = {
    'font_path': font_path,
    'background_color': 'white',
    'mask': mask,
}
wc = WordCloud(**cloud_options)
wc.generate(text)

# Render the cloud alone on one figure, display it, then write the same
# figure object to a PNG file.
f = plt.figure(figsize=(50, 50))
plt.imshow(wc, interpolation='bilinear')
plt.title('나만의 워드클라우드', size=40)
plt.axis("off")
plt.show()
f.savefig('./data/myWordCloud.png')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

plt.rcParams['font.family'] = 'AppleGothic'

# Load the enrollment detail rows and parse the 'done_date' strings into
# timestamps using an explicit format.
sparta_data = pd.read_csv('./data/enrolleds_detail.csv')
# NOTE(review): this name shadows the builtin format(); it is kept
# unchanged in case other cells refer to it.
format = '%Y-%m-%dT%H:%M:%S.%f'
sparta_data['done_date_time'] = pd.to_datetime(sparta_data['done_date'], format=format)

# English weekday name ('Monday', ...) of each timestamp.
sparta_data['done_date_time_weekday'] = sparta_data['done_date_time'].dt.day_name()

# Count the non-null user_id values per weekday.
weekdata = sparta_data.groupby('done_date_time_weekday')['user_id'].count()

# groupby orders the weekday labels alphabetically; reindex puts them in
# calendar order. reindex selects by label (agg would interpret the names
# as aggregation functions), and fill_value=0 gives a weekday that never
# occurs in the data a zero-height bar instead of raising.
weeks = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
weekdata = weekdata.reindex(weeks, fill_value=0)

# Bar chart of the counts, Monday through Sunday.
plt.figure(figsize=(8, 5))
plt.bar(weekdata.index, weekdata)
plt.title('요일별 수강완료 수강생 수')
plt.xlabel('요일')
plt.ylabel('수강생(명)')
plt.xticks(rotation=45)
plt.show()
# Hour of day taken from each parsed timestamp.
sparta_data['done_date_time_hour'] = sparta_data['done_date_time'].dt.hour

# Count the non-null user_id values per hour, ordered by hour.
hourdata = (
    sparta_data
    .groupby('done_date_time_hour')['user_id']
    .count()
    .sort_index()
)

# Line chart of the hourly counts with ticks at 0 through 23.
plt.figure(figsize=(10, 5))
plt.plot(hourdata.index, hourdata)
plt.title('시간별 수강 완료 수강생 수')
plt.xlabel('시간')
plt.ylabel('사용자(명)')
plt.xticks(np.arange(24))
plt.show()
# Weekday x hour table: each cell counts the non-null user_id values for
# that weekday/hour combination.
sparta_data_pivot_table = pd.pivot_table(
    sparta_data,
    values='user_id',
    aggfunc='count',
    index=['done_date_time_weekday'],
    columns=['done_date_time_hour'],
)
# Put the rows in calendar order. reindex selects rows by label; agg
# would interpret the weekday names as aggregation function names and
# fails when a weekday is absent from the data.
sparta_data_pivot_table = sparta_data_pivot_table.reindex(weeks)

# Heatmap of the table. Ticks sit at n + 0.5 so that each label is
# centred on its cell.
plt.figure(figsize=(14, 5))
plt.pcolor(sparta_data_pivot_table)
plt.xticks(np.arange(0.5, len(sparta_data_pivot_table.columns), 1), sparta_data_pivot_table.columns)
plt.yticks(np.arange(0.5, len(sparta_data_pivot_table.index), 1), sparta_data_pivot_table.index)
plt.title('요일별 종료 시간 히트맵')
plt.xlabel('시간')
plt.ylabel('요일')
plt.colorbar()
plt.show()
# plt.xticks(np.arange(24)) is shorthand for
# plt.xticks([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23])
# (np.arange(24) stops at 23; 24 itself is not included).
import re

# Read the lyrics file; the context manager closes it once it is read.
# NOTE(review): no encoding is given, so the platform default is used,
# as before. Consider passing an explicit encoding once the file's
# encoding is confirmed.
with open('./data/mysong.txt') as lyrics_file:
    lyrics = lyrics_file.read()

# Flatten newlines to spaces.
lyrics = lyrics.replace('\n', " ")

# Remove every character that is neither a word character nor whitespace.
# The pattern is a raw string so that \w and \s reach the regex engine
# unchanged instead of being treated as (invalid) string escapes.
pattern = r'[^\w\s]'
lyrics = re.sub(pattern=pattern, repl='', string=lyrics)
import numpy as np
from PIL import Image
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm

# Print the file path of every font known to matplotlib whose name
# contains 'Gothic'.
for font_file in (
    entry.fname for entry in fm.fontManager.ttflist if 'Gothic' in entry.name
):
    print(font_file)

# Font file handed to WordCloud for rendering the words.
font_path = '/Library/Fonts/AppleGothic.ttf'

# Build a word cloud on a white background from the cleaned lyrics.
word_cloud = WordCloud(background_color='white', font_path=font_path)
word_cloud.generate(lyrics)

# Draw the cloud on a large figure without axes.
plt.figure(figsize=(50, 50))
plt.axis("off")
plt.imshow(word_cloud)
# Use the image array as the mask for a second lyrics word cloud.
mask_image = Image.open('./data/myimage.png')
mask = np.array(mask_image)
cloud_options = {
    'font_path': font_path,
    'background_color': 'white',
    'mask': mask,
}
word_cloud = WordCloud(**cloud_options)
word_cloud.generate(lyrics)

# Render the masked cloud, display it, then write the same figure object
# to a PNG file.
f = plt.figure(figsize=(50, 50))
plt.imshow(word_cloud, interpolation='bilinear')
plt.title('최애곡 가사 워드 클라우드', size=40)
plt.axis("off")
plt.show()
f.savefig('./data/homework.png')
##############################
# Hourly counts of non-null user_id values, restricted to one weekday at
# a time.
is_monday = sparta_data['done_date_time_weekday'] == 'Monday'
sparta_data_Monday = sparta_data[is_monday].groupby('done_date_time_hour')['user_id'].count()

is_tuesday = sparta_data['done_date_time_weekday'] == 'Tuesday'
sparta_data_Tuesday = sparta_data[is_tuesday].groupby('done_date_time_hour')['user_id'].count()

# Overlay both weekdays on one chart. Each line carries a label and a
# legend is drawn so the two series can be told apart.
plt.figure(figsize=(10, 5))
plt.plot(sparta_data_Monday.index, sparta_data_Monday, label='Monday')
plt.plot(sparta_data_Tuesday.index, sparta_data_Tuesday, label='Tuesday')
plt.title('시간별 수강 완료자 수')
plt.xticks(np.arange(24))
plt.legend()
plt.show()