from pandas import Series,DataFrame
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import font_manager,rc
# Make the Gulim TrueType collection matplotlib's default font family
# (presumably so the Korean labels used in this file render -- confirm).
gulim_props = font_manager.FontProperties(fname='c:/windows/fonts/gulim.ttc')
font_name = gulim_props.get_name()
rc('font', family=font_name)
# Demo data: one bar per subject, bar height is the score.
x = [1, 2, 3]
y = [90, 80, 70]
bar_colors = ['orange', 'green', 'blue']
subjects = ['SQL', 'PL/SQL', 'PYTHON']

# Step 1: plain colored bars.
plt.bar(x=x, height=y, color=bar_colors)
plt.show()  # close out this figure so the next step starts on fresh axes

# Step 2: replace the numeric ticks with the subject names.
plt.xticks(x, labels=subjects)
plt.bar(x=x, height=y, color=bar_colors)
plt.show()

# Step 3: write each score one unit above its bar, plus a free-form caption.
for xpos, score in zip(x, y):
    plt.text(xpos, score + 1, score)
plt.text(2, 90, 'My Job is an Oracle Engineer!!')
plt.xticks(x, labels=subjects)
plt.bar(x=x, height=y, color=bar_colors)
plt.show()

# Step 4: same labels, now styled with a font dict and a rounded text box.
font = {
    'family': 'sans-serif',
    'color': 'darkred',
    'size': 15,
}
box = {
    'boxstyle': 'round',
    'edgecolor': 'black',
    'facecolor': 'white',
}
plt.text(1, 91, 90, fontdict=font)   # font styling only
plt.text(2, 81, 80, bbox=box)        # boxed, default font
plt.text(3, 71, 70)                  # unstyled, for comparison
plt.text(2, 90, 'My Job is an Oracle Engineer!!', fontdict=font, bbox=box)
plt.xticks(x, labels=subjects)
plt.bar(x=x, height=y, color=bar_colors)
plt.show()

# Load the employee table; the first info() shows the column dtypes as read.
emp = pd.read_csv('C:/Temp/employees.csv')
emp.info()

# Parse HIRE_DATE into datetimes so the .dt accessor can be used on it.
emp['HIRE_DATE'] = pd.to_datetime(emp['HIRE_DATE'])
emp.info()

# Weekday number of every hire date (pandas convention: Monday=0 ... Sunday=6).
# A bare expression like this only displays a value in a notebook/REPL.
emp['HIRE_DATE'].dt.weekday

# Head count per weekday. The result is bound to `week` because the plotting
# code further down the file reads it under that name.
week = emp['EMPLOYEE_ID'].groupby(emp['HIRE_DATE'].dt.weekday).count()
week

# Korean weekday names, indexed by the pandas weekday number (0 = Monday).
weekday_names = ['월', '화', '수', '목', '금', '토', '일']
# Label only the weekdays present in `week`, so the number of labels always
# matches the number of tick positions.
week_labels = [weekday_names[day] for day in week.index]

# Plain bar chart of hires per weekday.
plt.bar(x=week.index, height=week)
plt.show()

# 6. Set the x-axis labels
plt.xticks(week.index, labels=week_labels)
plt.bar(x=week.index, height=week)
plt.show()

# 7. Set the y-axis tick interval
plt.yticks(range(0, 21, 5))
plt.xticks(week.index, labels=week_labels)
plt.bar(x=week.index, height=week)
plt.show()

# Write each count slightly above (and a little left of center of) its bar.
for day, count in week.items():
    plt.text(day - 0.1, count + 0.2, count)
plt.yticks(range(0, 21, 5))
plt.xticks(week.index, labels=week_labels)
plt.bar(x=week.index, height=week)
plt.show()

# Head count per hire quarter (index holds the quarter numbers).
quarter = emp['EMPLOYEE_ID'].groupby(emp['HIRE_DATE'].dt.quarter).count()
quarter.index

# Pie chart with the share of each quarter printed to one decimal place.
plt.pie(quarter, autopct='%1.1f%%')
plt.show()

# Same pie with a legend and a title; `label` is reused by the charts below.
label = [f'{q}분기' for q in quarter.index]
plt.pie(quarter, autopct='%1.1f%%')
plt.legend(labels=label, loc='lower center', ncol=4)
plt.title('분기별 입사 현황', fontsize=10)
plt.show()

# Vertical bars, each annotated with its count just above the bar.
plt.bar(x=quarter.index, height=quarter)
plt.xticks(quarter.index, labels=label)
for q, count in quarter.items():
    plt.text(q - 0.1, count + 0.2, count)
plt.show()

# Horizontal bars, each annotated with its count at the end of the bar.
plt.barh(y=quarter.index, width=quarter)
plt.yticks(ticks=quarter.index, labels=label)
for q, count in quarter.items():
    plt.text(count, q, count)
plt.show()

# Figure 1: vertical and horizontal bar charts side by side (1 row x 2 cols),
# with the counts written manually via plt.text.
plt.subplot(1, 2, 1)
plt.bar(x=quarter.index, height=quarter)
plt.xticks(quarter.index, labels=label)
for q, count in quarter.items():
    plt.text(q - 0.1, count + 0.2, count)
plt.subplot(1, 2, 2)
plt.barh(y=quarter.index, width=quarter)
plt.yticks(ticks=quarter.index, labels=label)
for q, count in quarter.items():
    plt.text(count, q, count)
plt.show()  # finish the 1x2 figure before switching to a 2x1 grid

# Figure 2: the same two charts stacked (2 rows x 1 col); bar_label places
# the value labels automatically from the returned bar container.
plt.subplot(2, 1, 1)
p = plt.bar(x=quarter.index, height=quarter)
plt.xticks(quarter.index, labels=label)
plt.bar_label(p)
plt.subplot(2, 1, 2)
p = plt.barh(y=quarter.index, width=quarter)
plt.yticks(ticks=quarter.index, labels=label)
plt.bar_label(p)
plt.show()

# Count employees per hire year via groupby (displayed only in a notebook).
emp.EMPLOYEE_ID.groupby(emp.HIRE_DATE.dt.year).count()

# Count occurrences of each hire year via value_counts.
emp.HIRE_DATE.dt.year.value_counts()

# Keep the value_counts result and order it by year for plotting.
hire_year = emp.HIRE_DATE.dt.year
years = hire_year.value_counts()
years.index
years = years.sort_index()
years

# Plain bar chart of hires per year.
plt.bar(x=years.index, height=years)
plt.show()

import numpy as np

# One shade of the 'PuRd' colormap per bar, running from light (0.1) to the
# darkest (1.0); sized from the data instead of assuming a fixed bar count.
cmap = plt.get_cmap('PuRd')
colors = [cmap(shade) for shade in np.linspace(0.1, 1, len(years), endpoint=True)]
p = plt.bar(x=years.index, height=years, color=colors)
plt.bar_label(p)
plt.show()

# Locate the tallest bar from the data rather than hard-coding its position.
peak_year = years.idxmax()
peak_count = years.max()

# Text label placed just above the tallest bar.
plt.bar(x=years.index, height=years, color=colors)
plt.text(peak_year - 0.3, peak_count + 0.5, '최대값')
plt.show()

# Arrow annotation pointing at the top of the tallest bar; the text anchor
# (xytext) is a fixed data coordinate.
plt.bar(x=years.index, height=years, color=colors)
plt.annotate(text='max', xy=(peak_year, peak_count), xytext=(2001, 20),
             arrowprops={'arrowstyle': 'wedge'})
plt.show()

# Same annotation with a colored arrow.
plt.bar(x=years.index, height=years, color=colors)
plt.annotate(text='max', xy=(peak_year, peak_count), xytext=(2001, 20),
             arrowprops={'arrowstyle': 'wedge',
                         'facecolor': 'red',   # arrow fill color
                         'edgecolor': 'blue'})
plt.show()

# Line chart of hires per year.
plt.plot(years.index, years)
plt.show()

# Same chart with each year tick suffixed by '년'.
year_ticks = [f'{year}년' for year in years.index]
plt.plot(years.index, years)
plt.xticks(years.index, year_ticks)
plt.show()

# Same chart with an arrow pointing at the peak, located from the data
# rather than hard-coded; the text anchor (xytext) is a fixed coordinate.
peak_year = years.idxmax()
peak_count = years.max()
plt.plot(years.index, years)
plt.xticks(years.index, year_ticks)
plt.annotate(text='max', xy=(peak_year, peak_count), xytext=(2001, 20),
             arrowprops={'arrowstyle': 'wedge',
                         'facecolor': 'red',   # arrow fill color
                         'edgecolor': 'blue'})
plt.show()

import cx_Oracle  # imported here: no earlier line in the file imports it

# Query the number of v$log_history rows per day.
# NOTE(review): SYSDBA credentials are hard-coded in source -- consider
# moving them to configuration.
conn = cx_Oracle.connect("sys", "oracle", "localhost:1521/xe",
                         mode=cx_Oracle.SYSDBA, encoding='UTF-8')
try:
    cursor = conn.cursor()
    try:
        cursor.execute("""SELECT TRUNC(first_time), COUNT(*)
FROM v$log_history
GROUP BY TRUNC(first_time)
ORDER BY 1""")
        data = cursor.fetchall()
    finally:
        # Release the cursor even if the query fails.
        cursor.close()
finally:
    # Release the session before the (blocking) plot window opens.
    conn.close()

# Naming the columns at construction also works when the query returns no rows.
df = pd.DataFrame(data, columns=['day', 'freq'])
plt.plot(df['day'], df['freq'])
plt.xticks(rotation=45)
plt.xlabel('날짜')
plt.ylabel('빈도수')
plt.title('로그 히스토리')
plt.show()
print('---------------------------------------------------------------------------------')
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import cx_Oracle

# Hires per year, ordered by year.
years = emp.HIRE_DATE.dt.year.value_counts().sort_index()

# Locate the peak from the data rather than hard-coding its coordinates.
peak_year = years.idxmax()
peak_count = years.max()

plt.plot(years.index, years)
plt.xticks(years.index, [f"{year}년" for year in years.index])
# Arrow pointing at the peak; the text anchor (xytext) is a fixed coordinate.
plt.annotate(text="max",
             xy=(peak_year, peak_count),
             xytext=(2001, 28),
             arrowprops={"arrowstyle": "wedge",
                         "facecolor": "red",
                         "edgecolor": "blue"},
             color='black')
plt.text(peak_year, peak_count, "최대값")
# Show this chart on its own so later plots do not land on the same axes.
plt.show()
# Query the number of v$log_history rows per day from the ora19c service.
# NOTE(review): SYSDBA credentials are hard-coded in source -- consider
# moving them to configuration.
conn = cx_Oracle.connect("sys", "oracle", "192.168.56.150:1521/ora19c",
                         mode=cx_Oracle.SYSDBA, encoding='UTF-8')
try:
    cursor = conn.cursor()
    try:
        cursor.execute("""SELECT TRUNC(first_time), COUNT(*)
FROM v$log_history
GROUP BY TRUNC(first_time)
ORDER BY 1""")
        data = cursor.fetchall()
    finally:
        # Release the cursor even if the query fails.
        cursor.close()
finally:
    # Release the session before the (blocking) plot window opens.
    conn.close()

# Naming the columns at construction also works when the query returns no rows.
df = pd.DataFrame(data, columns=['day', 'freq'])
plt.plot(df['day'], df['freq'])
plt.xticks(rotation=45)
plt.xlabel('날짜')
plt.ylabel('빈도수')
plt.title('로그 히스토리')
plt.show()
# Date-axis examples: control the tick spacing with matplotlib.dates locators.
import matplotlib.dates as mdates

# Draw the same day/freq line once per tick spacing; only the major locator
# on the x axis differs between the figures.
tick_locators = [
    mdates.DayLocator(interval=3),      # every 3 days
    mdates.WeekdayLocator(interval=1),  # weekly
    mdates.MonthLocator(interval=1),    # monthly
]
for locator in tick_locators:
    fig, ax = plt.subplots()
    ax.plot('day', 'freq', data=df)
    ax.xaxis.set_major_locator(locator)
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y%m%d'))
    fig.autofmt_xdate()  # fig is the current figure right after subplots()
    plt.show()
# Yearly spacing: YearLocator
# Hourly spacing: HourLocator



# Install once from a shell (this is not Python code):
#   conda install -c conda-forge wordcloud
from wordcloud import WordCloud

# Absolute path to the Gulim font; the drive letter must be followed by '/'
# or the path is resolved relative to the current directory on that drive.
gulim_path = 'c:/Windows/Fonts/gulim.ttc'

# Word cloud built from an explicit word -> frequency mapping.
word = {'떡볶이':100,'감자탕':50,'순대국밥':10,'치즈':120,'치킨':70,'김밥':40,'짜장면':300,'야끼만두':150,'볶음밥':200,'핫도그':90,'호두':60,'알감자':50,'짬뽕':200,'탕수육':400}
w = WordCloud(font_path=gulim_path,
              background_color='white',
              width=900, height=500).generate_from_frequencies(word)
plt.imshow(w)
plt.axis('off')
plt.show()

# Word cloud built from raw text; the file is closed as soon as it is read.
with open('C:/Users/itwill/Downloads/obama.txt', encoding='UTF-8') as speech_file:
    obama = speech_file.read()
w = WordCloud(font_path=gulim_path,
              background_color='white',
              width=900, height=500).generate(obama)
plt.imshow(w)
plt.axis('off')
plt.show()

# Sample ages to bucket into decades.
ages = [21, 24, 25, 26, 27, 29, 31, 37, 39, 40, 42, 44, 50, 56, 59, 60, 69]
bins = [20, 30, 40, 50, 60, 70]  # bucket edges
# One label per bucket, named after its lower edge ('20대', '30대', ...), so
# the label count always equals len(bins) - 1.
labels = [f'{lower}대' for lower in bins[:-1]]
# right=False makes each bucket closed on the left: [20, 30), [30, 40), ...
age_distribution = pd.cut(ages, bins=bins, labels=labels, right=False)
# Count per bucket, in bucket order.
frequency_table = age_distribution.value_counts().sort_index()
print(frequency_table)

ages = [21, 24, 25, 26, 27, 29, 31, 37, 39, 40, 42, 44, 50, 56, 59, 60, 69]
bins = [20, 30, 40, 50, 60, 70]  # bucket edges, one bucket per decade

# Default binning and decade binning drawn on the same axes for comparison.
plt.hist(ages)
plt.hist(ages, bins=bins)
plt.show()  # finish this figure so the next variant starts on fresh axes

# Outline-only histogram normalized to a density.
plt.hist(ages, bins=bins, density=True, histtype='step')
plt.show()

# Bars at 90% of the bucket width, leaving gaps between buckets.
plt.hist(ages, bins=bins, rwidth=0.9)
plt.show()

# Horizontal variant in sky blue.
plt.hist(ages, bins=bins, rwidth=0.9, orientation='horizontal', color='skyblue')
plt.show()

# Distribution of salaries.
plt.hist(emp['SALARY'])
plt.show()  # finish this figure so the box plots start on fresh axes

# Summary statistics (bare expressions only display in a notebook/REPL).
emp['SALARY'].describe()

# Minimum, quartiles and maximum.
np.percentile(emp['SALARY'], [0, 25, 50, 75, 100])

# Vertical box plot.
plt.boxplot(emp['SALARY'])
plt.show()

# Horizontal box plot.
plt.boxplot(emp['SALARY'], vert=False)
plt.show()

# Five-number summary of SALARY in a single percentile call. The extremes are
# named min_salary / max_salary so the builtins min() and max() stay usable.
min_salary, Q1, Q2, Q3, max_salary = np.percentile(
    emp['SALARY'], [0, 25, 50, 75, 100]
)
iqr = Q3 - Q1          # interquartile range
lf = Q1 - 1.5 * iqr    # lower fence
uf = Q3 + 1.5 * iqr    # upper fence
# Values outside the lf ~ uf range are outliers.

# Install once from a shell (this is not Python code):
#   pip install stemgraphic
import stemgraphic
stemgraphic.stem_graphic(emp['SALARY'])
