import psycopg2 conn = psycopg2.connect( 'host=localhost port=5432 dbname=source user=postgres password=postgres' ) conn.set_session(autocommit=True) cur = conn.cursor()
import pandas as pd cur.execute( f''' select * from lot_public_price where substr(pnu,1,2) = '11' ''' ) lpp_dt = pd.DataFrame( cur.fetchall(), columns=[ col.name for col in cur.description ] )
lpp_dt
lpp_dt.info()
숫자형 데이터의 확인을 위해 형변환 필요
lpp_dt.land_seqno = lpp_dt.land_seqno.astype('int') lpp_dt.pnilp = lpp_dt.pnilp.astype('int') lpp_dt.parea = lpp_dt.parea.astype('float')
lpp_dt.info()
import numpy as np lpp_dt.describe(include=np.number).transpose()
lpp_dt.describe(include=np.object).transpose()
import matplotlib.pyplot as plt plt.figure(figsize=(10,7)) plt.hist( lpp_dt.sgg_cd, bins=50, orientation='horizontal' ) plt.xlabel('count') plt.ylabel('sgg_cd') plt.show()
print(lpp_dt.sgg_cd.value_counts().mean()) print(lpp_dt.sgg_cd[(lpp_dt.sgg_cd=='11680') | (lpp_dt.sgg_cd=='11305')].value_counts()) print(lpp_dt.land_loc_cd[((lpp_dt.sgg_cd=='11680') & (lpp_dt.land_loc_cd=='10800')) | ((lpp_dt.sgg_cd=='11305') & (lpp_dt.land_loc_cd=='10300'))].value_counts())
lpp_dt_2sgg = lpp_dt[((lpp_dt.sgg_cd=='11680') & (lpp_dt.land_loc_cd=='10800')) | ((lpp_dt.sgg_cd=='11305') & (lpp_dt.land_loc_cd=='10300'))][(lpp_dt.jimok=='08') | (lpp_dt.spfc1=='14')] print(lpp_dt_2sgg.sgg_cd.value_counts())
lpp_dt_2sgg['is_gangnam'] = lpp_dt_2sgg.sgg_cd == '11680' print(lpp_dt_2sgg.is_gangnam.value_counts())
def to_csv_from_dt_column(pd_col, save_file_name): f = open(save_file_name, 'w') f.write( ','.join(pd_col.to_list()) ) f.close() to_csv_from_dt_column( lpp_dt_2sgg.pnilp.astype('string'), 'lpp_pnilp.csv' ) to_csv_from_dt_column( lpp_dt_2sgg.parea.astype('string'), 'lpp_parea.csv' ) to_csv_from_dt_column( lpp_dt_2sgg.is_gangnam.astype('int').astype('string'), 'lpp_is_gangnam.csv' )