# References:
# https://towardsdatascience.com/5-amazing-pandas-features-you-probably-dont-know-about-5533498aac88
# https://pandas.pydata.org/pandas-docs/stable/user_guide/style.html#Hiding-Data
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.io.formats.style.Styler.html
# https://pbpython.com/pandas_dtypes.html
def convert_currency(val: str) -> float:
    """Convert a currency-formatted string to a float.

    Commas and dollar signs are removed before the remaining text is
    passed to ``float``.

    Args:
        val: Text such as ``"$125,000.00"``.

    Returns:
        The numeric value as a float.

    Raises:
        ValueError: If the cleaned text is not a valid float literal.
    """
    cleaned = val.replace(",", "").replace("$", "")
    return float(cleaned)
def convert_percent(val: str) -> float:
    """Convert a percentage string to its decimal fraction.

    Percent signs are removed, the remaining text is parsed with
    ``float``, and the result is divided by 100.

    Args:
        val: Text such as ``"30.00%"``.

    Returns:
        The fraction as a float (``"30.00%"`` becomes ``0.3``).

    Raises:
        ValueError: If the cleaned text is not a valid float literal.
    """
    cleaned = val.replace("%", "")
    return float(cleaned) / 100
# Load the sales CSV and fix column types at parse time: an explicit dtype
# for the customer number, and a per-column converter for the rest.
df_2 = pd.read_csv(
    "sales_data_types.csv",
    dtype={"Customer Number": "int"},
    converters={
        "2016": convert_currency,
        "2017": convert_currency,
        "Percent Growth": convert_percent,
        # Unparseable values are coerced to NaN instead of raising.
        "Jan Units": lambda raw: pd.to_numeric(raw, errors="coerce"),
        # "Y" maps to True, anything else to False.
        "Active": lambda raw: np.where(raw == "Y", True, False),
    },
)
df_2.dtypes
# Append a single row. DataFrame.append was removed in pandas 2.0, so wrap the
# row in a one-row frame and concatenate it instead.
df = pd.concat(
    [df, pd.DataFrame([{'name': 'name', 'value': 'value'}])],
    ignore_index=True,
)
df.dtypes
# Values that cannot be parsed become NaN
pd.to_numeric(df['Jan Units'], errors='coerce')
# Values that cannot be parsed become 0
pd.to_numeric(df['Jan Units'], errors='coerce').fillna(0)
# Build a datetime column from the separate Month / Day / Year columns
df["Start_Date"] = pd.to_datetime(df[['Month', 'Day', 'Year']])
df["Jan Units"] = pd.to_numeric(df['Jan Units'], errors='coerce').fillna(0)
# Date 30 days before today, formatted as YYYY-MM-DD
thirty_days_ago = datetime.date.today() - pd.Timedelta(days=30)
thirty_days_ago = thirty_days_ago.strftime('%Y-%m-%d')
# Left join on a key column that has the same name in both frames
result_df = pd.merge(df1, df2, how='left', on='key')
# Left join when the key column is named differently in each frame
result_df = pd.merge(df1, df2, how='left', left_on='name1', right_on='name2')
# Drop a column in place
df.drop(columns=['datetime'], inplace=True)
# Replace the index with a fresh default one, discarding the old index
df.reset_index(inplace=True, drop=True)
# Parse the JSON text in 'loc' for each row and take element 1 of its 'c' entry
df['new_col'] = df.apply(lambda x: json.loads(x['loc'])['c'][1], axis=1)
# Apply a function
def convert_currency(val: str) -> float:
    """Convert a currency-formatted string to a float.

    Commas and dollar signs are removed before the remaining text is
    passed to ``float``.

    Args:
        val: Text such as ``"$125,000.00"``.

    Returns:
        The numeric value as a float.

    Raises:
        ValueError: If the cleaned text is not a valid float literal.
    """
    cleaned = val.replace(",", "").replace("$", "")
    return float(cleaned)
# Element-wise conversion of the '2016' column (result is not assigned here)
df["2016"].apply(convert_currency)
# Inline alternative:
# df['2016'].apply(lambda x: x.replace('$', '').replace(',', '')).astype('float')
# Map "Y" to True and everything else to False
df["Active"] = np.where(df["Active"] == "Y", True, False)
# To order by column names, pass axis=1
# For descending order, pass ascending=False
df.sort_values(by=["datetime"], ascending=True, inplace=True)
# NOTE(review): 'name' is renamed here and then read again on the next line;
# these look like independent recipes rather than a sequential script - confirm.
df = df.rename(columns={"name": "new_name"})
df["name"] = df["name"].str.replace("a", "b")
def pg_connect():
    """Create a database engine for the local PostgreSQL instance.

    Returns:
        Whatever ``create_engine`` returns for the hard-coded connection URL
        (presumably a SQLAlchemy ``Engine`` - the import is not visible here).
    """
    # NOTE(review): credentials are embedded in the URL; consider reading them
    # from the environment or a config file.
    url = 'postgresql://postgres:pw@localhost:5432/postgres'
    # When connecting to a local SQL Server instead:
    # url = 'mssql+pyodbc://localhost/db?driver=ODBC+Driver+17+for+SQL+Server&trusted_connection=yes'
    # NOTE(review): SQLAlchemy deprecated use_batch_mode in favour of
    # executemany_mode for the psycopg2 dialect - confirm against the
    # installed version before upgrading.
    engine = create_engine(url, client_encoding='utf8', use_batch_mode=True)
    return engine
pg_engine = pg_connect()
# Append the frame to an existing table, writing 1000 rows per batch
df.to_sql(
    "target_table",
    con=pg_engine,
    if_exists="append",
    index=False,
    chunksize=1000,
)
# Same write, but method='multi' passes multiple rows in each INSERT statement
df.to_sql(
    "target_table",
    con=pg_engine,
    if_exists="append",
    index=False,
    chunksize=1000,
    method="multi",
)
# Use the values of the first row as column names, then drop that row.
# NOTE(review): both calls return a new frame and the results are not
# assigned here, so df itself is unchanged - confirm this is intentional.
df.rename(columns=df.iloc[0])
df.drop(df.index[0])