Data Import
- Excel data
`index_col=0`으로 설정해야 데이터를 불러올 때 unnamed column이 생성되지 않는다.
# Load the workbook, using its first column as the DataFrame index so that
# pandas does not add a separate "Unnamed: 0" column for it.
df = pd.read_excel(io='data/card_notportal_1.xlsx', index_col=0)
# Merge every CSV in `path` whose file name starts with the given prefix
# into a single DataFrame with a fresh 0..n-1 index.
path = '파일이 위치한경로/'
file_list = os.listdir(path)
# os.listdir returns names in arbitrary order; sort so the row order of the
# merged frame is reproducible between runs.
file_list_py = sorted(file for file in file_list if file.startswith('파일명'))

# Read each file once and concatenate a single time. Calling pd.concat inside
# the loop re-copies the accumulated frame on every iteration.
frames = [pd.read_csv(os.path.join(path, name)) for name in file_list_py]

# pd.concat raises ValueError on an empty list, so fall back to an empty
# frame when no file matched the prefix.
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
- csv 파일 불러올 때 tokenizing error
# Workaround for pandas "Error tokenizing data": parse the file with the
# stdlib csv module, which accepts rows of differing lengths, then build the
# DataFrame from the list of rows.
file_path = 'C:/Users/Desktop/~~~/card_att_dist_1.csv'

# `with` closes the file even if parsing fails; newline='' is what the csv
# module expects so that embedded newlines in quoted fields are handled.
with open(file_path, encoding='utf-8', newline='') as f:
    csv_list = list(csv.reader(f))

log_df = pd.DataFrame(csv_list)
log_df
import requests
# Download the NSMC training ratings file and save the raw bytes locally.
# A timeout keeps the call from hanging indefinitely on a stalled connection.
res = requests.get('https://github.com/e9t/nsmc/raw/master/ratings_train.txt', timeout=30)
# Fail loudly on 4xx/5xx instead of writing an error page to disk.
res.raise_for_status()
with open('nsmc_train.csv', 'wb') as f:
    f.write(res.content)
import pymssql
# Run a query against SQL Server and load the result into a DataFrame.
# Fixed: the function is `pymssql.connect` (the original `conncet` raises
# AttributeError). The unused cursor was dropped, since pd.read_sql only
# needs the connection.
conn = pymssql.connect(server='', user='아이디', password='패스워드', database='데이터베이스')
try:
    query = "SELECT * FROM DATABASE"
    data = pd.read_sql(sql=query, con=conn)
finally:
    # Release the connection even if the query fails.
    conn.close()
# pd.read_sql already returns a DataFrame; this keeps the `df` name that the
# original snippet exposes.
df = pd.DataFrame(data)
import xlwings as xw
# Open the workbook through xlwings and convert the used range of the sheet
# selected by `sheets(1)` into a DataFrame.
book = xw.Book('file.xlsx')
used_cells = book.sheets(1).used_range
df = used_cells.options(pd.DataFrame).value
# Read the first member of the zip archive at `path` and decode it as UTF-8.
# Fixed: the original `'utf-8)` was an unterminated string literal, and the
# archive was never closed.
with zipfile.ZipFile(path, 'r') as my_zip:
    zip_list = my_zip.namelist()
    data = my_zip.read(zip_list[0])
data = data.decode('utf-8')
# Read a .dat file as raw bytes and decode it as UTF-8.
# The original `~~ +'.dat'` was a placeholder that applies unary operators to
# a str and raises TypeError; replace the string below with the real path of
# the file, without its extension.
path = '파일이 위치한경로/파일명' + '.dat'
# `with` guarantees the handle is closed; the original never closed it.
with open(path, 'rb') as file:
    data = file.read()
data = data.decode('utf-8')
# Parse the comma-separated file at `path` into two columns: 'DATE' and the
# column named by `tag_name`.
# Fixed: the pandas function is `read_table` (the original `read_tabel`
# raises AttributeError).
pd.read_table(path, sep=',', names=['DATE', tag_name])
# Synthetic dataset generators (presumably `sklearn.datasets` — confirm the
# import elsewhere in the notebook).
# Fixed: the function is `make_classification` (singular); the original
# `make_classifications` does not exist in scikit-learn.
datasets.make_classification()
datasets.make_blobs()
Data 저장
import os
# Create the 'log' directory if it does not exist yet.
# The original called os.mkdir('log') unconditionally first, which raises
# FileExistsError whenever the directory is already there (e.g. on a re-run).
# exist_ok=True makes the call safe to repeat.
os.makedirs('log', exist_ok=True)
# Export `movie` (presumably a DataFrame defined earlier in the notebook)
# without its index. 'utf-8-sig' writes a BOM at the start of the CSV.
movie.to_csv(path_or_buf='movie2.csv', encoding='utf-8-sig', index=False)
movie.to_excel(excel_writer='movie2.xlsx', index=False)
- 여러 dataframe을 여러 시트로 하나의 엑셀 저장
# Write several DataFrames to separate sheets of one workbook.
# ExcelWriter is used as a context manager, so the file is saved and closed
# on exit; `writer.save()` was removed in pandas 2.0.
with pd.ExcelWriter('자격증.xlsx', engine='openpyxl') as writer:
    certi.to_excel(writer, sheet_name='certification')
    certi_tags.to_excel(writer, sheet_name='certification_tags')
# Write each value of the first column of `df1` to a text file, one per line.
# Values are concatenated with '\n', so the column must hold strings.
# `with` closes the file even if a write fails, and iterating the column
# directly replaces the range(len(...)) index loop.
with open("data/OCR/v2/google_card_ids_3.txt", 'w') as f:
    f.writelines(value + '\n' for value in df1.iloc[:, 0])
import pickle

# Sample object to serialize.
x = list(range(1, 6))

# Serialize `x` to disk ...
with open('test.pkl', mode='wb') as f:
    f.write(pickle.dumps(x))

# ... and read it back into `p`.
with open('test.pkl', mode='rb') as f:
    p = pickle.loads(f.read())
- joblib
dump는 호출 한 번에 객체 하나를 저장한다. 여러 객체를 한 파일에 저장하려면 아래처럼 dict로 묶어서 joblib.dump(또는 pickle.dump)에 넘기면 된다.
import joblib
# Bundle several objects into one dict and persist it with joblib.
with open('data/joblib.pkl', mode='wb') as f:
    joblib.dump(value={'주식가격2016':s2016, '주식가격2017': s2017}, filename=f)

# Load the dict back from disk.
with open('data/joblib.pkl', mode='rb') as f:
    data = joblib.load(filename=f)

# Expose each dict key as a variable of the same name. This relies on
# locals() being the module namespace, i.e. top-level notebook code.
locals().update(data)