# Import pandas as a alias 'pd'
import pandas as pd
import numpy as np
# Load the CSV file "marathon_results_2017.csv" under "data" folder
"""
Step 1. 원시 Data Read Files (원시 Data Read 3개)
"""
marathon_2015 = pd.read_csv("../data/marathon_results_2015.csv")
marathon_2016 = pd.read_csv("../data/marathon_results_2016.csv")
marathon_2017 = pd.read_csv("../data/marathon_results_2017.csv")
"""
Step 2. Year column 추가
각각의 년도를 2015 , 2016,2017
"""
marathon_2015['Year'] = '2015'
marathon_2016['Year'] = '2016'
marathon_2017['Year'] = '2017'
"""
Step 3. Merge Files (원시 Data 3개 Merge)
"""
marathon_2015_2017 = pd.concat([marathon_2015,marathon_2016,marathon_2017],
ignore_index=True, sort=False)
"""
Step 4. Drop columns(안 쓰는 Data Drop)
"""
marathon_2015_2017 = marathon_2015_2017.drop(['Unnamed: 0','Bib', 'Citizen', 'Unnamed: 9', 'Proj Time', 'Unnamed: 8'],
axis='columns')
"""
Step 5. Convert columns value(시간을 초 단위로 변환)
1) 초기화(numpy)
marathon_2015_2017['5K'] == '-')] = '0:0:0' --> 5K,10K,15K,20K,Half , 25K,30K , 35K , 40K,Pace ,Official Time
"""
marathon_2015_2017['5K'] = np.where(marathon_2015_2017['5K'] == '-', '0:0:0', marathon_2015_2017['5K'])
marathon_2015_2017['10K'] = np.where(marathon_2015_2017['10K'] == '-', '0:0:0', marathon_2015_2017['10K'])
marathon_2015_2017['15K'] = np.where(marathon_2015_2017['15K'] == '-', '0:0:0', marathon_2015_2017['15K'])
marathon_2015_2017['20K'] = np.where(marathon_2015_2017['20K'] == '-', '0:0:0', marathon_2015_2017['20K'])
marathon_2015_2017['Half'] = np.where(marathon_2015_2017['Half'] == '-', '0:0:0', marathon_2015_2017['Half'])
marathon_2015_2017['25K'] = np.where(marathon_2015_2017['25K'] == '-', '0:0:0', marathon_2015_2017['25K'])
marathon_2015_2017['30K'] = np.where(marathon_2015_2017['30K'] == '-', '0:0:0', marathon_2015_2017['30K'])
marathon_2015_2017['35K'] = np.where(marathon_2015_2017['35K'] == '-', '0:0:0', marathon_2015_2017['35K'])
marathon_2015_2017['40K'] = np.where(marathon_2015_2017['40K'] == '-', '0:0:0', marathon_2015_2017['40K'])
marathon_2015_2017['Pace'] = np.where(marathon_2015_2017['Pace'] == '-', '0:0:0', marathon_2015_2017['Pace'])
marathon_2015_2017['Official Time'] = np.where(marathon_2015_2017['Official Time'] == '-', '0:0:0',
marathon_2015_2017['Official Time'])
"""
Step 5. Convert columns value(시간을 초 단위로 변환)
2) Timedelta 유형으로 변환(초변환)
# 타임 델타는 시간 단위의 절대적인 차이이며 차이 단위 (예 : 일,시, 분, 초)로 표시.
# 이 메소드는 인식 된 timedelta 형식 / 값의 인수를 Timedelta 유형으로 변환
"""
marathon_2015_2017['5K'] = pd.to_timedelta(marathon_2015_2017['5K'])
marathon_2015_2017['10K'] = pd.to_timedelta(marathon_2015_2017['10K'])
marathon_2015_2017['15K'] = pd.to_timedelta(marathon_2015_2017['15K'])
marathon_2015_2017['20K'] = pd.to_timedelta(marathon_2015_2017['20K'])
marathon_2015_2017['Half'] = pd.to_timedelta(marathon_2015_2017['Half'])
marathon_2015_2017['25K'] = pd.to_timedelta(marathon_2015_2017['25K'])
marathon_2015_2017['30K'] = pd.to_timedelta(marathon_2015_2017['30K'])
marathon_2015_2017['35K'] = pd.to_timedelta(marathon_2015_2017['35K'])
marathon_2015_2017['40K'] = pd.to_timedelta(marathon_2015_2017['40K'])
marathon_2015_2017['Pace'] = pd.to_timedelta(marathon_2015_2017['Pace'])
marathon_2015_2017['Official Time'] = pd.to_timedelta(marathon_2015_2017['Official Time'])
# Convert time to seconds value using astype method
# int64 :부호 있는 64비트 정수형
marathon_2015_2017['5K'] = marathon_2015_2017['5K'].astype('m8[s]').astype(np.int64)
marathon_2015_2017['10K'] = marathon_2015_2017['10K'].astype('m8[s]').astype(np.int64)
marathon_2015_2017['15K'] = marathon_2015_2017['15K'].astype('m8[s]').astype(np.int64)
marathon_2015_2017['20K'] = marathon_2015_2017['20K'].astype('m8[s]').astype(np.int64)
marathon_2015_2017['Half'] = marathon_2015_2017['Half'].astype('m8[s]').astype(np.int64)
marathon_2015_2017['25K'] = marathon_2015_2017['25K'].astype('m8[s]').astype(np.int64)
marathon_2015_2017['30K'] = marathon_2015_2017['30K'].astype('m8[s]').astype(np.int64)
marathon_2015_2017['35K'] = marathon_2015_2017['35K'].astype('m8[s]').astype(np.int64)
marathon_2015_2017['40K'] = marathon_2015_2017['40K'].astype('m8[s]').astype(np.int64)
marathon_2015_2017['Pace'] = marathon_2015_2017['Pace'].astype('m8[s]').astype(np.int64)
marathon_2015_2017['Official Time'] = marathon_2015_2017['40K'].astype('m8[s]').astype(np.int64)
"""
Step 6. Save file(marathon_2015_2017v3.csv로 저장)
"""
marathon_2015_2017.to_csv("../data/marathon_2015_2017v3.csv",index = None, header=True)