CSV
CSVs are a very common way of representing tabular data, i.e. data that fits into tables like a spreadsheet.
CSV stands for Comma-Separated Values.
There's actually a built-in library that helps us with CSVs, because Python is a language that's used really heavily for data processing and data analysis. There are a lot of great tools for working with tabular data.
import csv

# Read the "temp" column (second field of every row) as a list of ints.
# newline="" lets the csv module handle line endings itself, as its
# documentation recommends for files passed to csv.reader.
with open("weather_data.csv", newline="") as data_file:
    data = csv.reader(data_file)
    # The header row carries the label "temp" in that column, so skip it;
    # blank rows (empty lists) are ignored as well.
    temperatures = [int(row[1]) for row in data if row and row[1] != "temp"]
print(temperatures)
#output
[12, 14, 15, 14, 21, 22, 24]
Pandas
Pandas is super helpful and super powerful for performing data analysis on tabular data.
documentation
https://pandas.pydata.org/docs/getting_started/index.html#getting-started
import pandas
# Load the whole CSV file into a table-like object; pandas takes the column
# names from the header row and adds a numeric row index on the left.
data = pandas.read_csv("weather_data.csv")
print(data)
#output
day temp condition
0 Monday 12 Sunny
1 Tuesday 14 Rain
2 Wednesday 15 Rain
3 Thursday 14 Cloudy
4 Friday 21 Sunny
5 Saturday 22 Sunny
6 Sunday 24 Sunny
# Select a single column by its name; the printout ends with the column's name and dtype.
print(data["temp"])
#output
0 12
1 14
2 15
3 14
4 21
5 22
6 24
Name: temp, dtype: int64
Data Frame
import pandas
data = pandas.read_csv("weather_data.csv")
# Inspect what kind of object read_csv returned.
print(type(data))
#output
#<class 'pandas.core.frame.DataFrame'>
A data frame is kind of the equivalent of a whole table.
So every single sheet inside an Excel file or inside a Google sheet file would be considered a data frame in Pandas.
https://pandas.pydata.org/docs/reference/frame.html#serialization-io-conversion
Series
# Inspect the type of a single column taken out of the table.
print(type(data["temp"]))
#output
#<class 'pandas.core.series.Series'>
The Series is the other super important concept in pandas.
And the Series is basically equivalent to a list. It's kind of like a single column in the table.
https://pandas.pydata.org/docs/reference/series.html#computations-descriptive-stats
The whole table is basically a DataFrame in pandas, and every single column is a Series, which is kind of like a list.
import pandas

# Work out the average temperature by hand: turn the "temp" column into a
# plain Python list, then divide its sum by its length.
data = pandas.read_csv("weather_data.csv")
temp_list = data.temp.to_list()
total, count = sum(temp_list), len(temp_list)
average = total / count
print(average)
#output
17.428571428571427
series.mean()
# Series.mean() yields the same average as the manual sum / len calculation.
print(data["temp"].mean())
#output
17.428571428571427
series.max()
# Largest value in the "temp" column.
print(data["temp"].max())
#output
24
# A column can be selected with bracket (dict-style) access or with
# attribute access; both lines print the same column.
print(data["condition"])
print(data.condition)
위 두줄의 값은 똑같음
#output
0 Sunny
1 Rain
2 Rain
3 Cloudy
4 Sunny
5 Sunny
6 Sunny
Name: condition, dtype: object
# Get the data in a row: index the DataFrame with a condition on one of its
# columns to keep only the rows where that condition holds.
print(data[data.day == "Monday"])
#output
day temp condition
0 Monday 12 Sunny
# Keep only the row(s) whose temperature equals the column's maximum.
print(data[data.temp == data.temp.max()])
#output
day temp condition
6 Sunday 24 Sunny
# Convert Monday's temperature to Fahrenheit (value * 9/5 + 32).
monday = data[data.day == "Monday"]
# `monday.temp` is a one-element Series, not a number. Calling int() directly
# on such a Series is deprecated in recent pandas releases, so pull the scalar
# out with .item() first (it raises ValueError unless exactly one row matched).
monday_temp = int(monday.temp.item())
monday_temp_F = monday_temp * 9 / 5 + 32
print(monday_temp_F)
#output
53.6
# Build a DataFrame from scratch: each dictionary key becomes a column name
# and each list holds that column's values, one entry per row.
students = ["Amy", "James", "Angela"]
scores = [76, 56, 65]
data_dict = {"students": students, "scores": scores}
data = pandas.DataFrame(data=data_dict)
print(data)
#output
students scores
0 Amy 76
1 James 56
2 Angela 65
# Write the DataFrame to a CSV file; the row index is written too, as an unnamed first column.
data.to_csv("new_data.csv")
new_data.csv
,students,scores
0,Amy,76
1,James,56
2,Angela,65
Squirrel Count
import pandas

# Count the squirrels of each primary fur colour in the census data and
# save the tallies to squirrel_count.csv.
data = pandas.read_csv("2018_Central_Park_Squirrel_Census_-_Squirrel_Data.csv")
fur_colour = data["Primary Fur Color"]
colours = ["Gray", "Cinnamon", "Black"]
data_dict = {
    "Colours": colours,
    # One count per colour: the number of rows whose fur colour matches it.
    "Count": [len(data[fur_colour == colour]) for colour in colours],
}
df = pandas.DataFrame(data_dict)
df.to_csv("squirrel_count.csv")
squirrel_count.csv
,Colours,Count
0,Gray,2473
1,Cinnamon,392
2,Black,103
turtle.onscreenclick()
import turtle


def get_mouse_click_coor(x, y):
    """Print the coordinates of a mouse click on the turtle screen.

    Registered below as the screen's click handler, which calls it with the
    x and y position of each click.
    """
    print(x, y)


turtle.onscreenclick(get_mouse_click_coor)
turtle.mainloop()  # Keeps the window open and listening for events.
Final
import turtle
import pandas

# U.S. States guessing game: the player types state names, and every correct
# guess is written onto the blank map at that state's coordinates.
screen = turtle.Screen()
screen.title("U.S. States Game")
image = "blank_states_img.gif"
screen.addshape(image)
turtle.shape(image)

data = pandas.read_csv("50_states.csv")
all_states = data.state.to_list()
guessed_states = []

# One hidden, pen-up turtle is enough to write every label.
writer = turtle.Turtle()
writer.hideturtle()
writer.penup()

while len(guessed_states) < len(all_states):
    answer = screen.textinput(
        title=f"{len(guessed_states)}/{len(all_states)} States Correct",
        prompt="What's another state's name?",
    )
    # textinput() returns None when the dialog is cancelled; treat that the
    # same as typing "Exit" instead of crashing on None.title().
    answer_state = "Exit" if answer is None else answer.strip().title()

    if answer_state == "Exit":
        # Save the states the player has not guessed yet, then stop.
        missing_states = [
            state for state in all_states if state not in guessed_states
        ]
        new_data = pandas.DataFrame(missing_states)
        new_data.to_csv("states_to_learn.csv")
        break

    # Count a state only once, so a repeated correct guess cannot inflate
    # the score or end the game early.
    if answer_state in all_states and answer_state not in guessed_states:
        guessed_states.append(answer_state)
        state_data = data[data.state == answer_state]
        # .item() extracts the scalar from the one-row column; calling int()
        # directly on a single-element Series is deprecated in recent pandas.
        writer.goto(int(state_data.x.item()), int(state_data.y.item()))
        writer.write(answer_state)