https://github.com/justmarkham/DAT8/blob/master/data/chipotle.tsv
import pandas as pd
# Raw-GitHub location of the chipo.csv dataset, written as adjacent literals
# that Python joins into a single URL string.
data_url = (
    'https://raw.githubusercontent.com/'
    'Datamanim/pandas/main/chipo.csv'
)
# Fetch and parse the CSV into the DataFrame that the later snippets query.
df = pd.read_csv(filepath_or_buffer=data_url)
# 식당 데이터:
# Print the step label, then the first five rows whose quantity equals 3.
print('๐ quantity ๊ฐ์ด 3์ธ ๋ฐ์ดํฐ ์์ 5๊ฐ ์ถ๋ ฅ:')
# .eq(3) builds the same boolean mask as `== 3`; indexing with the mask keeps
# only the matching rows, and head(5) spells out the default row count.
print(df[df['quantity'].eq(3)].head(5))
df['quantity']==3
: quantity 라는 열의 값이 3과 같은지 비교하여 True/False 로 이루어진 불리언 시리즈를 반환.
df.loc[조건]
: .loc[] 는 조건에 맞는 행들을 선택. 여기서는 quantity == 3 인 행들만 선택.
.head()
: 상위 5개 행만 보여주는 함수. .head(3) 이면 상위 3개, .head(10) 이면 상위 10개.
๐ quantity ๊ฐ์ด 3์ธ ๋ฐ์ดํฐ ์์ 5๊ฐ ์ถ๋ ฅ:
order_id quantity item_name choice_description item_price
409 178 3 Chicken Bowl [[Fresh Tomato Salsa (Mild), Tomatillo-Green C... $32.94
445 193 3 Bowl [Braised Carnitas, Pinto Beans, [Sour Cream, C... $22.20
689 284 3 Canned Soft Drink [Diet Coke] $3.75
818 338 3 Bottled Water NaN $3.27
850 350 3 Canned Soft Drink [Sprite] $3.75
# Print the step label, then the first five quantity == 3 rows renumbered 0..4.
print('๐ quantity ๊ฐ์ด 3์ธ ๋ฐ์ดํฐ ์์ 5๊ฐ๋ฅผ ์ถ๋ ฅํ๋๋ฐ ์ธ๋ฑ์ค๋ฅผ 0๋ถํฐ ์์ํ๊ฒ ์ฌ์ ์:')
# iloc[:5] takes the first five matching rows; drop=True discards the old
# index labels instead of keeping them as an extra column.
print(df[df['quantity'].eq(3)].iloc[:5].reset_index(drop=True))
.reset_index()
: 기존 인덱스를 새 컬럼(index)으로 옮기고, 인덱스를 0부터 다시 매김.
.reset_index(drop=True)
: 기존 인덱스를 컬럼으로 남기지 않고 버린 뒤, 인덱스를 0부터 다시 매김.
๐ quantity ๊ฐ์ด 3์ธ ๋ฐ์ดํฐ ์์ 5๊ฐ๋ฅผ ์ถ๋ ฅํ๋๋ฐ ์ธ๋ฑ์ค๋ฅผ 0๋ถํฐ ์์ํ๊ฒ ์ฌ์ ์:
order_id quantity item_name choice_description item_price
0 178 3 Chicken Bowl [[Fresh Tomato Salsa (Mild), Tomatillo-Green C... $32.94
1 193 3 Bowl [Braised Carnitas, Pinto Beans, [Sour Cream, C... $22.20
2 284 3 Canned Soft Drink [Diet Coke] $3.75
3 338 3 Bottled Water NaN $3.27
4 350 3 Canned Soft Drink [Sprite] $3.75
# Print the step label, then a two-column frame holding quantity and item_price.
print('๐ quantity, item_price์ ์ปฌ๋ผ์ผ๋ก ๊ตฌ์ฑ๋ ์๋ก์ด ๋ฐ์ดํฐ ํ๋ ์์ ์ ์:')
# All rows (:) and the listed column labels; a list of labels yields a DataFrame.
print(df.loc[:, ['quantity', 'item_price']])
๐ quantity, item_price์ ์ปฌ๋ผ์ผ๋ก ๊ตฌ์ฑ๋ ์๋ก์ด ๋ฐ์ดํฐ ํ๋ ์์ ์ ์:
quantity item_price
0 1 $2.39
1 1 $3.39
2 1 $3.39
3 1 $2.39
4 2 $16.98
... ... ...
4617 1 $11.75
4618 1 $11.75
4619 1 $11.25
4620 1 $8.75
4621 1 $8.75
[4622 rows x 2 columns]
❓ 왜 df['quantity', 'item_price'] 이렇게 쓰면 안되나요?
이건 Pandas 입장에서 튜플 하나를 인덱싱한 것처럼 해석된다. 따라서 df[['quantity', 'item_price']] 처럼 대괄호 2개를 작성해야만 컬럼 두 개를 선택하라는 의미로 해석된다.

| 표현 | 의미 | 동작 여부 |
| --- | --- | --- |
| df['quantity'] | 하나의 컬럼 (시리즈 반환) | ✅ |
| df[['quantity']] | 하나의 컬럼 (데이터프레임 반환) | ✅ |
| df[['quantity', 'item_price']] | 두 개 이상의 컬럼 선택 | ✅ |
| df['quantity', 'item_price'] | 튜플 키 → 해당 이름을 가진 열을 찾음 (오류) | ❌ 오류 |
# Print the step label. The label must sit on one physical line: a
# single-quoted string cannot contain a raw line break.
print('๐ต item_price์ $๋ฅผ ์ ๊ฑฐํ๊ณ float ํ์์ผ๋ก ๋ณ๊ฒฝํ์ฌ item_price_float์ ์ ์ฅ:')
# Remove the literal '$' from every price string and convert the remainder to
# float, storing the result in a new column. regex=False makes '$' a plain
# character rather than the regular-expression end-of-string anchor.
df['item_price_float'] = df['item_price'].str.replace('$', '', regex=False).astype(float)
# Show the first five converted values.
print(df['item_price_float'].head())
.str.replace('$', '', regex=False)
โ $
๊ธฐํธ๋ฅผ ์์ฐ .str.replace()
: Pandas๋ ๊ธฐ๋ณธ์ ์ผ๋ก pat์ ์ ๊ทํํ์(Regex)์ผ๋ก ์ธ์ํ์ฌ, regex=True
๊ฐ ๊ธฐ๋ณธ๊ฐ$
๋ "๋ฌธ์ฅ์ ๋"์ ์๋ฏธํ๋ ๋ฉํ๋ฌธ์๋ก regex=False
๋ฅผ ์์ฑํด ๋ฌธ์์ ์๋ฏธ๋ฅผ ์์ ์ค.astype(float)
โ ๋ฌธ์์ด "3.50" โ ์ค์ 3.50์ผ๋ก ๋ณํ๐ต item_price์ $๋ฅผ ์ ๊ฑฐํ๊ณ float ํ์
์ผ๋ก ๋ณ๊ฒฝํ์ฌ item_price_float์ ์ ์ฅ:
0 2.39
1 3.39
2 3.39
3 2.39
4 16.98
Name: item_price_float, dtype: float64
# Print the step label, then report rows that are exactly "Chicken Salad Bowl"
# and whose numeric price is at most 9.
print('๐ item_name์ด "Chicken Salad Bowl"์ธ ๊ฒ๊ณผ item_price_float๊ฐ 9 ์ดํ์ธ ๊ฒ์ ์ฐพ์๋ผ:')
# Both conditions are element-wise boolean Series combined with &.
filtered_df = df[
    df['item_name'].eq("Chicken Salad Bowl")
    & df['item_price_float'].le(9)
]
print("โ๏ธ ์กฐ๊ฑด์ ๋ง๋ ์์ 5๊ฐ: ")
print(filtered_df.head(5))
# shape[0] is the number of rows in the filtered frame.
print('โ๏ธ ์ ์ฒด ๊ฐ์:', filtered_df.shape[0])
๐ item_name์ด "Chicken Salad Bowl"์ธ ๊ฒ๊ณผ item_price_float๊ฐ 9 ์ดํ์ธ ๊ฒ์ ์ฐพ์๋ผ:
โ๏ธ ์กฐ๊ฑด์ ๋ง๋ ์์ 5๊ฐ:
order_id quantity item_name choice_description item_price item_price_float
44 20 1 Chicken Salad Bowl [Fresh Tomato Salsa, [Fajita Vegetables, Pinto... $8.75 8.75
256 111 1 Chicken Salad Bowl [Fresh Tomato Salsa, [Fajita Vegetables, Rice,... $8.75 8.75
526 220 1 Chicken Salad Bowl [Roasted Chili Corn Salsa, [Black Beans, Sour ... $8.75 8.75
528 221 1 Chicken Salad Bowl [Tomatillo Green Chili Salsa, [Fajita Vegetabl... $8.75 8.75
529 221 1 Chicken Salad Bowl [Tomatillo Green Chili Salsa, [Fajita Vegetabl... $8.75 8.75
โ๏ธ ์ ์ฒด ๊ฐ์: 56
# Print the step label, then the five cheapest rows with a fresh 0-based index.
print('๐ต item_price_float๋ฅผ ์ค๋ฆ์ฐจ์์ผ๋ก ์ ๋ฆฌํ๊ณ index๋ฅผ ์ด๊ธฐํ:')
# Sort by the numeric price (ascending), drop the old index labels, and take
# the first five rows.
print(
    df.sort_values(by=['item_price_float'], ascending=True)
    .reset_index(drop=True)
    .iloc[:5]
)
๐ต item_price_float๋ฅผ ์ค๋ฆ์ฐจ์์ผ๋ก ์ ๋ฆฌํ๊ณ index๋ฅผ ์ด๊ธฐํ:
order_id quantity item_name choice_description item_price item_price_float
0 471 1 Bottled Water NaN $1.09 1.09
1 338 1 Canned Soda [Coca Cola] $1.09 1.09
2 1575 1 Canned Soda [Dr. Pepper] $1.09 1.09
3 47 1 Canned Soda [Dr. Pepper] $1.09 1.09
4 1014 1 Canned Soda [Coca Cola] $1.09 1.09
# Print the step label, then the five most expensive rows with a fresh index.
print('๐ต item_price_float๋ฅผ ๋ด๋ฆผ์ฐจ์์ผ๋ก ์ ๋ฆฌํ๊ณ index๋ฅผ ์ด๊ธฐํ:')
# Same pipeline as an ascending sort, but with ascending=False.
print(
    df.sort_values(by=['item_price_float'], ascending=False)
    .reset_index(drop=True)
    .iloc[:5]
)
๐ต item_price_float๋ฅผ ๋ด๋ฆผ์ฐจ์์ผ๋ก ์ ๋ฆฌํ๊ณ index๋ฅผ ์ด๊ธฐํ:
order_id quantity item_name choice_description item_price item_price_float
0 1443 15 Chips and Fresh Tomato Salsa NaN $44.25 44.25
1 1398 3 Carnitas Bowl [Roasted Chili Corn Salsa, [Fajita Vegetables,... $35.25 35.25
2 511 4 Chicken Burrito [Fresh Tomato Salsa, [Fajita Vegetables, Rice,... $35.00 35.00
3 1443 4 Chicken Burrito [Fresh Tomato Salsa, [Rice, Black Beans, Chees... $35.00 35.00
4 1443 3 Veggie Burrito [Fresh Tomato Salsa, [Fajita Vegetables, Rice,... $33.75 33.75
# Print the step label, then the first five rows whose item_name contains the
# substring 'Chips'.
print('๐ช item_name์ "Chips"๊ฐ ํฌํจ๋๋ ์์ 5๊ฐ ๋ฐ์ดํฐ ์ถ์ถ:')
# str.contains returns a boolean Series that is used directly as a row mask.
print(df[df['item_name'].str.contains('Chips')].head(5))
.str.contains('Chips')
→ 문자열 "Chips"가 포함되어 있는지 여부를 True/False 로 반환
str.contains('chips', case=False)
: 대소문자 무시
str.contains('Chips', regex=False)
: 정규표현식 사용 안함
๐ช item_name์ "Chips"๊ฐ ํฌํจ๋๋ ์์ 5๊ฐ ๋ฐ์ดํฐ ์ถ์ถ:
order_id quantity item_name choice_description item_price item_price_float
0 1 1 Chips and Fresh Tomato Salsa NaN $2.39 2.39
3 1 1 Chips and Tomatillo-Green Chili Salsa NaN $2.39 2.39
6 3 1 Side of Chips NaN $1.69 1.69
10 5 1 Chips and Guacamole NaN $4.45 4.45
14 7 1 Chips and Guacamole NaN $4.45 4.45
# Print the step label, then every row whose item_name is exactly
# 'Steak Salad' or exactly 'Bowl' (whole-value match, not substring match).
print('๐ฅ item_name์ "Steak Salad" ๋๋ "Bowl"๊ฐ ํฌํจ๋๋ ๋ฐ์ดํฐ ์ถ์ถ:')
# isin tests membership in the list, equivalent to OR-ing two equality masks.
answer = df[df['item_name'].isin(['Steak Salad', 'Bowl'])]
print(answer)
order_id quantity ... item_price item_price_float
445 193 3 ... $22.20 22.20
664 276 1 ... $8.99 8.99
673 279 1 ... $7.40 7.40
752 311 1 ... $8.99 8.99
893 369 1 ... $8.99 8.99
3502 1406 1 ... $8.69 8.69
[6 rows x 6 columns]
# Print the step label, then one row per distinct item_name, keeping the first
# occurrence of each name.
print('๐ฅ item_name์ "Steak Salad" ๋๋ "Bowl"๊ฐ ํฌํจ๋๋ ๋ฐ์ดํฐ ์ถ์ถ:')
print(answer.drop_duplicates(subset=['item_name'], keep='first'))
order_id quantity ... item_price item_price_float
445 193 3 ... $22.20 22.20
664 276 1 ... $8.99 8.99
# Print the step label, then one row per distinct item_name, keeping the last
# occurrence of each name.
print('๐ฅ item_name์ "Steak Salad" ๋๋ "Bowl"๊ฐ ํฌํจ๋๋ ๋ฐ์ดํฐ ์ถ์ถ:')
print(answer.drop_duplicates(subset=['item_name'], keep='last'))
order_id quantity ... item_price item_price_float
673 279 1 ... $7.40 7.40
3502 1406 1 ... $8.69 8.69
# Print the step label, then the first five rows of every second column
# (column positions 0, 2, 4, ...).
print("๐ ์ง์๋ฒ์งธ ์ปฌ๋ผ๋ง ์ถ๋ ฅ:")
# Row slice :5 takes the first five rows; column slice ::2 steps by two.
print(df.iloc[:5, ::2])
slicing[start : end : step]
::2 는 처음부터 끝까지 2칸씩 건너뛰기
[:, ::2]
: (첫 번째 자리) : 행 위치, 모든 행 선택
::2 (두 번째 자리) : 열 위치, 열 인덱스를 2칸씩 건너뛰며 선택
df.iloc[::2, :]
: 모든 열에서 행 인덱스를 2칸씩 건너뛰며 선택
๐ ์ง์๋ฒ์งธ ์ปฌ๋ผ๋ง ์ถ๋ ฅ:
order_id item_name item_price
0 1 Chips and Fresh Tomato Salsa $2.39
1 1 Izze $3.39
2 1 Nantucket Nectar $3.39
3 1 Chips and Tomatillo-Green Chili Salsa $2.39
4 2 Chicken Bowl $16.98
참고: https://www.datamanim.com/dataset/99_pandas/pandasMain.html#filtering-sorting