%cd
import _ as _
from _ import _ as _
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
pandas.read_csv("파일 경로")
pandas.read_csv("파일 경로", index_col="인덱스로 쓰고 싶은 컬럼명")
import chardet
# Read the raw bytes of the file so its text encoding can be guessed
# before handing the file to pandas.read_csv.
with open("파일 경로", "rb") as f:
    data = f.read()
# Inspect the result and pass the detected encoding to read_csv's
# `encoding` argument.
chardet.detect(data)
pandas.read_csv(
"파일 경로"
, encoding="감지된 인코딩"
, index_col="인덱스로 쓰고 싶은 컬럼명"
)
pandas.read_csv(
"파일 경로"
, encoding="감지된 인코딩"
, index_col="인덱스로 쓰고 싶은 컬럼명"
, low_memory=False
)
pandas.DataFrame.head()
pandas.DataFrame.tail()
pandas.DataFrame.shape
pandas.DataFrame.info()
pandas.DataFrame.describe()
pandas.DataFrame["컬럼명"].describe()
pandas.DataFrame.corr()
pandas.DataFrame.corr(numeric_only=True)
pandas.DataFrame.corr(numeric_only=True)["특정 컬럼명"].abs().sort_values(ascending=False)
pandas.DataFrame.pivot_table(
values = "출력할 데이터로 사용할 컬럼명"
, aggfunc = "그룹화 결과로 출력할 함수"
, index = ["데이터 요약 시 함께 사용할 컬럼명"]
)
DataFrame.pivot_table(values=None, index=None, columns=None, aggfunc='mean', fill_value=None, margins=False, dropna=True, margins_name='All', observed=<no_default>, sort=True)
Parameters:
- values: list-like or scalar, optional
- Column or columns to aggregate.
- index: column, Grouper, array, or list of the previous
- Keys to group by on the pivot table index. If a list is passed, it can contain any of the other types (except list). If an array is passed, it must be the same length as the data and will be used in the same manner as column values.
- columns: column, Grouper, array, or list of the previous
- Keys to group by on the pivot table column. If a list is passed, it can contain any of the other types (except list). If an array is passed, it must be the same length as the data and will be used in the same manner as column values.
- aggfunc: function, list of functions, dict, default “mean”
- If a list of functions is passed, the resulting pivot table will have hierarchical columns whose top level are the function names (inferred from the function objects themselves). If a dict is passed, the key is column to aggregate and the value is function or list of functions. If margins=True, aggfunc will be used to calculate the partial aggregates.
- fill_value: scalar, default None
- Value to replace missing values with (in the resulting pivot table, after aggregation).
# Sample frame: three categorical key columns (A, B, C) and two numeric
# value columns (D, E), nine rows in total.
df = pd.DataFrame(
    {
        "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"],
        "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"],
        "C": [
            "small", "large", "large", "small", "small",
            "large", "small", "small", "large",
        ],
        "D": [1, 2, 2, 3, 3, 4, 5, 6, 7],
        "E": [2, 4, 5, 5, 6, 6, 8, 9, 9],
    }
)

# Sum column D for every (A, B) pair on the rows, with one output column per
# distinct value of C. Combinations that never occur in the data come out as NaN.
table = df.pivot_table(
    values="D",
    index=["A", "B"],
    columns=["C"],
    aggfunc="sum",
)
table
C        large  small
A   B
bar one    4.0    5.0
    two    7.0    6.0
foo one    4.0    1.0
    two    NaN    6.0
pandas.DataFrame.loc[(앞 인덱스명, 뒤 인덱스명), 컬럼명]
pandas.DataFrame.astype("dtype")
pandas.DataFrame.astype({"column": "dtype"})
pandas.to_numeric(arg)
pandas.to_numeric(arg, errors = "coerce") # 숫자형으로 바꿀 수 없는 값을 결측치(np.nan)로 대체
| 기능 | astype | to_numeric |
|---|---|---|
| 데이터 프레임에 적용 | O | apply 함수로만 가능 |
| 실수의 정수 변환 | O (소수점 이하 버림 처리, 0 방향으로 절삭) | X |
| 메서드 형태로 적용 | O | X |
| 변환할 수 없는 데이터 처리 | 에러 발생 | NaN으로 변환(errors='coerce') |

pandas.unique(values)
pandas.DataFrame.count()
pandas.DataFrame.size
pandas.Series.value_counts()
pandas.isna(obj)
pandas.DataFrame.isna()
pandas.notna(obj)
pandas.DataFrame.notna()
pandas.DataFrame.fillna([value, method, axis, ...])
pandas.DataFrame.dropna(*[, axis, how, thresh, ...])
https://pandas.pydata.org/docs/reference/general_functions.html