Covers only basic operations. Refer to the pandas User Guide for more advanced methods and detailed explanations.
import numpy as np
import pandas as pd
df.head(n)
-> shows first n rows (default = 5)
df.tail(n)
-> shows last n rows (default = 5)
df.shape
-> gives the axis dimensions as a (n_rows, n_cols) tuple (an attribute, not a method, so no parentheses)
df.info()
df.describe()
-> summarizes descriptive stats
df.columns
df.index
-> df.columns returns the column labels and df.index returns the row labels (both as Index objects)
df.to_numpy()
-> converts into np ndarray. This is cheap when all the columns have the same data type; with mixed types pandas upcasts to a common dtype (often object), which can be expensive
df.copy()
df.empty
-> checks if DF is empty
df["col_name"]
df.loc["label_name"]
-> selecting on multi-axis by label is also possible ex) df.loc["20130102":"20130104", ["A", "B"]]
df.iloc[loc]
-> selects with integer position locations
df[5:10]
df.at[dates[0], "A"]
-> gets fast access to scalar value
df[bool_vec]
-> selects where value of bool_vec is true
df1.loc[lambda df: df['A'] > 0, :]
-> selects rows where value at column 'A' is greater than 0
df1.loc[:, lambda df: ['A', 'B']]
-> selects columns 'A' and 'B'
df.sample(n=n,axis=1)
-> random selection of column (selection of row if axis = 0)
df.where(lambda x: x>4)
df.mask(df >= 0)
-> inverse boolean operation of where selection
df.query('(a < b) & (b < c)')
df["new_col_name"] = values
df.insert(col_loc,"column",values)
df.assign(col_name=df["col1"] + df["col2"])
-> assign() method creates a new column derived from existing columns
del df["col_name"]
col = df.pop("col_name")
df + df2
df - df.iloc[1]
df * 5 + 2
df & df2
-> element-wise AND (logical for bool data, bitwise for integer data)
df.T
-> transpose
np.exp(df)
df.add(df2, fill_value=0)
-> fill_value option treats NaN as 0 when doing operation
df.duplicated('col', keep='first'/'last'/False)
-> returns bool vec marking duplicated rows; the occurrence chosen by keep ('first' or 'last') is left unmarked, and keep=False marks every duplicate
df.drop_duplicates('col', keep='last')
-> drops duplicate rows
df.duplicated('col').sum()
-> counts the duplicated rows (a non-zero result means duplicates exist)
df.isnull()
-> checks for NaN values returning bool vec
df.isnull().sum()
-> counts NaN values of each column
df.isnull().sum(1)
-> counts NaN values of each row
df.notnull()
-> checks real values returning bool vec
df['col'].fillna(value=0)
-> fills NaN values with 0
df.dropna(how="any")
-> drops any rows that have missing data
df.apply(function)
-> applies function to the data ex) df.apply(lambda x: x.max() - x.min())
s.value_counts()
-> counts how many times each distinct value occurs when s is a Series
import matplotlib.pyplot as plt
plt.figure(num=fig_num, figsize=(x,y))
df.plot(x='A',y='B')
df.plot.bar()
df.plot.barh(stacked=True)
df.plot.hist(stacked=True, bins=20, orientation='horizontal', cumulative=True)
df.plot.box(color=color, vert=False, positions=[1,3,5])
df.boxplot(by='x')
df.plot.area()
df.plot.scatter(x='A', y='B')
pd.plotting.scatter_matrix(df, alpha=0.2, figsize=(6,6), diagonal='kde')
df.plot.hexbin(x='A', y='B')
df.plot.pie(subplots=True)
df.plot.kde()
-> density plot