import pandas as pd
import numpy as np
import sklearn
from sklearn.preprocessing import LabelEncoder
pd.get_dummies(df, columns=[...], drop_first=False)
from mlxtend.preprocessing import TransactionEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import MaxAbsScaler
from sklearn.preprocessing import RobustScaler
# Generates polynomial and interaction terms from the input features.
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import root_mean_squared_error
from sklearn.metrics import mean_squared_log_error
from sklearn.metrics import r2_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve
from sklearn.metrics import auc
from sklearn.metrics import roc_auc_score
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDRegressor
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.linear_model import ElasticNet
from sklearn.svm import SVC
from sklearn.svm import SVR
from sklearn.svm import LinearSVC
from sklearn.svm import LinearSVR
from sklearn.svm import NuSVC
from sklearn.svm import NuSVR
SVC(C=1.0, kernel='rbf')
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import plot_tree
plot_tree(clf, feature_names=clf.feature_names_in_, class_names=['0','1'], filled=True, fontsize=15)
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import BaggingRegressor
# Boosting counterpart of BaggingClassifier for classification tasks.
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import RandomForestClassifier
# Regression counterpart of RandomForestClassifier.
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBClassifier
from xgboost import XGBRegressor
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(binary=True)
X_train_cv = cv.fit_transform(X_train['content'])
from sklearn.naive_bayes import BernoulliNB
CountVectorizer(binary=False)
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.cluster import KMeans
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
import scipy.stats as stats
import statsmodels
import statsmodels.formula.api as smf
import statsmodels.api as sm
import statsmodels.stats.api as sms
import mlxtend
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder
from scipy.stats import shapiro
statistic, p_value = shapiro(data)
from scipy.stats import wilcoxon
statistic, p_value = wilcoxon(x, y=None)
from scipy.stats import ttest_1samp
statistic, p_value = ttest_1samp(data, popmean=3, alternative='two-sided')
from scipy.stats import levene
statistic, p_value = levene(*samples)
from scipy.stats import ttest_ind
statistic, p_value = ttest_ind(data1, data2, equal_var=True, alternative='less')
statistic, p_value = ttest_ind(data1, data2, equal_var=False, alternative='less')
from scipy.stats import ttest_rel
# Paired (related-samples) t-test; data1 and data2 must have the same length.
# alternative='greater' tests whether mean(data1 - data2) > 0.
statistic, p_value = ttest_rel(data1, data2, alternative='greater')
from scipy.stats import kruskal
stats.kruskal(*data)
from scipy.stats import levene
statistic, p_value = levene(*samples)
import pingouin as pg
pg.welch_anova(df, dv=feature_name, between=target_name)
from scipy.stats import f_oneway
f_oneway(*data)
import statsmodels.stats as sm
import statsmodels.formula.api as smf
# pairwise_tukeyhsd is defined in the `multicomp` submodule; it is not an
# attribute of the `statsmodels.stats` package itself, so import it directly.
from statsmodels.stats.multicomp import pairwise_tukeyhsd
# Tukey HSD post-hoc test: `data` holds the observations, `groups` the
# group label of each observation.
pairwise_tukeyhsd(data, groups, alpha=0.05)
import statsmodels.formula.api as smf
import statsmodels.stats as sm
# Import anova_lm explicitly so the call does not depend on the `anova`
# submodule having been loaded as a side effect of another import.
from statsmodels.stats.anova import anova_lm
# Two-way ANOVA with interaction. anova_lm expects *fitted* results, so the
# OLS model must be fitted before it is passed in.
model = smf.ols(formula='mpg ~ C(cyl) + C(am) + C(cyl):C(am)', data=df).fit()
anova_lm(model, typ=2)
from scipy.stats import chisquare
statistics, p_value = chisquare(f_obs, f_exp=None)
from scipy.stats import chi2_contingency
# chi2_contingency returns (statistic, p-value, degrees of freedom, expected
# frequencies). The degrees of freedom go into `dof` so that the DataFrame
# `df` used to build the contingency table is not overwritten.
chi, p, dof, expect = chi2_contingency(pd.crosstab(df['a'], df['b']))
from scipy.stats import pearsonr
from scipy.stats import spearmanr
from scipy.stats import kendalltau
from scipy.stats import spearmanr
from scipy.stats import shapiro
from scipy.stats import normaltest
from scipy.stats import kstest
kstest(data, 'norm')
from scipy.stats import mannwhitneyu
import inspect
def get_object_type(obj):
    """Classify *obj* as a module, function, method or class.

    Args:
        obj: Any Python object.

    Returns:
        One of ``"module"``, ``"function"``, ``"method"`` or ``"class"``,
        according to the first matching ``inspect`` predicate, or
        ``"unknown"`` when none of them matches.
    """
    if inspect.ismodule(obj):
        return "module"
    if inspect.isfunction(obj):
        return "function"
    if inspect.ismethod(obj):
        return "method"
    if inspect.isclass(obj):
        return "class"
    # Anything else (plain values, built-ins such as ``len``) is not
    # recognised by the checks above.
    return "unknown"
def list_all_methods(module):
    """Return the public members of *module* that are bound methods.

    Delegates to ``list_all`` with ``inspect.ismethod`` as the filter.
    Note that ``inspect.ismethod`` only matches bound method objects, so
    plain functions defined on a class are not included.

    Args:
        module: The object whose members are inspected.

    Returns:
        Whatever ``list_all`` returns for the filtered members.
    """
    return list_all(module, inspect.ismethod)
def list_all_functions(module):
    """Return the public members of *module* that are Python functions.

    Delegates to ``list_all`` with ``inspect.isfunction`` as the filter.

    Args:
        module: The object whose members are inspected.

    Returns:
        Whatever ``list_all`` returns for the filtered members.
    """
    return list_all(module, inspect.isfunction)
def list_all_classes(module):
    """Return the public members of *module* that are classes.

    Delegates to ``list_all`` with ``inspect.isclass`` as the filter.

    Args:
        module: The object whose members are inspected.

    Returns:
        Whatever ``list_all`` returns for the filtered members.
    """
    return list_all(module, inspect.isclass)
def list_all(module, predict=lambda x: True):
    """List the public members of *module* together with their kind.

    Args:
        module: The object passed to ``inspect.getmembers``.
        predict: Predicate called with each member object; only members
            for which it returns a truthy value are kept. The default
            keeps every member.

    Returns:
        A list of ``(name, kind)`` tuples, where ``kind`` is the result of
        ``get_object_type`` for that member. Names starting with an
        underscore are skipped.
    """
    return [
        (name, get_object_type(obj))
        for name, obj in inspect.getmembers(module)
        if not name.startswith('_') and predict(obj)
    ]
def hypothesis_test(p_value, alpha=0.05):
    """Print the decision of a significance test.

    Args:
        p_value: The p-value produced by a statistical test.
        alpha: Significance level to compare against (default 0.05).

    The null hypothesis is rejected when ``p_value <= alpha``. Otherwise the
    test only *fails to reject* it: a large p-value is not evidence that the
    null hypothesis is true, so the message does not say "accepted".
    """
    if p_value <= alpha:
        print(f"p_value = {p_value} <= {alpha}, null hypothesis is rejected.")
    else:
        print(f"p_value = {p_value} > {alpha}, fail to reject the null hypothesis.")
def predict(y_test, y_pred, score_func, **kwargs):
    """Print the value of a scoring function for a set of predictions.

    Args:
        y_test: First positional argument handed to ``score_func``.
        y_pred: Second positional argument handed to ``score_func``.
        score_func: Callable invoked as
            ``score_func(y_test, y_pred, **kwargs)``; its ``__name__`` is
            used as the label in the printed line.
        **kwargs: Extra keyword arguments forwarded to ``score_func``.
    """
    print(f"{score_func.__name__} = {score_func(y_test, y_pred, **kwargs)}")
# Example: print the call signature of RandomForestClassifier.fit.
import inspect
from sklearn.ensemble import RandomForestClassifier
print(inspect.signature(RandomForestClassifier.fit))