reviews = pd.read_csv(...)
reviews.groupby('taster_twitter_handle') => dataframe이 조각 조각
reviews.groupby('taster_twitter_handle').apply(lambda df : 어쩌고) => 조각조각 dataframe마다 작업
reviews.groupby('taster_twitter_handle').taster_twitter_handle.count() => series, key : taster_twitter_handle의 값들, value : count
# Load the reviews dataset (the actual CSV path is elided in these notes).
reviews = pd.read_csv(...)
# Highest `points` value within each `price` group -> Series indexed by price.
best_rating_per_price = reviews.groupby('price').points.max()
# Alternative via apply: take the full row holding the top `points` in each
# price group, then keep only the `points` column. Selecting with a list
# (['points']) yields a one-column DataFrame; this rebinds the name above.
best_rating_per_price = reviews.groupby('price').apply(lambda df: df.loc[df.points.idxmax()]).loc[:, ['points']]
# Lowest and highest price per variety. The string names 'min'/'max' are used
# instead of the builtins min/max, which pandas >= 2.1 flags with a
# FutureWarning; the resulting columns are still labelled 'min' and 'max'.
reviews.groupby('variety').price.agg(['min', 'max'])
# loc에 열 이름을 문자열 하나로 주면(.loc[:, 'price']) Series, 리스트로 주면(.loc[:, ['points']]) DataFrame
# Per variety: sort that group's rows by price (ascending) and keep only the
# price column; selecting with a single label ('price') gives a Series.
reviews.groupby('variety').apply(lambda df : df.sort_values(by='price', ascending=True)).loc[:, 'price']
# Same per-variety sort, but keeping every column of the sorted groups.
reviews.groupby('variety').apply(lambda df : df.sort_values(by='price', ascending=True))
# Group on the (country, variety) pair and sort each group's rows by points.
reviews.groupby(['country', 'variety']).apply(lambda df : df.sort_values(by='points'))
# A DataFrame read (roughly speaking) from a CSV file.
home_data = pd.read_csv(iowa_file_path)
# Single column via attribute access -> Series.
X = home_data.SalePrice
# Single column via a string key -> the same Series as above.
X = home_data['SalePrice']
# A one-element list key keeps the result a DataFrame (with one column).
X = home_data[['SalePrice']]
# A list key with several names selects several columns -> DataFrame.
X = home_data[['SalePrice', 'LotArea']]
# The column names can also be kept in a list variable and passed as the key.
features = ['LotArea', 'YearBuilt', '1stFlrSF', '2ndFlrSF', 'FullBath', 'BedroomAbvGr', 'TotRmsAbvGrd']
X = home_data[features]
.value_counts()