# 분류, 회귀, 시계열 예측, 자연어 처리, 컴퓨터 비전(CV) 작업이 가능하다.
# 라벨 데이터를 설정한다.
# 점수를 어떤 기준으로 산정할 것인지 확인한다.
from azureml.core import Workspace
from azureml.core import Experiment
# Connect to the Azure ML workspace via Workspace.from_config() and print its
# identifying properties.
ws = Workspace.from_config()
# One property per line: the separator must be a real newline character, not
# an escaped backslash followed by "n".
print('Workspace name: ' + ws.name,
      'Azure region: ' + ws.location,
      'Subscription id: ' + ws.subscription_id,
      'Resource group: ' + ws.resource_group, sep='\n')

# Experiment in this workspace under which the training runs are logged.
experiment = Experiment(workspace=ws, name="diabetes-experiment")
from azureml.opendatasets import Diabetes
from sklearn.model_selection import train_test_split
# Load the Diabetes open dataset into a pandas DataFrame and drop every row
# that contains a missing value.
diabetes_dataset = Diabetes.get_tabular_dataset()
x_df = diabetes_dataset.to_pandas_dataframe().dropna()

# Detach the "Y" column as the regression target; x_df keeps the remaining
# columns as features.
y_df = x_df.pop("Y")

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split repeatable across executions.
X_train, X_test, y_train, y_test = train_test_split(
    x_df,
    y_df,
    test_size=0.2,
    random_state=66,
)
print(X_train)
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.externals import joblib
import math
import os

# Regularization strengths to sweep; one experiment run is logged per value.
alphas = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

# NOTE(review): joblib is imported from sklearn.externals, which recent
# scikit-learn releases no longer provide - confirm the pinned version or
# switch the import to the standalone joblib package.

# The models are written below "outputs/"; make sure that directory exists
# so the dump cannot fail on a missing parent directory.
os.makedirs("outputs", exist_ok=True)

for alpha in alphas:
    run = experiment.start_logging()
    try:
        run.log("alpha_value", alpha)

        # Fit a Ridge model for this alpha and score it on the held-out split.
        model = Ridge(alpha=alpha)
        model.fit(X=X_train, y=y_train)
        y_pred = model.predict(X=X_test)
        rmse = math.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))
        run.log("rmse", rmse)

        # Persist the fitted model locally, then attach it to the run under
        # its bare file name.
        model_name = "model_alpha_" + str(alpha) + ".pkl"
        filename = "outputs/" + model_name

        joblib.dump(value=model, filename=filename)
        run.upload_file(name=model_name, path_or_stream=filename)
    except Exception as exc:
        # Do not leave the run open when training, dumping or uploading
        # fails; mark it failed and let the error propagate.
        run.fail(error_details=exc)
        raise
    run.complete()
    print(f"{alpha} exp completed")
import numpy as np
from azureml.core import Dataset
# Write the training features and labels to comma-delimited files in the
# current working directory.
for csv_name, values in (('features.csv', X_train), ('labels.csv', y_train)):
    np.savetxt(csv_name, values, delimiter=',')

# Upload both files to the workspace's default datastore, overwriting any
# copies already stored under the same target path.
datastore = ws.get_default_datastore()
local_csv_files = ['./features.csv', './labels.csv']
datastore.upload_files(files=local_csv_files,
                       target_path='diabetes-experiment/',
                       overwrite=True)

# Tabular datasets over the uploaded files; they are passed as the sample
# input/output datasets when the model is registered.
features_location = [(datastore, 'diabetes-experiment/features.csv')]
labels_location = [(datastore, 'diabetes-experiment/labels.csv')]
input_dataset = Dataset.Tabular.from_delimited_files(path=features_location)
output_dataset = Dataset.Tabular.from_delimited_files(path=labels_location)
import sklearn
from azureml.core import Model
from azureml.core.resource_configuration import ResourceConfiguration
# Select the experiment run with the lowest logged RMSE. Runs that carry no
# "rmse" metric are skipped.
best_run = None
best_rmse = None
for candidate_run in experiment.get_runs():
    candidate_rmse = candidate_run.get_metrics().get("rmse")
    if candidate_rmse is None:
        continue
    if best_rmse is None or candidate_rmse < best_rmse:
        best_run, best_rmse = candidate_run, candidate_rmse
if best_run is None:
    raise RuntimeError("No run in the experiment has logged an 'rmse' metric.")

# The training loop uploads each model under a bare "*.pkl" name; pick that
# file rather than relying on the order of the run's file listing.
model_file_name = next(
    (name for name in best_run.get_file_names()
     if name.endswith(".pkl") and "/" not in name),
    None,
)
if model_file_name is None:
    raise RuntimeError("The best run has no uploaded '.pkl' model file.")

# Download the model next to this script so the path handed to
# Model.register exists locally.
local_model_path = f"./{model_file_name}"
best_run.download_file(name=model_file_name, output_file_path=local_model_path)

# Register the downloaded model together with its framework metadata, sample
# datasets and resource requirements.
model = Model.register(workspace=ws,
                       model_name='diabetes-experiment-model',
                       model_path=local_model_path,
                       model_framework=Model.Framework.SCIKITLEARN,
                       model_framework_version=sklearn.__version__,
                       sample_input_dataset=input_dataset,
                       sample_output_dataset=output_dataset,
                       resource_configuration=ResourceConfiguration(cpu=1, memory_in_gb=0.5),
                       description='Ridge regression model to predict diabetes progression.',
                       tags={'area': 'diabetes', 'type': 'regression'})
print('Name:', model.name)
print('Version:', model.version)
# Deploy the registered model as a web service named 'diabetes-service',
# replacing any existing service of that name, and block until the
# deployment has finished while streaming its output.
service_name = 'diabetes-service'
service = Model.deploy(
    workspace=ws,
    name=service_name,
    models=[model],
    overwrite=True,
)
service.wait_for_deployment(show_output=True)
import json
# Smoke-test the deployed service: send the first two training rows as a
# JSON request and print whatever the service returns.
sample_rows = X_train[0:2].values.tolist()
request_body = {'data': sample_rows, 'method': 'predict'}
input_payload = json.dumps(request_body)
output = service.run(input_payload)
print(output)