kubectl -n moey920 get experiment random -o yaml
# Katib Experiment "random" in namespace "moey920", as returned by
# `kubectl get experiment random -o yaml`. Nesting is shown with 2-space
# indentation and flush block sequences.
apiVersion: kubeflow.org/v1beta1
kind: Experiment
metadata:
  annotations:
    # JSON copy of the manifest that was last applied with kubectl.
    kubectl.kubernetes.io/last-applied-configuration: |
      {"apiVersion":"kubeflow.org/v1beta1","kind":"Experiment","metadata":{"annotations":{},"name":"random","namespace":"moey920"},"spec":{"algorithm":{"algorithmName":"random"},"maxFailedTrialCount":3,"maxTrialCount":12,"objective":{"additionalMetricNames":["Train-accuracy"],"goal":0.99,"objectiveMetricName":"Validation-accuracy","type":"maximize"},"parallelTrialCount":3,"parameters":[{"feasibleSpace":{"max":"0.03","min":"0.01"},"name":"lr","parameterType":"double"},{"feasibleSpace":{"max":"5","min":"2"},"name":"num-layers","parameterType":"int"},{"feasibleSpace":{"list":["sgd","adam","ftrl"]},"name":"optimizer","parameterType":"categorical"}],"trialTemplate":{"primaryContainerName":"training-container","trialParameters":[{"description":"Learning rate for the training model","name":"learningRate","reference":"lr"},{"description":"Number of training model layers","name":"numberLayers","reference":"num-layers"},{"description":"Training model optimizer (sdg, adam or ftrl)","name":"optimizer","reference":"optimizer"}],"trialSpec":{"apiVersion":"batch/v1","kind":"Job","spec":{"template":{"spec":{"containers":[{"command":["python3","/opt/mxnet-mnist/mnist.py","--batch-size=64","--lr=${trialParameters.learningRate}","--num-layers=${trialParameters.numberLayers}","--optimizer=${trialParameters.optimizer}"],"image":"docker.io/kubeflowkatib/mxnet-mnist:v1beta1-45c5727","name":"training-container"}],"restartPolicy":"Never"}}}}}}}
  creationTimestamp: "2021-12-03T09:04:06Z"
  finalizers:
  - update-prometheus-metrics
  generation: 1
  name: random
  namespace: moey920
  resourceVersion: "34104516"
  uid: da5597ce-7e40-4492-a59f-8fbc13350b89
spec:
  algorithm:
    algorithmName: random
  maxFailedTrialCount: 3
  maxTrialCount: 12
  # metricsCollectorSpec is absent from the last-applied configuration, so it
  # was filled in after apply (presumably a Katib default).
  metricsCollectorSpec:
    collector:
      kind: StdOut
  objective:
    additionalMetricNames:
    - Train-accuracy
    goal: 0.99
    # metricStrategies is likewise absent from the last-applied configuration.
    metricStrategies:
    - name: Validation-accuracy
      value: max
    - name: Train-accuracy
      value: max
    objectiveMetricName: Validation-accuracy
    type: maximize
  parallelTrialCount: 3
  # Search space: two numeric ranges (bounds given as strings) and one
  # categorical list.
  parameters:
  - feasibleSpace:
      max: "0.03"
      min: "0.01"
    name: lr
    parameterType: double
  - feasibleSpace:
      max: "5"
      min: "2"
    name: num-layers
    parameterType: int
  - feasibleSpace:
      list:
      - sgd
      - adam
      - ftrl
    name: optimizer
    parameterType: categorical
  # resumePolicy is absent from the last-applied configuration.
  resumePolicy: LongRunning
  trialTemplate:
    # failureCondition and successCondition are absent from the last-applied
    # configuration (presumably Katib defaults for a batch/v1 Job).
    failureCondition: status.conditions.#(type=="Failed")#|#(status=="True")#
    primaryContainerName: training-container
    successCondition: status.conditions.#(type=="Complete")#|#(status=="True")#
    # Each trialParameter maps a name used as ${trialParameters.<name>} in
    # trialSpec to a search-space parameter via `reference`.
    trialParameters:
    - description: Learning rate for the training model
      name: learningRate
      reference: lr
    - description: Number of training model layers
      name: numberLayers
      reference: num-layers
    - description: Training model optimizer (sdg, adam or ftrl)
      name: optimizer
      reference: optimizer
    trialSpec:
      apiVersion: batch/v1
      kind: Job
      spec:
        template:
          spec:
            containers:
            - command:
              - python3
              - /opt/mxnet-mnist/mnist.py
              - --batch-size=64
              - --lr=${trialParameters.learningRate}
              - --num-layers=${trialParameters.numberLayers}
              - --optimizer=${trialParameters.optimizer}
              image: docker.io/kubeflowkatib/mxnet-mnist:v1beta1-45c5727
              name: training-container
            restartPolicy: Never
status:
  # Conditions are listed oldest first here: Created, then Running.
  conditions:
  - lastTransitionTime: "2021-12-03T09:04:06Z"
    lastUpdateTime: "2021-12-03T09:04:06Z"
    message: Experiment is created
    reason: ExperimentCreated
    status: "True"
    type: Created
  - lastTransitionTime: "2021-12-03T09:04:57Z"
    lastUpdateTime: "2021-12-03T09:04:57Z"
    message: Experiment is running
    reason: ExperimentRunning
    status: "True"
    type: Running
  # No best trial is recorded in this snapshot; all three trials are running.
  currentOptimalTrial:
    bestTrialName: ""
    observation:
      metrics: null
    parameterAssignments: null
  runningTrialList:
  - random-b5d4plgx
  - random-qwllpfh9
  - random-vbc2tscx
  startTime: "2021-12-03T09:04:06Z"
  trials: 3
  trialsRunning: 3
`status.conditions` 목록의 마지막 항목에서 `type` 값이 `Succeeded`이면 실험이 완료된 것입니다. 위 출력에서는 마지막 항목의 `type`이 `Running`이므로 실험이 아직 진행 중입니다.