https://github.com/kubeflow/katib
Revision 24160cb2920adcaebd807ba612da778c44606bd6 authored by Richard Liu on 28 November 2018, 06:52:51 UTC, committed by k8s-ci-robot on 28 November 2018, 06:52:51 UTC
* downgrade to 1.10.1

* Delete pods

* Fix job-name

* Set successfulJobsHistoryLimit to 0

* Add comments
1 parent b7145b3
Raw File
Tip revision: 24160cb2920adcaebd807ba612da778c44606bd6 authored by Richard Liu on 28 November 2018, 06:52:51 UTC
Downgrade kubernetes dependency to 1.10.1 (#256)
Tip revision: 24160cb
workerConfigMap.yaml
# ConfigMap "worker-template" in the "kubeflow" namespace.
# Every entry under `data` is the text of a batch/v1 Job manifest that still
# contains Go-template-style placeholders (.WorkerID, .HyperParameters), so the
# stored text has to be rendered before it is usable as a Job manifest.
# NOTE(review): presumably rendered once per worker by the Katib controller --
# confirm against the consumer of this ConfigMap.
apiVersion: v1
kind: ConfigMap
metadata:
  name: worker-template
  namespace: kubeflow
data:
  # CPU worker: batch/v1 Job template that runs the MXNet MNIST training script
  # from the katib/mxnet-mnist-example image with a fixed batch size of 64.
  #
  # Placeholders (Go-template syntax):
  #   .WorkerID         used for both the Job name and the container name
  #   .HyperParameters  iterated; every entry contributes one extra command
  #                     argument of the form "<Name>=<Value>"
  #
  # Every template-expanded scalar is double-quoted so that an expansion which
  # happens to look like a number, boolean or null (e.g. an all-digit ID) is
  # still parsed as a string once the rendered text is read as YAML.
  # The leading "-" inside the range/end actions trims the preceding newline and
  # indentation, so the rendered argument list contains no blank lines.
  cpuWorkerTemplate.yaml: |-
    apiVersion: batch/v1
    kind: Job
    metadata:
      name: "{{.WorkerID}}"
      namespace: kubeflow
    spec:
      template:
        spec:
          containers:
          - name: "{{.WorkerID}}"
            image: katib/mxnet-mnist-example
            command:
            - "python"
            - "/mxnet/example/image-classification/train_mnist.py"
            - "--batch-size=64"
            {{- range .HyperParameters}}
            - "{{.Name}}={{.Value}}"
            {{- end}}
          restartPolicy: Never
  # GPU worker: batch/v1 Job template that runs the MXNet MNIST training script
  # from the katib/mxnet-mnist-example:gpu image with a fixed batch size of 64
  # and a resource limit of one nvidia.com/gpu on the container.
  #
  # Placeholders (Go-template syntax):
  #   .WorkerID         used for both the Job name and the container name
  #   .HyperParameters  iterated; every entry contributes one extra command
  #                     argument of the form "<Name>=<Value>"
  #
  # Every template-expanded scalar is double-quoted so that an expansion which
  # happens to look like a number, boolean or null (e.g. an all-digit ID) is
  # still parsed as a string once the rendered text is read as YAML.
  # The leading "-" inside the range/end actions trims the preceding newline and
  # indentation, so the rendered argument list contains no blank lines.
  gpuWorkerTemplate.yaml: |-
    apiVersion: batch/v1
    kind: Job
    metadata:
      name: "{{.WorkerID}}"
      namespace: kubeflow
    spec:
      template:
        spec:
          containers:
          - name: "{{.WorkerID}}"
            image: katib/mxnet-mnist-example:gpu
            command:
            - "python"
            - "/mxnet/example/image-classification/train_mnist.py"
            - "--batch-size=64"
            {{- range .HyperParameters}}
            - "{{.Name}}={{.Value}}"
            {{- end}}
            resources:
              limits:
                nvidia.com/gpu: 1
          restartPolicy: Never
back to top