https://github.com/kubeflow/katib
Raw File
Tip revision: e0bd5ee9545b661a5bf0cbdc55bd38e96995fd46 authored by YujiOshima on 20 August 2018, 14:41:16 UTC
allow same study name on multiple job
Tip revision: e0bd5ee
workerConfigMap.yaml
---
# ConfigMap "worker-template" in the "katib" namespace.
#
# Each entry under `data` holds the text of a batch/v1 Job manifest written
# with Go text/template actions. The entries are literal block scalars
# (`|-`), so to YAML the `{{ ... }}` actions are just part of a string.
#
# Values the templates reference:
#   .WorkerId         - used as both the Job name and the container name.
#   .HyperParameters  - iterated with `range`; every item supplies .Name and
#                       .Value, appended to the command as "<Name>=<Value>".
#                       The surrounding `with` skips the loop when it is empty.
#
# The `{{-` form trims the whitespace (including the newline) before each
# action, so the control lines leave no blank lines in the rendered output.
#
# NOTE: keep `#` comments out of the block scalars; anything written there
# becomes part of the template text.
apiVersion: v1
kind: ConfigMap
metadata:
  name: worker-template
  namespace: katib
data:
  # Job template with no GPU resources: runs train_mnist.py from the
  # katib/mxnet-mnist-example image with a fixed --batch-size=64.
  # .WorkerId is quoted so the rendered value is always a YAML string; an
  # all-digit or otherwise number-like ID would be typed as a number if left
  # plain (assumes the rendered text is parsed as YAML - confirm with the
  # consumer of this ConfigMap).
  cpuWorkerTemplate.yaml: |-
    apiVersion: batch/v1
    kind: Job
    metadata:
      name: "{{.WorkerId}}"
      namespace: katib
    spec:
      template:
        spec:
          containers:
          - name: "{{.WorkerId}}"
            image: katib/mxnet-mnist-example
            command:
            - "python"
            - "/mxnet/example/image-classification/train_mnist.py"
            - "--batch-size=64"
            {{- with .HyperParameters}}
            {{- range .}}
            - "{{.Name}}={{.Value}}"
            {{- end}}
            {{- end}}
          restartPolicy: Never
  # GPU variant of the template above: same command, but uses the `:gpu`
  # image tag and sets a container limit of one `nvidia.com/gpu`.
  # .WorkerId is quoted for the same reason as in the CPU template.
  gpuWorkerTemplate.yaml: |-
    apiVersion: batch/v1
    kind: Job
    metadata:
      name: "{{.WorkerId}}"
      namespace: katib
    spec:
      template:
        spec:
          containers:
          - name: "{{.WorkerId}}"
            image: katib/mxnet-mnist-example:gpu
            command:
            - "python"
            - "/mxnet/example/image-classification/train_mnist.py"
            - "--batch-size=64"
            {{- with .HyperParameters}}
            {{- range .}}
            - "{{.Name}}={{.Value}}"
            {{- end}}
            {{- end}}
            resources:
              limits:
                nvidia.com/gpu: 1
          restartPolicy: Never
back to top