Files
charts/bitnami/pytorch/templates/statefulset.yml
Alejandro Moreno 77db1bb0da Add PyTorch chart
2019-05-14 09:46:42 +02:00

164 lines
5.9 KiB
YAML

{{- /*
Worker StatefulSet for distributed PyTorch.

Rendered only when .Values.pytorch.distributed.enabled is truthy. The replica
count is worldSize minus one, and every worker exports RANK as the ordinal
suffix of its pod name plus one (presumably rank 0 belongs to a master that is
defined elsewhere in the chart; TODO confirm).
*/ -}}
{{- if .Values.pytorch.distributed.enabled }}
# apps/v1 replaces apps/v1beta2, which Kubernetes stopped serving in v1.16.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: {{ include "pytorch.fullname" . }}-worker
  labels:
    app.kubernetes.io/name: {{ include "pytorch.name" . }}
    helm.sh/chart: {{ include "pytorch.chart" . }}
    app.kubernetes.io/instance: {{ .Release.Name }}
    app.kubernetes.io/managed-by: {{ .Release.Service }}
    app.kubernetes.io/component: "worker"
spec:
  # NOTE(review): spec.serviceName is not set. Confirm whether the chart ships a
  # headless Service that this StatefulSet should reference.
  selector:
    matchLabels:
      app.kubernetes.io/name: {{ include "pytorch.name" . }}
      app.kubernetes.io/instance: {{ .Release.Name }}
      app.kubernetes.io/component: "worker"
  # worldSize minus one worker pods.
  replicas: {{ sub .Values.pytorch.distributed.worldSize 1 }}
  template:
    metadata:
      labels:
        app.kubernetes.io/name: {{ include "pytorch.name" . }}
        helm.sh/chart: {{ include "pytorch.chart" . }}
        app.kubernetes.io/instance: {{ .Release.Name }}
        app.kubernetes.io/component: "worker"
    spec:
      {{- include "pytorch.imagePullSecrets" . | nindent 6 }}
      {{- if .Values.securityContext.enabled }}
      securityContext:
        fsGroup: {{ .Values.securityContext.fsGroup }}
        runAsUser: {{ .Values.securityContext.runAsUser }}
      {{- end }}
      {{- if .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml .Values.nodeSelector | nindent 8 }}
      {{- end }}
      {{- if .Values.tolerations }}
      tolerations:
        {{- toYaml .Values.tolerations | nindent 8 }}
      {{- end }}
      {{- if .Values.affinity }}
      affinity:
        {{- toYaml .Values.affinity | nindent 8 }}
      {{- end }}
      {{- if .Values.pytorch.cloneFilesFromGit.enabled }}
      initContainers:
      # Clones the configured repository into /app and checks out the requested revision.
      - name: git-clone-repository
        image: {{ include "git.image" . }}
        imagePullPolicy: {{ .Values.git.pullPolicy | quote }}
        command: [ '/bin/sh', '-c' , 'git clone {{ .Values.pytorch.cloneFilesFromGit.repository }} /app && cd /app && git checkout {{ .Values.pytorch.cloneFilesFromGit.revision }}']
        volumeMounts:
        - name: git-cloned-files
          mountPath: /app
      {{- end }}
      containers:
      - name: worker
        image: {{ include "pytorch.image" . }}
        imagePullPolicy: {{ .Values.image.pullPolicy | quote }}
        # The script takes the text after the last "-" in POD_NAME, increments it and
        # exports it as RANK. It then runs the optional entrypoint file and finally
        # sleeps forever, so the container keeps running after the entrypoint returns.
        command:
        - bash
        - -c
        - |
          RANK=${POD_NAME##*-}
          ((RANK++))
          export RANK
          {{- if .Values.pytorch.entrypoint.file }}
          python {{ .Values.pytorch.entrypoint.file }} {{ if .Values.pytorch.entrypoint.args }}{{ .Values.pytorch.entrypoint.args }}{{ end }}
          {{- end }}
          sleep infinity
        env:
        # Pod name via the downward API; the command above derives RANK from it.
        - name: POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        - name: MASTER_ADDR
          value: {{ include "pytorch.fullname" . | quote }}
        - name: MASTER_PORT
          value: {{ .Values.service.port | quote }}
        - name: WORLD_SIZE
          value: {{ .Values.pytorch.distributed.worldSize | quote }}
        {{- if .Values.pytorch.extraEnvVars }}
        {{- toYaml .Values.pytorch.extraEnvVars | nindent 8 }}
        {{- end }}
        {{- if .Values.livenessProbe.enabled }}
        livenessProbe:
          exec:
            command:
            - python
            - -c
            - import torch; torch.__version__
          initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }}
          periodSeconds: {{ .Values.livenessProbe.periodSeconds }}
          timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }}
          successThreshold: {{ .Values.livenessProbe.successThreshold }}
          failureThreshold: {{ .Values.livenessProbe.failureThreshold }}
        {{- end }}
        {{- if .Values.readinessProbe.enabled }}
        readinessProbe:
          exec:
            command:
            - python
            - -c
            - import torch; torch.__version__
          initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }}
          periodSeconds: {{ .Values.readinessProbe.periodSeconds }}
          timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }}
          successThreshold: {{ .Values.readinessProbe.successThreshold }}
          failureThreshold: {{ .Values.readinessProbe.failureThreshold }}
        {{- end }}
        resources: {{- toYaml .Values.resources | nindent 10 }}
        volumeMounts:
        # Only one source is mounted at /app: external configMap, then chart files, then git clone.
        {{- if .Values.pytorch.configMap }}
        - name: ext-files
          mountPath: /app
        {{- else if .Files.Glob "files/*" }}
        - name: local-files
          mountPath: /app
        {{- else if .Values.pytorch.cloneFilesFromGit.enabled }}
        - name: git-cloned-files
          mountPath: /app
        {{- end }}
        - name: data
          mountPath: {{ .Values.persistence.mountPath }}
      volumes:
      {{- if .Values.pytorch.configMap }}
      - name: ext-files
        configMap:
          name: {{ .Values.pytorch.configMap }}
      {{- else if .Files.Glob "files/*" }}
      - name: local-files
        configMap:
          name: {{ include "pytorch.fullname" . }}-files
      {{- end }}
      {{- if .Values.pytorch.cloneFilesFromGit.enabled }}
      # Declared whenever the git-clone init container is rendered, so that its
      # volumeMount always resolves even when another source is mounted at /app.
      - name: git-cloned-files
        emptyDir: {}
      {{- end }}
      {{- if .Values.persistence.enabled }}
  volumeClaimTemplates:
  - metadata:
      name: data
      labels:
        app.kubernetes.io/name: {{ include "pytorch.name" . }}
        app.kubernetes.io/instance: {{ .Release.Name }}
      {{- if .Values.persistence.annotations }}
      annotations: {{- toYaml .Values.persistence.annotations | nindent 8 }}
      {{- end }}
    spec:
      accessModes: {{- toYaml .Values.persistence.accessModes | nindent 8 }}
      # A storageClass of "-" renders an explicit empty storageClassName;
      # an unset storageClass omits the field entirely.
      {{- if .Values.persistence.storageClass }}
      {{- if (eq "-" .Values.persistence.storageClass) }}
      storageClassName: ""
      {{- else }}
      storageClassName: {{ .Values.persistence.storageClass | quote }}
      {{- end }}
      {{- end }}
      resources:
        requests:
          storage: {{ .Values.persistence.size | quote }}
      {{- else }}
      # Persistence disabled: the "data" mount is backed by an emptyDir in the pod's volumes list.
      - name: data
        emptyDir: {}
      {{- end }}
{{- end }}