{{- /*
Mirror of https://github.com/bitnami/charts.git (synced 2026-03-13 14:57:24 +08:00).
Original file: 164 lines, 5.9 KiB, YAML.
*/ -}}
{{- /*
Worker StatefulSet for distributed PyTorch.

Rendered only when .Values.pytorch.distributed.enabled is set. It runs
(worldSize - 1) worker replicas; each worker derives its RANK from the ordinal
suffix of its pod name plus one, and reaches the master through the
MASTER_ADDR / MASTER_PORT environment variables.

Source files are mounted at /app from exactly one of, in order of precedence:
  1. an existing ConfigMap (.Values.pytorch.configMap),
  2. the chart's bundled files/ directory,
  3. a git repository cloned by an init container.
*/ -}}
{{- if .Values.pytorch.distributed.enabled }}
# apps/v1beta2 was removed in Kubernetes 1.16; apps/v1 is the stable API group.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: {{ include "pytorch.fullname" . }}-worker
  labels:
    app.kubernetes.io/name: {{ include "pytorch.name" . }}
    helm.sh/chart: {{ include "pytorch.chart" . }}
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
    app.kubernetes.io/component: "worker"
spec:
  # NOTE(review): no serviceName is set here. The apps/v1 StatefulSet schema
  # has historically listed it as required - confirm which Service (if any)
  # should govern these pods before relying on stable per-pod DNS names.
  selector:
    matchLabels:
      app.kubernetes.io/name: {{ include "pytorch.name" . }}
      app.kubernetes.io/instance: {{ .Release.Name | quote }}
      app.kubernetes.io/component: "worker"
  # One replica per non-master rank.
  replicas: {{ sub .Values.pytorch.distributed.worldSize 1 }}
  template:
    metadata:
      labels:
        app.kubernetes.io/name: {{ include "pytorch.name" . }}
        helm.sh/chart: {{ include "pytorch.chart" . }}
        app.kubernetes.io/instance: {{ .Release.Name | quote }}
        app.kubernetes.io/component: "worker"
    spec:
      {{- include "pytorch.imagePullSecrets" . | nindent 6 }}
      {{- if .Values.securityContext.enabled }}
      securityContext:
        fsGroup: {{ .Values.securityContext.fsGroup }}
        runAsUser: {{ .Values.securityContext.runAsUser }}
      {{- end }}
      {{- if .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml .Values.nodeSelector | nindent 8 }}
      {{- end }}
      {{- if .Values.tolerations }}
      tolerations:
        {{- toYaml .Values.tolerations | nindent 8 }}
      {{- end }}
      {{- if .Values.affinity }}
      affinity:
        {{- toYaml .Values.affinity | nindent 8 }}
      {{- end }}
      {{- /*
      The git-cloned-files volume only exists when neither a ConfigMap nor
      bundled files take precedence (see "volumes" below), so the init
      container that mounts it must be rendered under the same condition.
      */}}
      {{- if and .Values.pytorch.cloneFilesFromGit.enabled (not .Values.pytorch.configMap) (not (.Files.Glob "files/*")) }}
      initContainers:
      - name: git-clone-repository
        image: {{ include "git.image" . }}
        imagePullPolicy: {{ .Values.git.pullPolicy | quote }}
        command:
        - /bin/sh
        - -c
        - 'git clone {{ .Values.pytorch.cloneFilesFromGit.repository }} /app && cd /app && git checkout {{ .Values.pytorch.cloneFilesFromGit.revision }}'
        volumeMounts:
        - name: git-cloned-files
          mountPath: /app
      {{- end }}
      containers:
      - name: worker
        image: {{ include "pytorch.image" . }}
        imagePullPolicy: {{ .Values.image.pullPolicy | quote }}
        # RANK is the pod's ordinal (text after the last "-" in POD_NAME) plus
        # one. After the optional entrypoint finishes, "sleep infinity" keeps
        # the container running.
        command:
        - bash
        - -c
        - |
          RANK=${POD_NAME##*-}
          ((RANK++))
          export RANK
          {{- if .Values.pytorch.entrypoint.file }}
          python {{ .Values.pytorch.entrypoint.file }} {{ if .Values.pytorch.entrypoint.args }}{{ .Values.pytorch.entrypoint.args }}{{ end }}
          {{- end }}
          sleep infinity
        env:
        - name: POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        - name: MASTER_ADDR
          value: {{ include "pytorch.fullname" . | quote }}
        - name: MASTER_PORT
          value: {{ .Values.service.port | quote }}
        - name: WORLD_SIZE
          value: {{ .Values.pytorch.distributed.worldSize | quote }}
        {{- if .Values.pytorch.extraEnvVars }}
        {{- toYaml .Values.pytorch.extraEnvVars | nindent 8 }}
        {{- end }}
        {{- if .Values.livenessProbe.enabled }}
        livenessProbe:
          exec:
            command:
            - python
            - -c
            - import torch; torch.__version__
          initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }}
          periodSeconds: {{ .Values.livenessProbe.periodSeconds }}
          timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }}
          successThreshold: {{ .Values.livenessProbe.successThreshold }}
          failureThreshold: {{ .Values.livenessProbe.failureThreshold }}
        {{- end }}
        {{- if .Values.readinessProbe.enabled }}
        readinessProbe:
          exec:
            command:
            - python
            - -c
            - import torch; torch.__version__
          initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }}
          periodSeconds: {{ .Values.readinessProbe.periodSeconds }}
          timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }}
          successThreshold: {{ .Values.readinessProbe.successThreshold }}
          failureThreshold: {{ .Values.readinessProbe.failureThreshold }}
        {{- end }}
        resources:
          {{- toYaml .Values.resources | nindent 10 }}
        volumeMounts:
        {{- if .Values.pytorch.configMap }}
        - name: ext-files
          mountPath: /app
        {{- else if .Files.Glob "files/*" }}
        - name: local-files
          mountPath: /app
        {{- else if .Values.pytorch.cloneFilesFromGit.enabled }}
        - name: git-cloned-files
          mountPath: /app
        {{- end }}
        # "data" is backed by the volumeClaimTemplate when persistence is
        # enabled, otherwise by the emptyDir declared under "volumes".
        - name: data
          mountPath: {{ .Values.persistence.mountPath | quote }}
      volumes:
      {{- if .Values.pytorch.configMap }}
      - name: ext-files
        configMap:
          name: {{ .Values.pytorch.configMap | quote }}
      {{- else if .Files.Glob "files/*" }}
      - name: local-files
        configMap:
          name: {{ include "pytorch.fullname" . }}-files
      {{- else if .Values.pytorch.cloneFilesFromGit.enabled }}
      - name: git-cloned-files
        emptyDir: {}
      {{- end }}
      {{- if not .Values.persistence.enabled }}
      - name: data
        emptyDir: {}
      {{- end }}
  {{- if .Values.persistence.enabled }}
  volumeClaimTemplates:
  - metadata:
      name: data
      labels:
        app.kubernetes.io/name: {{ include "pytorch.name" . }}
        app.kubernetes.io/instance: {{ .Release.Name | quote }}
      {{- if .Values.persistence.annotations }}
      annotations:
        {{- toYaml .Values.persistence.annotations | nindent 8 }}
      {{- end }}
    spec:
      accessModes:
        {{- toYaml .Values.persistence.accessModes | nindent 8 }}
      {{- if .Values.persistence.storageClass }}
      {{- if (eq "-" .Values.persistence.storageClass) }}
      # A storageClass value of "-" is rendered as an empty storageClassName.
      storageClassName: ""
      {{- else }}
      storageClassName: {{ .Values.persistence.storageClass | quote }}
      {{- end }}
      {{- end }}
      resources:
        requests:
          storage: {{ .Values.persistence.size | quote }}
  {{- end }}
{{- end }}