You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

137 lines
4.5 KiB
YAML

1 year ago
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: {{ include "dcgm-exporter.fullname" . }}
namespace: {{ include "dcgm-exporter.namespace" . }}
labels:
{{- include "dcgm-exporter.labels" . | nindent 4 }}
app.kubernetes.io/component: "dcgm-exporter"
spec:
updateStrategy:
type: RollingUpdate
{{- with .Values.rollingUpdate }}
rollingUpdate:
maxUnavailable: {{ .maxUnavailable }}
maxSurge: {{ .maxSurge }}
{{- end }}
selector:
matchLabels:
{{- include "dcgm-exporter.selectorLabels" . | nindent 6 }}
app.kubernetes.io/component: "dcgm-exporter"
template:
metadata:
labels:
{{- include "dcgm-exporter.selectorLabels" . | nindent 8 }}
app.kubernetes.io/component: "dcgm-exporter"
{{- if .Values.podAnnotations }}
annotations:
{{- toYaml .Values.podAnnotations | nindent 8 }}
{{- end }}
spec:
{{- if .Values.runtimeClassName }}
runtimeClassName: {{ .Values.runtimeClassName }}
{{- end }}
priorityClassName: {{ .Values.priorityClassName | default "system-node-critical" }}
{{- if .Values.hostNetwork }}
hostNetwork: {{ .Values.hostNetwork }}
dnsPolicy: ClusterFirstWithHostNet
{{- end }}
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
{{- end }}
serviceAccountName: {{ include "dcgm-exporter.serviceAccountName" . }}
{{- if .Values.podSecurityContext }}
securityContext:
{{- toYaml .Values.podSecurityContext | nindent 8 }}
{{- end }}
{{- if .Values.affinity }}
affinity:
{{- toYaml .Values.affinity | nindent 8 }}
{{- end }}
{{- if .Values.nodeSelector }}
nodeSelector:
{{- toYaml .Values.nodeSelector | nindent 8 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 6 }}
{{- end }}
volumes:
- name: "pod-gpu-resources"
hostPath:
path: {{ .Values.kubeletPath }}
{{- range .Values.extraHostVolumes }}
- name: {{ .name | quote }}
hostPath:
path: {{ .hostPath | quote }}
{{- end }}
{{- with .Values.extraConfigMapVolumes }}
{{- toYaml . | nindent 6 }}
{{- end }}
containers:
- name: exporter
securityContext:
{{- toYaml .Values.securityContext | nindent 10 }}
{{- if .Values.image.tag }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
{{- else }}
image: "{{ .Values.image.repository }}:{{ .Chart.AppVersion }}"
{{- end }}
imagePullPolicy: "{{ .Values.image.pullPolicy }}"
args:
{{- range $.Values.arguments }}
- {{ . }}
{{- end }}
env:
- name: "DCGM_EXPORTER_KUBERNETES"
value: "true"
- name: "DCGM_EXPORTER_LISTEN"
value: "{{ .Values.service.address }}"
- name: NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
{{- if .Values.extraEnv }}
{{- toYaml .Values.extraEnv | nindent 8 }}
{{- end }}
ports:
- name: "metrics"
containerPort: {{ .Values.service.port }}
volumeMounts:
- name: "pod-gpu-resources"
readOnly: true
mountPath: "/var/lib/kubelet/pod-resources"
{{- if .Values.extraVolumeMounts }}
{{- toYaml .Values.extraVolumeMounts | nindent 8 }}
{{- end }}
livenessProbe:
httpGet:
path: /health
port: {{ .Values.service.port }}
initialDelaySeconds: 45
periodSeconds: 5
readinessProbe:
httpGet:
path: /health
port: {{ .Values.service.port }}
initialDelaySeconds: 45
{{- if .Values.resources }}
resources:
{{- toYaml .Values.resources | nindent 10 }}
{{- end }}