GPU Scheduling on Kubernetes

Published 2023-06-02 15:26:57  Author: 惊蛰2020

Kubernetes GPU support

1. Install the NVIDIA driver on CentOS

Check the GPU model

yum install pciutils

lspci | grep -i vga

Download the driver from the NVIDIA website

https://www.nvidia.com.tw/Download/index.aspx?lang=tw

Installation guide

https://yinguobing.com/install-nvidia-driver-centos-7/

Verify the installation

[root@localhost ~]# nvidia-smi
Tue Apr  4 16:12:13 2023
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.182.03   Driver Version: 470.182.03   CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|===============================+======================+======================|
|   0  NVIDIA GeForce ...  Off  | 00000000:17:00.0 N/A |                  N/A |
| 30%   34C    P8    N/A /  N/A |      0MiB /  2001MiB |     N/A      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+

+-----------------------------------------------------------------------------+
| Processes:                                                                  |
|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
|        ID   ID                                                   Usage      |
|=============================================================================|
|  No running processes found                                                 |
+-----------------------------------------------------------------------------+
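Scripts often need the driver and CUDA versions out of this banner. A minimal awk sketch, fed here from a captured banner line so it runs without a GPU (on a real host pipe `nvidia-smi` in instead):

```shell
# Extract every value that follows a "Version:" token in the nvidia-smi
# banner. The printf stands in for `nvidia-smi | sed -n 3p` on a GPU host.
printf '| NVIDIA-SMI 470.182.03   Driver Version: 470.182.03   CUDA Version: 11.4     |\n' |
  awk '{ for (i = 1; i <= NF; i++) if ($i == "Version:") print $(i + 1) }'
# prints the driver version (470.182.03) and the CUDA version (11.4)
```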

 

Uninstall the driver

sh NVIDIA-Linux-x86_64-470.182.03.run --uninstall
yum remove nvidia-*
rpm -qa|grep -i nvid|sort
yum remove kmod-nvidia-*
reboot

 

2. Install NVIDIA container runtime support

Docker

https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html

Install docker-ce

yum  install -y tar bzip2 make automake gcc gcc-c++ vim pciutils elfutils-libelf-devel libglvnd-devel iptables
yum-config-manager --add-repo=https://download.docker.com/linux/centos/docker-ce.repo
yum repolist -v
#CentOS does not ship the specific containerd.io version that newer Docker CE releases require, so one option is to install the containerd.io package manually and then install docker-ce.
yum install -y https://download.docker.com/linux/centos/7/x86_64/stable/Packages/containerd.io-1.4.3-3.1.el7.x86_64.rpm
yum install docker-ce -y
systemctl --now enable docker

Install nvidia-docker2

#nvidia-docker2 is no longer recommended for Docker 19.03 and later

distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.repo | \
 sudo tee /etc/yum.repos.d/nvidia-docker.repo
sudo yum install -y nvidia-docker2
sudo pkill -SIGHUP dockerd

Install nvidia-container-toolkit

distribution=$(. /etc/os-release;echo $ID$VERSION_ID) \
  && curl -s -L https://nvidia.github.io/libnvidia-container/$distribution/libnvidia-container.repo | sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo
sudo yum clean expire-cache
sudo yum install -y nvidia-container-toolkit
#Register the runtime ##this only adds the runtime; it does not make it the default
sudo nvidia-ctk runtime configure --runtime=docker

###Set the default runtime to nvidia

cat /etc/docker/daemon.json
{
...
   "default-runtime": "nvidia", // add this line yourself
   "runtimes": {
       "nvidia": {
           "args": [],
           "path": "nvidia-container-runtime"
      }
  }
}
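Hand-editing daemon.json is error-prone when it already has other keys. A sketch that merges the two settings in with python3 (assumption: it writes ./daemon.json so it can be tried without root; point DAEMON_JSON at /etc/docker/daemon.json on a real host). Recent nvidia-ctk releases can also do this in one step with `nvidia-ctk runtime configure --runtime=docker --set-as-default`.

```shell
# Merge "default-runtime": "nvidia" into a daemon.json without clobbering
# existing keys. Writes ./daemon.json by default so it is safe to try;
# set DAEMON_JSON=/etc/docker/daemon.json (as root) on a real host.
set -eu
DAEMON_JSON="${DAEMON_JSON:-./daemon.json}"
[ -f "$DAEMON_JSON" ] || echo '{}' > "$DAEMON_JSON"
python3 - "$DAEMON_JSON" <<'PYEOF'
import json, sys

path = sys.argv[1]
with open(path) as f:
    cfg = json.load(f)
cfg["default-runtime"] = "nvidia"              # make nvidia the default
cfg.setdefault("runtimes", {})["nvidia"] = {   # and register the runtime
    "path": "nvidia-container-runtime",
    "args": [],
}
with open(path, "w") as f:
    json.dump(cfg, f, indent=4)
PYEOF
cat "$DAEMON_JSON"
```

After the file is written, the Docker restart below picks up the change.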

###Restart docker

systemctl daemon-reload
systemctl restart docker

Verify that Docker can use the GPU

[root@localhost ~]#  docker run --rm --gpus all nvidia/cuda:11.6.2-base-ubuntu20.04 nvidia-smi
Tue Apr  4 08:10:33 2023
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.182.03   Driver Version: 470.182.03   CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|===============================+======================+======================|
|   0  NVIDIA GeForce ...  Off  | 00000000:17:00.0 N/A |                  N/A |
| 30%   34C    P8    N/A /  N/A |      0MiB /  2001MiB |     N/A      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+

+-----------------------------------------------------------------------------+
| Processes:                                                                  |
|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
|        ID   ID                                                   Usage      |
|=============================================================================|
|  No running processes found                                                 |
+-----------------------------------------------------------------------------+

docker info

[root@localhost fangzhou]# docker info
....
Runtimes: io.containerd.runc.v2 nvidia runc
Default Runtime: nvidia  #confirm that Docker's default runtime has been changed
....

containerd

Install nvidia-container-toolkit

#The repository setup and package install are the same as in the Docker section above
distribution=$(. /etc/os-release;echo $ID$VERSION_ID) \
  && curl -s -L https://nvidia.github.io/libnvidia-container/$distribution/libnvidia-container.repo | sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo
sudo yum clean expire-cache
sudo yum install -y nvidia-container-toolkit
#Register the runtime with containerd ##this only adds the runtime; it does not make it the default
sudo nvidia-ctk runtime configure --runtime=containerd

 

###Set the default runtime to nvidia

vim /etc/containerd/config.toml

...
[plugins."io.containerd.grpc.v1.cri".containerd]
     default_runtime_name = "nvidia"
     disable_snapshot_annotations = true
     discard_unpacked_layers = false
     ignore_rdt_not_enabled_errors = false
     no_pivot = false
     snapshotter = "overlayfs"

     [plugins."io.containerd.grpc.v1.cri".containerd.default_runtime]
       base_runtime_spec = ""
       container_annotations = []
       pod_annotations = []
       privileged_without_host_devices = false
       runtime_engine = ""
       runtime_root = ""
       runtime_type = ""

       [plugins."io.containerd.grpc.v1.cri".containerd.default_runtime.options]

     [plugins."io.containerd.grpc.v1.cri".containerd.runtimes]

       [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
         base_runtime_spec = ""
         container_annotations = []
         pod_annotations = []
         privileged_without_host_devices = false
         runtime_engine = ""
         runtime_root = ""
         runtime_type = "io.containerd.runc.v2"

         [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
           BinaryName = ""
           CriuImagePath = ""
           CriuPath = ""
           CriuWorkPath = ""
           IoGid = 0
           IoUid = 0
           NoNewKeyring = false
           NoPivotRoot = false
           Root = ""
           ShimCgroup = ""
           SystemdCgroup = true

         [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]
           privileged_without_host_devices = false
           runtime_engine = ""
           runtime_root = ""
           runtime_type = "io.containerd.runc.v2"
           [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options]
             BinaryName = "/usr/bin/nvidia-container-runtime"
             SystemdCgroup = true
...

 

Check the containerd runtime

crictl info

...
"config": {
  "containerd": {
    "snapshotter": "overlayfs",
    "defaultRuntimeName": "nvidia",
    "defaultRuntime": {
      "runtimeType": "",
      "runtimePath": "",
      "runtimeEngine": "",
      "PodAnnotations": [],
      "ContainerAnnotations": [],
      "runtimeRoot": "",
      "options": {},
      "privileged_without_host_devices": false,
      "baseRuntimeSpec": "",
      "cniConfDir": "",
      "cniMaxConfNum": 0
    },
    "untrustedWorkloadRuntime": {
      "runtimeType": "",
      "runtimePath": "",
      "runtimeEngine": "",
      "PodAnnotations": [],
      "ContainerAnnotations": [],
      "runtimeRoot": "",
      "options": {},
      "privileged_without_host_devices": false,
      "baseRuntimeSpec": "",
      "cniConfDir": "",
      "cniMaxConfNum": 0
    },
    "runtimes": {
      "nvidia": {
        "runtimeType": "io.containerd.runc.v2",
        "runtimePath": "",
        "runtimeEngine": "",
        "PodAnnotations": null,
        "ContainerAnnotations": null,
        "runtimeRoot": "",
        "options": {
          "BinaryName": "/usr/bin/nvidia-container-runtime",
          "SystemdCgroup": true
        },
        "privileged_without_host_devices": false,
        "baseRuntimeSpec": "",
        "cniConfDir": "",
        "cniMaxConfNum": 0
      },
      "runc": {
        "runtimeType": "io.containerd.runc.v2",
        "runtimePath": "",
        "runtimeEngine": "",
        "PodAnnotations": [],
        "ContainerAnnotations": [],
        "runtimeRoot": "",
        "options": {
          "BinaryName": "",
          "CriuImagePath": "",
          "CriuPath": "",
          "CriuWorkPath": "",
          "IoGid": 0,
          "IoUid": 0,
          "NoNewKeyring": false,
          "NoPivotRoot": false,
          "Root": "",
          "ShimCgroup": "",
          "SystemdCgroup": true
        },
        "privileged_without_host_devices": false,
        "baseRuntimeSpec": "",
        "cniConfDir": "",
        "cniMaxConfNum": 0
      }
    },
    "noPivot": false,
    "disableSnapshotAnnotations": true,
    "discardUnpackedLayers": false,
    "ignoreRdtNotEnabledErrors": false
...

 

3. Install the Kubernetes GPU device plugin as a DaemonSet (official)

kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.13.0/nvidia-device-plugin.yml
# Copyright (c) 2019, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: nvidia-device-plugin-daemonset
  namespace: kube-system
spec:
  selector:
    matchLabels:
      name: nvidia-device-plugin-ds
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        name: nvidia-device-plugin-ds
    spec:
      tolerations:
      - key: nvidia.com/gpu
        operator: Exists
        effect: NoSchedule
      # Mark this pod as a critical add-on; when enabled, the critical add-on
      # scheduler reserves resources for critical add-on pods so that they can
      # be rescheduled after a failure.
      # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
      priorityClassName: "system-node-critical"
      containers:
      - image: nvcr.io/nvidia/k8s-device-plugin:v0.13.0
        name: nvidia-device-plugin-ctr
        env:
          - name: FAIL_ON_INIT_ERROR
            value: "false"
        securityContext:
          allowPrivilegeEscalation: false
          capabilities:
            drop: ["ALL"]
        volumeMounts:
        - name: device-plugin
          mountPath: /var/lib/kubelet/device-plugins
      volumes:
      - name: device-plugin
        hostPath:
          path: /var/lib/kubelet/device-plugins

Check the plugin logs

[root@localhost ~]# kubectl  logs -f nvidia-device-plugin-daemonset-5l5pm -n kube-system
2023/04/04 09:00:53 Starting FS watcher.
2023/04/04 09:00:53 Starting OS watcher.
2023/04/04 09:00:53 Starting Plugins.
2023/04/04 09:00:53 Loading configuration.
2023/04/04 09:00:53 Updating config with default resource matching patterns.
2023/04/04 09:00:53
Running with config:
{
 "version": "v1",
 "flags": {
   "migStrategy": "none",
   "failOnInitError": false,
   "nvidiaDriverRoot": "/",
   "gdsEnabled": false,
   "mofedEnabled": false,
   "plugin": {
     "passDeviceSpecs": false,
     "deviceListStrategy": "envvar",
     "deviceIDStrategy": "uuid"
  }
},
 "resources": {
   "gpus": [
    {
       "pattern": "*",
       "name": "nvidia.com/gpu"
    }
  ]
},
 "sharing": {
   "timeSlicing": {}
}
}
2023/04/04 09:00:53 Retreiving plugins.
2023/04/04 09:00:53 Detected NVML platform: found NVML library
2023/04/04 09:00:53 Detected non-Tegra platform: /sys/devices/soc0/family file not found
2023/04/04 09:00:53 Starting GRPC server for 'nvidia.com/gpu'
2023/04/04 09:00:53 Starting to serve 'nvidia.com/gpu' on /var/lib/kubelet/device-plugins/nvidia-gpu.sock
2023/04/04 09:00:53 Registered device plugin for 'nvidia.com/gpu' with Kubelet

Deploy a test workload

apiVersion: batch/v1
kind: Job
metadata:
  name: gpu-feature-discovery
  labels:
    app.kubernetes.io/name: gpu-feature-discovery
    app.kubernetes.io/version: 0.8.0
    app.kubernetes.io/part-of: nvidia-gpu
spec:
  template:
    metadata:
      labels:
        app.kubernetes.io/name: gpu-feature-discovery
        app.kubernetes.io/version: 0.8.0
        app.kubernetes.io/part-of: nvidia-gpu
    spec:
      nodeName: NODE_NAME
      containers:
        - image: nvcr.io/nvidia/gpu-feature-discovery:v0.8.0
          name: gpu-feature-discovery
          args:
            - "--oneshot"
          volumeMounts:
            - name: output-dir
              mountPath: "/etc/kubernetes/node-feature-discovery/features.d"
            - name: host-sys
              mountPath: "/sys"
          securityContext:
            privileged: true
      volumes:
        - name: output-dir
          hostPath:
            path: "/etc/kubernetes/node-feature-discovery/features.d"
        - name: host-sys
          hostPath:
            path: "/sys"
      restartPolicy: Never
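The log check below reads from a pod named gpu-pod, which the Job above does not create. A sketch of such a pod, modeled on the CUDA vector-add sample from the device-plugin README (assumption: the nvcr.io cuda-sample image tag):

```yaml
apiVersion: v1
kind: Pod
metadata:
  name: gpu-pod
spec:
  restartPolicy: Never
  containers:
    - name: cuda-container
      image: nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda10.2
      resources:
        limits:
          nvidia.com/gpu: 1   # request one GPU via the extended resource
  tolerations:
  - key: nvidia.com/gpu
    operator: Exists
    effect: NoSchedule
```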

Check the test pod logs

[root@localhost ~]# kubectl logs -f gpu-pod
[Vector addition of 50000 elements]
Copy input data from the host memory to the CUDA device
CUDA kernel launch with 196 blocks of 256 threads
Copy output data from the CUDA device to the host memory
Test PASSED
Done

4. gpu-operator (official, full-featured)

###The complete official stack

Overview — NVIDIA Cloud Native documentation

Getting Started — NVIDIA Cloud Native documentation

The GPU driver and nvidia-container-toolkit must be installed first

centos7

#match the nvidia-container toolkit version to the CentOS release

helm install --wait --generate-name -n gpu-operator --create-namespace \
  nvidia/gpu-operator \
  --set driver.enabled=false \
  --set toolkit.enabled=false \
  --set toolkit.version=1.13.1-centos7

#Substitute image (mirror)

docker pull vk1602/node-feature-discovery:v0.12.1

 

5. 4Paradigm vGPU scheduler plugin

###Pick either this or the official plugin, not both

https://github.com/4paradigm/k8s-vgpu-scheduler/blob/master/README_cn.md

vGPU requires nvidia-docker2; it does not use nvidia-container-toolkit

Label the GPU-capable nodes

kubectl label nodes {nodeid} gpu=on

Install vgpu

#Add the Helm repository
helm repo add vgpu-charts https://4paradigm.github.io/k8s-vgpu-scheduler
#Check the Kubernetes version
kubectl version
#Install, pinning the scheduler image tag to the cluster version
helm install vgpu vgpu-charts/vgpu --set scheduler.kubeScheduler.imageTag=v1.26.4 -n kube-system

Verify the installation

kubectl get pods -n kube-system

[root@localhost GPU]# kubectl get pod -n kube-system
NAME                                     READY   STATUS   RESTARTS   AGE
....
vgpu-device-plugin-zztcz                  2/2     Running   0         18s  #these two pods should be Running; it is worth checking the device-plugin container's logs in this pod
vgpu-scheduler-7555657c58-pzlrg           2/2     Running   0         18s

GPU memory slicing test

apiVersion: v1
kind: Pod
metadata:
  name: gpu-pod
spec:
  containers:
    - name: ubuntu-container
      image: ubuntu:18.04
      command: ["bash", "-c", "sleep 86400"]
      resources:
        limits:
          nvidia.com/gpu: 1 # request 1 vGPU
          nvidia.com/gpumem: 3000 # each vGPU gets 3000 MiB of device memory (optional, integer)
          nvidia.com/gpucores: 30 # each vGPU gets 30% of the card's compute (optional, integer)

On the host, nvidia-smi still shows the full card:
[root@localhost ~]# nvidia-smi
Wed Apr 12 13:38:01 2023
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|===============================+======================+======================|
|   0  NVIDIA TITAN RTX    Off  | 00000000:17:00.0 Off |                  N/A |
| 41%   36C    P8    25W / 280W |      0MiB / 24576MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+

+-----------------------------------------------------------------------------+
| Processes:                                                                  |
|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
|        ID   ID                                                   Usage      |
|=============================================================================|
|  No running processes found                                                 |
+-----------------------------------------------------------------------------+
[root@localhost ~]# kubectl exec -it gpu-pod nvidia-smi
kubectl exec [POD] [COMMAND] is DEPRECATED and will be removed in a future version. Use kubectl exec [POD] -- [COMMAND] instead.
[4pdvGPU Warn(7:139721868449600:util.c:149)]: new_uuid=GPU-9539b1f3-3f8e-444a-1657-8ccb8bdb2b90 1
[4pdvGPU Msg(7:139721868449600:libvgpu.c:871)]: Initializing.....
[4pdvGPU Msg(7:139721868449600:device.c:249)]: driver version=12000
Wed Apr 12 05:38:14 2023
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|===============================+======================+======================|
|   0  NVIDIA TITAN RTX    Off  | 00000000:17:00.0 Off |                  N/A |
| 40%   36C    P8    25W / 280W |      0MiB /  3000MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+

+-----------------------------------------------------------------------------+
| Processes:                                                                  |
|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
|        ID   ID                                                   Usage      |
|=============================================================================|
|  No running processes found                                                 |
+-----------------------------------------------------------------------------+
[4pdvGPU Msg(7:139721868449600:multiprocess_memory_limit.c:457)]: Calling exit handler 7

View the monitoring metrics

[root@localhost ~]# kubectl get svc -n kube-system
NAME                         TYPE       CLUSTER-IP     EXTERNAL-IP   PORT(S)                         AGE
...
vgpu-device-plugin-monitor   NodePort    10.68.75.180   <none>        31992:31992/TCP                 23m
vgpu-scheduler               NodePort    10.68.29.124   <none>        443:31242/TCP,31993:31993/TCP   23m


http://{nodeip}:{monitorPort}/metrics

DCGM monitoring

$ helm repo add gpu-helm-charts \
    https://nvidia.github.io/dcgm-exporter/helm-charts

$ helm repo update

$ helm install \
    --generate-name \
    gpu-helm-charts/dcgm-exporter

FAQ

1. When scheduling a GPU with this plugin, the pod must not include the privileged keyword securityContext; otherwise the plugin reports errors and the pod is recreated endlessly.

Other options

https://virtaitech.com/ (commercial)