GPU docker环境部署(从驱动安装开始)

文 | MESeraph

01 | 卸载CUDA

sudo apt-get remove cuda*
cd /usr/local/
rm -rf cuda cuda-10.1/

02 | 卸载显卡驱动

sudo /usr/bin/nvidia-uninstall
sudo apt-get --purge remove nvidia-*
sudo apt-get purge nvidia*
sudo apt-get purge libnvidia*

03 | 安装显卡驱动

sudo chmod +x NVIDIA-Linux-x86_64-460.73.01.run
./NVIDIA-Linux-x86_64-460.73.01.run

04 | 安装CUDA11.2

wget https://developer.download.nvidia.com/compute/cuda/11.2.1/local_installers/cuda_11.2.1_460.32.03_linux.run
chmod +x ./cuda_11.2.1_460.32.03_linux.run
./cuda_11.2.1_460.32.03_linux.run
注:安装时请取消勾选驱动(Driver)安装选项,因为显卡驱动已在上一步单独安装

配置~/.bashrc环境变量如下:

export PATH=$PATH:/usr/local/cuda-11.2/bin
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.2/lib64

执行如下命令:

source ~/.bashrc

05 | 更换镜像源

sudo cp /etc/apt/sources.list /etc/apt/sources.list.bak
sudo sed -i 's/archive.ubuntu.com/mirrors.ustc.edu.cn/g' /etc/apt/sources.list
sudo apt update

05 | 安装docker

安装依赖包

sudo apt install apt-transport-https ca-certificates software-properties-common curl

添加Docker的GPG密钥,并添加对应的docker-ce软件源(此处使用中科大镜像源)

curl -fsSL https://mirrors.ustc.edu.cn/docker-ce/linux/ubuntu/gpg | sudo apt-key add -
sudo add-apt-repository "deb [arch=amd64] https://mirrors.ustc.edu.cn/docker-ce/linux/ubuntu \
$(lsb_release -cs) stable"

安装

sudo apt update
sudo apt-get install docker-ce docker-ce-cli containerd.io

测试

sudo docker run hello-world

06 | 安装nvidia-docker

添加nvidia-docker的GPG密钥和软件源

curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | \
     sudo apt-key add -
     
distribution=$(. /etc/os-release;echo $ID$VERSION_ID)

curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | \
  sudo tee /etc/apt/sources.list.d/nvidia-docker.list

sudo apt-get update

安装

sudo apt-get install -y nvidia-docker2

修改默认启动runtime(/etc/docker/daemon.json)

{
	"default-runtime": "nvidia",
    "runtimes": {
        "nvidia": {
            "path": "nvidia-container-runtime",
            "runtimeArgs": []
        }
    }
}

添加docker用户组并将当前用户添加至docker用户组

sudo groupadd docker
sudo gpasswd -a ${USER} docker

重启docker

sudo systemctl restart docker
newgrp docker

测试

docker run --rm --gpus all nvidia/cuda:11.0-base nvidia-smi

安装bash补全工具

sudo apt install bash-completion
source /etc/profile.d/bash_completion.sh

07 | 创建tensorrt容器环境

docker run --gpus all -itd  --name=nvidia --hostname=nvidia --network=host --cap-add=IPC_LOCK  --privileged=true nvcr.io/nvidia/tensorrt:21.03-py3

进入容器(docker exec -it nvidia /bin/bash)后,在容器内执行以下命令进行测试

cd /workspace/tensorrt/samples
make -j4
cd /workspace/tensorrt/bin
./sample_mnist

猜你喜欢

转载自blog.csdn.net/pengshuyes/article/details/118054208
今日推荐