文 | MESeraph
01 | 卸载CUDA
sudo apt-get remove cuda*
cd /usr/local/
rm -rf cuda cuda-10.1/
02 | 卸载显卡驱动
sudo /usr/bin/nvidia-uninstall
sudo apt-get --purge remove nvidia-*
sudo apt-get purge nvidia*
sudo apt-get purge libnvidia*
03 | 安装显卡驱动
sudo chmod +x NVIDIA-Linux-x86_64-460.73.01.run
./NVIDIA-Linux-x86_64-460.73.01.run
04 | 安装CUDA11.2
wget https://developer.download.nvidia.com/compute/cuda/11.2.1/local_installers/cuda_11.2.1_460.32.03_linux.run
chmod +x ./cuda_11.2.1_460.32.03_linux.run
./cuda_11.2.1_460.32.03_linux.run
注:安装过程中请取消勾选驱动(Driver)安装选项,显卡驱动已在第03步单独安装
在~/.bashrc文件末尾添加如下环境变量:
export PATH=$PATH:/usr/local/cuda-11.2/bin
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.2/lib64
执行如下命令使环境变量生效:
source ~/.bashrc
05 | 更换镜像源
sudo cp /etc/apt/sources.list /etc/apt/sources.list.bak
sudo sed -i 's/archive.ubuntu.com/mirrors.ustc.edu.cn/g' /etc/apt/sources.list
sudo apt update
05 | 安装docker
安装依赖包
sudo apt install apt-transport-https ca-certificates software-properties-common curl
添加GPG密钥,需指定源
curl -fsSL https://mirrors.ustc.edu.cn/docker-ce/linux/ubuntu/gpg | sudo apt-key add -
sudo add-apt-repository "deb [arch=amd64] https://mirrors.ustc.edu.cn/docker-ce/linux/ubuntu \
$(lsb_release -cs) stable"
安装
sudo apt update
sudo apt-get install docker-ce docker-ce-cli containerd.io
测试
sudo docker run hello-world
06 | 安装nvidia-docker
添加库包
curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | \
sudo apt-key add -
distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | \
sudo tee /etc/apt/sources.list.d/nvidia-docker.list
sudo apt-get update
安装
sudo apt-get install -y nvidia-docker2
修改默认启动runtime(/etc/docker/daemon.json)
{
"default-runtime": "nvidia",
"runtimes": {
"nvidia": {
"path": "nvidia-container-runtime",
"runtimeArgs": []
}
}
}
添加docker用户组并将当前用户添加至docker用户组
sudo groupadd docker
sudo gpasswd -a ${USER} docker
重启docker
sudo systemctl restart docker
newgrp docker
测试
docker run --rm --gpus all nvidia/cuda:11.0-base nvidia-smi
安装bash补全工具
sudo apt install bash-completion
source /etc/profile.d/bash_completion.sh
07 | 创建tensorrt容器环境
docker run --gpus all -itd --name=nvidia --hostname=nvidia --network=host --cap-add=IPC_LOCK --privileged=true nvcr.io/nvidia/tensorrt:21.03-py3
进入容器测试(先执行 docker exec -it nvidia bash 进入容器,以下命令均在容器内执行)
cd /workspace/tensorrt/samples
make -j4
cd /workspace/tensorrt/bin
./sample_mnist