>>> import torch
>>> print(torch.__version__)
1.2.0+cu92
CUDA torch.cuda.is_available()返回false
CUDA torch.cuda.is_available()返回false
torch.__version__ #查看pytorch版本
torch.version.cuda #查看pytorch编译时所用的cuda版本;若查询结果为None,说明当前pytorch不带cuda支持,需要重新编译(或安装)cuda版pytorch
cuda安装完后发现没有/dev/nvidia-uvm,可以用root身份编译并运行deviceQuery示例,运行CUDA程序时会自动创建该设备节点:
cd /usr/local/cuda/samples/1_Utilities/deviceQuery
# make
# ./deviceQuery
训练框架启动容器:
docker run -itd --shm-size=256G --name zhang_test --privileged=true -v /etc/libibverbs.d:/etc/libibverbs.d \
-v /usr/local/ib_lib64/:/usr/local/ib_lib64/ -v /home:/home -v /data:/data -v /am:/am \
-v /var/lib/nvidia-docker/volumes/nvidia_driver/396.37/:/usr/local/nvidia \
-v /dafs/userdata/:/dafs/userdata/ -v /dafs/groupdata/:/dafs/groupdata/ --network=host --entrypoint=/bin/bash \
-itd reg.test.com/dadltp/pytorch_1.2_cuda_9.2_test:latest
docker run -itd --shm-size=256G --privileged=true -v /etc/libibverbs.d:/etc/libibverbs.d \
-v /usr/local/ib_lib64/:/usr/local/ib_lib64/ -v /home:/home -v /data:/data -v /am:/am \
-v /var/lib/nvidia-docker/volumes/nvidia_driver/396.37/:/usr/local/nvidia \
-v //userdata/dafs/userdata/:/dahuafs -v /dafs/groupdata/:/dafs/groupdata/ --network=host --entrypoint=/bin/bash \
-itd 镜像ID