一、卸载

卸载原有驱动
方法一:找到显卡驱动.run文件:
$ sh NVIDIA-Linux-x86_64-418.126.02.run --uninstall
方法二:清除所有nvidia相关文件和依赖
$ yum remove nvidia-*
为了卸载干净,还可以先列出残留的nvidia相关包,再逐个删除(目标是把nvidia-driver的相关组件都清理掉):
rpm -qa|grep -i nvid|sort
yum  remove kmod-nvidia-*

卸载原有cuda

如果是rpm安装
sudo rpm -e cuda-repo-<版本> #完整包名可用Tab补全,或用 rpm -qa | grep cuda-repo 查询
sudo yum -y remove nvidia-*
sudo yum -y remove cuda-*
sudo rm -rf /usr/local/cuda*
如果是.run安装
sudo nvidia-uninstall #卸载驱动
sudo /usr/local/cuda/bin/cuda-uninstaller #卸载cuda;10.0及更早版本的卸载脚本后缀名为.pl,例如9.2为 /usr/local/cuda/bin/uninstall_cuda_9.2.pl

清理gcc环境

sudo yum autoremove -y gcc*
sudo yum autoremove -y cpp*
sudo yum autoremove -y dkms

sudo rm -rf /opt/rh #删除由devtoolset安装的gcc
sudo rm -rf /usr/lib/gcc
sudo rm -rf /usr/libexec/gcc
whereis gcc

sudo yum install -y gcc
sudo yum install -y gcc-c++
sudo yum install -y glibc-devel

gcc -v

安装cuda以及nvidia driver

#install cuda from .run file from https://developer.nvidia.com/cuda-downloads
sudo sh cuda_10.1.105_418.39_linux.run # 安装过程中也可选择同时安装自带的driver;如需更新版本的driver,则用下条命令单独安装
sudo sh NVIDIA-Linux-x86_64-418.56.run # install the driver

#install cuda from .rpm file
修改cuda源:编辑 /etc/yum.repos.d/cuda.repo,将其中的 baseurl 改为如下镜像地址:
https://mirrors.aliyun.com/nvidia-cuda/rhel7/x86_64/

安装cudnn

rm -rf cuda
tar -xzvf cudnn-10.1-linux-x64-v7.5.1.10.tgz
sudo cp cuda/include/cudnn.h /usr/local/cuda/include
sudo cp cuda/lib64/libcudnn* /usr/local/cuda/lib64
sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib64/libcudnn*

测试

reboot
nvidia-smi

测试时如果报错 "Failed to initialize NVML: Driver/library version mismatch",说明已加载的nvidia内核模块与用户态驱动库版本不一致,通常重启(或重新加载nvidia内核模块)即可解决,详见NVML相关说明

编译安装gcc5.4.0

#从http://ftp.tsukuba.wide.ad.jp/software/gcc/releases/下载
cd gcc-5.4.0
./contrib/download_prerequisites
mkdir build
cd build
../configure --enable-checking=release --enable-languages=c,c++ --disable-multilib #--prefix指定安装路径,默认/usr/local
make -j $(nproc)
sudo make install

编译gcc时如果出现 error "Unable to find a suitable type for HOST_WIDE_INT",先清除以下环境变量,再重新执行configure:

unset LIBRARY_PATH CPATH C_INCLUDE_PATH PKG_CONFIG_PATH CPLUS_INCLUDE_PATH INCLUDE

修改gcc环境

# 添加/etc/profile.d/gcc5.4path.sh,影响root及所有账号,内容为
export PATH=/usr/local/bin:$PATH
export LD_LIBRARY_PATH=/usr/local/lib/:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/usr/local/lib64:$LD_LIBRARY_PATH
# 添加/etc/sudoers.d/sudopath,对sudo path产生影响
Defaults secure_path = /sbin:/usr/sbin:/usr/local/bin:/usr/bin:/usr/local/openmpi/bin:/bin

二、实战

 1001  2021-08-27 14:53:47  root sudo sh cuda_11.1.0_455.23.05_linux.run
 1002  2021-08-27 15:03:47  root yum remove nvidia-*
 1003  2021-08-27 15:05:25  root rpm -qa|grep -i nvid|sort
 1004  2021-08-27 15:06:04  root yum  remove yum-plugin-nvidia-1.0.2-1.el7.elrepo.noarch
 1005  2021-08-27 15:06:11  root ls
 1006  2021-08-27 15:06:38  root yum remove cuda
 1007  2021-08-27 15:06:42  root yum remove cuda*
 1008  2021-08-27 15:06:51  root nvidia-smi
 1009  2021-08-27 15:06:58  root nvcc -V
 1010  2021-08-27 15:08:23  root sudo dpkg -l |grep cuda
 1011  2021-08-27 15:09:19  root sudo rpm -e cuda*
 1012  2021-08-27 15:09:43  root sudo yum -y remove nvidia-*
 1013  2021-08-27 15:09:50  root sudo yum -y remove cuda-*
 1014  2021-08-27 15:10:09  root sudo rm -rf /usr/local/cuda*
 1015  2021-08-27 15:10:29  root sudo nvidia-uninstall
 1016  2021-08-27 15:10:49  root sudo /usr/local/cuda/cuda-uninstaller 
 1017  2021-08-27 15:11:25  root ls
 1018  2021-08-27 15:11:37  root sudo sh cuda_11.1.0_455.23.05_linux.run
 1019  2021-08-27 15:14:08  root rpm -qa cuda
 1020  2021-08-27 15:14:58  root rpm -qa cuda*
 1021  2021-08-27 15:15:22  root nvcc -V
 1022  2021-08-27 15:16:33  root wget https://developer.nvidia.com/compute/cuda/10.1/Prod/local_installers/cuda-repo-rhel7-10-1-local-10.1.105-418.39-1.0-1.x86_64.rpm
 1023  2021-08-27 15:21:30  root ls
 1024  2021-08-27 15:21:44  root sudo rpm -i cuda-repo-rhel7-10-1-local-10.1.105-418.39-1.0-1.x86_64.rpm
 1025  2021-08-27 15:22:50  root yum search cuda*
 1026  2021-08-27 15:23:01  root yum search cuda
 1027  2021-08-27 15:23:50  root yum list | grep cuda
 1028  2021-08-27 15:24:25  root sudo yum clean all
 1029  2021-08-27 15:24:37  root sudo yum install cuda
 1030  2021-08-27 15:31:37  root nvidia-smi
 1031  2021-08-27 15:31:48  root nvcc -V