软件安装(anaconda3、fastp、ascp)、数据下载、数据处理、质控

anaconda3下载安装配置

# Download the Anaconda3 2020.11 installer for 64-bit Linux.
wget https://repo.anaconda.com/archive/Anaconda3-2020.11-Linux-x86_64.sh
# Run the installer by its exact file name. A bare `bash *.sh` would execute
# whichever .sh file sorts first in the directory and hand the remaining
# matches to it as arguments.
bash Anaconda3-2020.11-Linux-x86_64.sh
# Edit the system-wide profile; presumably the export line below is what gets
# appended there so the setting persists across logins — confirm.
sudo vi /etc/profile
# Put the Anaconda executables on PATH (appended, so existing entries win).
export PATH="$PATH:/home/messia/anaconda3/bin"
# Re-read the profile so the change applies to the current shell.
source /etc/profile


# NOTE(review): this looks like conda configuration (.condarc) content —
# confirm which file it belongs in.
# Two `channels:` lists follow. A YAML mapping should not repeat a key, so
# presumably only one of them is meant to be kept: either the named channels
# or the TUNA mirror URLs — confirm before copying.

# Variant 1: channels referenced by name.
channels:
- conda-forge
- bioconda
- r
- defaults
# Variant 2: channels referenced by Tsinghua (TUNA) mirror URL.
channels:
- https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/
- https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/bioconda/
- https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/
- https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/menpo/
- https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/msys2/
- https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/
- https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/
- defaults
# Print the source channel URL next to each package in conda's output.
show_channel_urls: true


# Bring every package in the active environment up to date.
conda update --update-all
# Update the conda package manager itself.
conda update conda
# Update the anaconda metapackage.
conda update anaconda
# Remove cached package tarballs.
conda clean -t
# Remove all remaining caches (index cache, unused packages, tarballs).
conda clean --all

conda install fastp # install fastp, the tool invoked in the QC step
报错:libstdc++.so.6: version `GLIBCXX_3.4.22' not found (required by fastp)
参考:https://www.jianshu.com/p/fe08ef2bacba
# Install the aspera-cli package from the hcc channel, auto-confirming prompts (-y).
# Presumably this provides the ascp client used for the downloads — confirm.
conda install -c hcc aspera-cli -y

ascp下载测序数据

密钥地址:   /home/messia/anaconda3/etc/asperaweb_id_dsa.openssh

数据来源论文(paper):Transcriptome analysis of an apple (Malus × domestica) yellow fruit somatic mutation identifies a gene network module highly associated with anthocyanin and epigenetic regulation;项目编号:PRJNA287523 / SRP062637;下载来源:ENA(ENA Browser, ebi.ac.uk)

# Download runs SRR2176358 .. SRR2176381 (24 files) from the ENA Aspera server
# into the raw-data directory, one ascp transfer per run.
aspera_key=/home/messia/anaconda3/etc/asperaweb_id_dsa.openssh
rawdata_dir=/home/messia/rnaseq/rawdata

# Make sure the destination directory exists before the first transfer.
mkdir -p "$rawdata_dir"

for run_number in {2176358..2176381}; do
  # On the server each run sits in a subdirectory named "00" followed by the
  # last digit of its run number (e.g. SRR2176358 -> 008).
  run_subdir="00$((run_number % 10))"
  # -l 100M: target rate cap, -P 33001: SSH port, -Q: fair transfer policy,
  # -T: no encryption, -k 2: resume partially transferred files.
  ascp -l 100M -P 33001 -QT -k 2 -i "$aspera_key" \
    "era-fasp@fasp.sra.ebi.ac.uk:/vol1/fastq/SRR217/${run_subdir}/SRR${run_number}/SRR${run_number}.fastq.gz" \
    "$rawdata_dir"
done

数据处理——简化文件名称

Linux 重命名文件的用法示例:mv test1.txt test2.txt

# Rename the downloaded runs in the current directory from their SRR accession
# to a short sample label. Labels are listed in accession order, starting at
# SRR2176358 and increasing by one per entry.
# NOTE(review): the label parts look like group / stage / replicate — confirm
# the mapping against the project's run table.
sample_labels=(
  BLO_S1_Rep1 BLO_S1_Rep2 BLO_S1_Rep3
  KID_S1_Rep1 KID_S1_Rep2 KID_S1_Rep3
  BLO_S2_Rep1 BLO_S2_Rep2 BLO_S2_Rep3
  KID_S2_Rep1 KID_S2_Rep2 KID_S2_Rep3
  BLO_S3_Rep1 BLO_S3_Rep2 BLO_S3_Rep3
  KID_S3_Rep1 KID_S3_Rep2 KID_S3_Rep3
  BLO_S4_Rep1 BLO_S4_Rep2 BLO_S4_Rep3
  KID_S4_Rep1 KID_S4_Rep2 KID_S4_Rep3
)

run_number=2176358
for sample_label in "${sample_labels[@]}"; do
  mv "SRR${run_number}.fastq.gz" "${sample_label}.fastq.gz"
  run_number=$((run_number + 1))
done

QC

# Quality control with fastp.
# The sample list is taken from the current directory while the fastp commands
# read from ../rawdata/, so this is presumably meant to be run from inside the
# raw-data directory — confirm.

# Create the output directory up front; nothing else in this section creates
# it, and fastp writes its cleaned reads and reports there.
mkdir -p ../cleandata

# One FASTQ file name per line.
ls *.fastq.gz  > sample0.lst

# Trial run on a single sample: cleaned reads (-o), HTML report (-h) and JSON
# report (-j) all go to ../cleandata/.
fastp -i ../rawdata/BLO_S1_Rep1.fastq.gz -o ../cleandata/BLO_S1_Rep1.fastq.gz -h ../cleandata/BLO_S1_Rep1.fastq.gz.html -j ../cleandata/BLO_S1_Rep1.fastq.gz.json

# Write fastp.sh with one fastp command per listed sample. The script is only
# generated here, not executed; run it afterwards (e.g. `bash fastp.sh`).
awk '{print "fastp -i ../rawdata/"$1" -o ../cleandata/"$1" -h ../cleandata/"$1".html -j ../cleandata/"$1".json"}' sample0.lst  > fastp.sh