123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143 |
- #!/bin/sh
- #PBS -N noUMI_varscan_pair
- #PBS -j oe
- #PBS -l ncpus=12
- #PBS -l nodes=1
- #PBS -l mem=20G
- #inputpath=$1
- #sample=$2
- source /home/liuxiangqiong/miniconda3/bin/activate base
- hg19=/cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta
- target=/cgdata/liuxiangqiong/work62pancancer/Client/v0/script/refdata/NanOnco_Plus_Panel_v2.0_Covered_b37_cg.parY2X.sort.bed
- #min_af=0.005 for blood,0.01 for tissue
- outputdir=${inputpath}/1SV_varscan_pair
- #######################1.pileup file########################################################################
- samtools mpileup -B -q 10 -d 1000000 -f ${hg19} -l $target ${bam_dir}/${tumor}_clean.bam > ${outputdir}/${tumor}.pileup
- samtools mpileup -B -q 10 -d 1000000 -f ${hg19} -l $target ${bam_dir}/${normal}_clean.bam > ${outputdir}/${normal}.pileup
- ########################2.pair########################################################################
- varscan somatic ${outputdir}/${normal}.pileup ${outputdir}/${tumor}.pileup --output-snp ${outputdir}/${sample}.snp --output-indel ${outputdir}/${sample}.indel --tumor-purity 0.01 --output-vcf
- ##对原始的结果进行合并并注释
- less ${outputdir}/${sample}.snp.vcf | grep -vE "^#" > ${outputdir}/${sample}.merge.Somatic.vcf
- less ${outputdir}/${sample}.indel.vcf | grep -vE "^#" >> ${outputdir}/${sample}.merge.Somatic.vcf
- #3.3 将邻近的位点进行合并#3.3 将邻近的位点进行合并
- python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/complex_variant_new.py -i ${outputdir}/${sample}.merge.Somatic.vcf -b ${bam_dir}/${tumor}_clean.bam -f ${hg19} -mode 2 -n 10 -p 0.5 -o ${outputdir}/${sample}.merge.Somatic.combind1.vcf
- grep "^#" ${outputdir}/${sample}.snp.vcf >${outputdir}/${sample}.merge.Somatic.combind.vcf
- less ${outputdir}/${sample}.merge.Somatic.combind1.vcf >>${outputdir}/${sample}.merge.Somatic.combind.vcf
- rm -rf ${outputdir}/${sample}.merge.Somatic.combind1.vcf
- #perl /cgdata/CGTools/soft/src/annovar_20200608/table_annovar.pl ${outputdir}/${sample}.merge.Somatic.combind.vcf /cgdata/soft/src/annovar.archive/DB/ -buildver hg19 -out ${outputdir}/${sample}.merge.Somatic.combind.anno -remove -protocol refGene,popfreq_max_20150413,gnomad_genome,avsnp150,snp138NonFlagged,dbnsfp33a,cosmic94,clinvar_20210501,intervar_20170202,icgc28 -operation g,f,f,f,f,f,f,f,f,f -nastring . -vcfinput --thread 40
- #5.2先拆分再标准化对齐
- vt decompose ${outputdir}/${sample}.merge.Somatic.combind.vcf |vt normalize - -r ${hg19} -o ${outputdir}/${sample}.merge.Somatic.combind.norm.vcf
- conda deactivate
- #5.3对经过标准化的进行vep注释
- source /home/liuxiangqiong/miniconda3/bin/activate ensemble-vep
- PERL5LIB=/home/liuxiangqiong/miniconda3/envs/ensemble-vep/share/ensembl-vep-105.0-1/Bio/EnsEMBL/Variation/Utils/DB/HTS/Faidx
- export PERL5LIB=$PERL5LIB
- inputdir=${outputdir}/${sample}.merge.Somatic.combind.norm.vcf
- outdir1=${outputdir}/${sample}.merge.Somatic.combind.norm.vepanno.vcf
- vep --cache --fork 16 \
- -i ${inputdir} \
- -o ${outdir1} \
- --assembly GRCh37 \
- --cache_version 104 \
- --dir_cache /home/liuxiangqiong/miniconda3/share/ensembl-vep-88.9-0/hg19 \
- --refseq \
- --canonical \
- --offline \
- --hgvs \
- --fasta /cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta \
- --hgvsg \
- --max_af \
- --check_existing \
- --numbers \
- --symbol \
- --canonical \
- --offline \
- -vcf \
- --use_given_ref \
- --af_gnomad \
- --check_existing
- #5.4对vep的再进行annovar注释
- outdir2=${outputdir}/${sample}.merge.Somatic.combind.norm.vepanno.annovar
- perl /cgdata/CGTools/soft/src/annovar_20200608/table_annovar.pl ${outdir1} /cgdata/soft/src/annovar.archive/DB/ -buildver hg19 -out ${outdir2} -remove -protocol refGene,popfreq_max_20150413,gnomad_genome,avsnp150,snp138NonFlagged,dbnsfp33a,cosmic94,clinvar_20210501,intervar_20170202,icgc28 -operation g,f,f,f,f,f,f,f,f,f -nastring . -vcfinput --thread 40
- conda deactivate
- source /home/liuxiangqiong/miniconda3/bin/activate base
- ########################3.Run the processSomatic subcommand to divide the output into separate files based on somatic status and confidence########
- #3.1获得高置信的突变
- varscan processSomatic ${outputdir}/${sample}.snp.vcf --min-tumor-freq 0.001 --max-normal-freq 0.05 --p-value 0.5
- varscan processSomatic ${outputdir}/${sample}.indel.vcf --min-tumor-freq 0.001 --max-normal-freq 0.05 --p-value 0.5
- #3.2merge the hc
- less ${outputdir}/${sample}.snp.Somatic.hc.vcf | grep -vE "^#"> ${outputdir}/${sample}.merge.Somatic.hc.nohead.vcf
- less ${outputdir}/${sample}.indel.Somatic.hc.vcf | grep -vE "^#" >> ${outputdir}/${sample}.merge.Somatic.hc.nohead.vcf
- #3.3 将邻近的位点进行合并#3.3 将邻近的位点进行合并
- python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/complex_variant_new.py -i ${outputdir}/${sample}.merge.Somatic.hc.nohead.vcf -b ${bam_dir}/${tumor}_clean.bam -f ${hg19} -mode 2 -n 10 -p 0.5 -o ${outputdir}/${sample}.merge.Somatic.hc.nohead.combind.vcf
- grep "^#" ${outputdir}/${sample}.snp.Somatic.hc.vcf >${outputdir}/${sample}.merge.Somatic.hc.combind.vcf
- less ${outputdir}/${sample}.merge.Somatic.hc.nohead.combind.vcf >>${outputdir}/${sample}.merge.Somatic.hc.combind.vcf
- #rm -rf ${outputdir}/${sample}.merge.Somatic.hc.nohead.combind.vcf
- #########################4.filter the false positive########################################################################
- grep -v "#" ${outputdir}/${sample}.merge.Somatic.hc.combind.vcf | awk '{a=$2+10;print $1"\t"$2"\t"a}' > ${outputdir}/${sample}.merge.Somatic.hc.combind.bed
- conda deactivate
- source /home/liuxiangqiong/miniconda3/bin/activate python2
- bam-readcount -q 20 -b 20 -w 100 -f ${hg19} -l ${outputdir}/${sample}.merge.Somatic.hc.combind.bed ${bam_dir}/${tumor}_clean.bam >${outputdir}/${sample}.merge.Somatic.hc.combind.readcount
- conda deactivate
- source /home/liuxiangqiong/miniconda3/bin/activate base
- varscan fpfilter \
- ${outputdir}/${sample}.merge.Somatic.hc.combind.vcf \
- ${outputdir}/${sample}.merge.Somatic.hc.combind.readcount \
- --min-var-count 5 \
- --min-var-basequal 20 \
- --min-ref-basequal 20 \
- --min-var-freq ${min_af} \
- --output-file ${outputdir}/${sample}.merge.Somatic.hc.combind.pass.vcf
- ##########################5.注释########################################################################
- #5.1对没有过滤的进行注释对高置信的过滤
- #perl /cgdata/CGTools/soft/src/annovar_20200608/table_annovar.pl ${outputdir}/${sample}.merge.Somatic.hc.head.combind.vcf /cgdata/soft/src/annovar.archive/DB/ -buildver hg19 -out ${outputdir}/${sample}.merge.Somatic.hc.head.combind.anno -remove -protocol refGene,popfreq_max_20150413,gnomad_genome,avsnp150,snp138NonFlagged,dbnsfp33a,cosmic94,clinvar_20210501,intervar_20170202,icgc28 -operation g,f,f,f,f,f,f,f,f,f -nastring . -vcfinput --thread 40
- #perl /cgdata/CGTools/soft/src/annovar_20200608/table_annovar.pl ${outputdir}/${sample}.snp.Somatic.hc.vcf /cgdata/soft/src/annovar.archive/DB/ -buildver hg19 -out ${outputdir}/${sample}.merge.Somatic.hc.anno -remove -protocol refGene,popfreq_max_20150413,gnomad_genome,avsnp150,snp138NonFlagged,dbnsfp33a,cosmic94,clinvar_20210501,intervar_20170202,icgc28 -operation g,f,f,f,f,f,f,f,f,f -nastring . -vcfinput --thread 40
- #5.2先拆分再标准化对齐
- vt decompose ${outputdir}/${sample}.merge.Somatic.hc.combind.pass.vcf |vt normalize - -r ${hg19} -o ${outputdir}/${sample}.merge.Somatic.hc.combind.pass.norm.vcf
- conda deactivate
- #5.3对经过标准化的进行vep注释
- source /home/liuxiangqiong/miniconda3/bin/activate ensemble-vep
- PERL5LIB=/home/liuxiangqiong/miniconda3/envs/ensemble-vep/share/ensembl-vep-105.0-1/Bio/EnsEMBL/Variation/Utils/DB/HTS/Faidx
- export PERL5LIB=$PERL5LIB
- inputdir0=${outputdir}/${sample}.merge.Somatic.hc.combind.pass.norm.vcf
- outdir1_0=${outputdir}/${sample}.merge.Somatic.hc.combind.pass.norm.vepanno.vcf
- vep --cache --fork 16 \
- -i ${inputdir0} \
- -o ${outdir1_0} \
- --assembly GRCh37 \
- --cache_version 104 \
- --dir_cache /home/liuxiangqiong/miniconda3/share/ensembl-vep-88.9-0/hg19 \
- --refseq \
- --canonical \
- --offline \
- --hgvs \
- --fasta /cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta \
- --hgvsg \
- --max_af \
- --check_existing \
- --numbers \
- --symbol \
- --canonical \
- --offline \
- -vcf \
- --use_given_ref \
- --af_gnomad \
- --check_existing
- #5.4对vep的再进行annovar注释
- outdir2_0=${outputdir}/${sample}.merge.Somatic.hc.combind.pass.norm.vepanno.annovar
- perl /cgdata/CGTools/soft/src/annovar_20200608/table_annovar.pl ${outdir1_0} /cgdata/soft/src/annovar.archive/DB/ -buildver hg19 -out ${outdir2_0} -remove -protocol refGene,popfreq_max_20150413,gnomad_genome,avsnp150,snp138NonFlagged,dbnsfp33a,cosmic94,clinvar_20210501,intervar_20170202,icgc28 -operation g,f,f,f,f,f,f,f,f,f -nastring . -vcfinput --thread 40
- ###############################6.删除中间文件
- #rm -rf ${outputdir}/${normal}.pileup
- #rm -rf ${outputdir}/${tumor}.pileup
- rm -rf ${outputdir}/${sample}.merge.Somatic.hc.readcount
|