#!/bin/sh #PBS -N noUMI_varscan_pair #PBS -j oe #PBS -l ncpus=12 #PBS -l nodes=1 #PBS -l mem=20G #inputpath=$1 #sample=$2 source /home/liuxiangqiong/miniconda3/bin/activate base hg19=/cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta target=/cgdata/liuxiangqiong/work62pancancer/Client/v0/script/refdata/NanOnco_Plus_Panel_v2.0_Covered_b37_cg.parY2X.sort.bed #min_af=0.005 for blood,0.01 for tissue outputdir=${inputpath}/1SV_varscan_pair #######################1.pileup file######################################################################## samtools mpileup -B -q 10 -d 1000000 -f ${hg19} -l $target ${bam_dir}/${tumor}_clean.bam > ${outputdir}/${tumor}.pileup samtools mpileup -B -q 10 -d 1000000 -f ${hg19} -l $target ${bam_dir}/${normal}_clean.bam > ${outputdir}/${normal}.pileup ########################2.pair######################################################################## varscan somatic ${outputdir}/${normal}.pileup ${outputdir}/${tumor}.pileup --output-snp ${outputdir}/${sample}.snp --output-indel ${outputdir}/${sample}.indel --tumor-purity 0.01 --output-vcf ########################3.Run the processSomatic subcommand to divide the output into separate files based on somatic status and confidence######## #3.1获得高置信的突变 varscan processSomatic ${outputdir}/${sample}.snp.vcf --min-tumor-freq 0.001 --max-normal-freq 0.05 --p-value 0.5 varscan processSomatic ${outputdir}/${sample}.indel.vcf --min-tumor-freq 0.001 --max-normal-freq 0.05 --p-value 0.5 #3.2merge the hc less ${outputdir}/${sample}.snp.Somatic.hc.vcf | grep -vE "^#"> ${outputdir}/${sample}.merge.Somatic.hc.nohead.vcf less ${outputdir}/${sample}.indel.Somatic.hc.vcf | grep -vE "^#" >> ${outputdir}/${sample}.merge.Somatic.hc.nohead.vcf #3.3 将邻近的位点进行合并#3.3 将邻近的位点进行合并 #python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/complex_variant_new.py -i ${outputdir}/${sample}.merge.Somatic.hc.nohead.vcf -b ${bam_dir}/${tumor}_clean.bam -f ${hg19} -mode 2 -n 10 -p 0.5 -o ${outputdir}/${sample}.merge.Somatic.hc.nohead.combind.vcf #grep "^#" ${outputdir}/${sample}.snp.Somatic.hc.vcf >${outputdir}/${sample}.merge.Somatic.hc.combind.vcf #less ${outputdir}/${sample}.merge.Somatic.hc.nohead.combind.vcf >>${outputdir}/${sample}.merge.Somatic.hc.combind.vcf #rm -rf ${outputdir}/${sample}.merge.Somatic.hc.nohead.combind.vcf ##将VCF按照染色体进行拆分 python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/20220705/vcfsplit_v0_20220929_finish.py -i ${outputdir}/${sample}.merge.Somatic.hc.nohead.vcf fun() { hg19=/cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/complex_variant_new.py -i ${file} -b ${bam_dir}/${tumor}_clean.bam -f ${hg19} -mode 2 -n 10 -p 0.5 -o ${file}_chr.vcf & wait echo "fun is end." } for file in ${outputdir}/${sample}.merge.Somatic.hc.nohead_*.vcf; do fun ${tumor} ${file} ${bam_dir} & done wait echo "the vcfmergeing is finished" grep "^#" ${outputdir}/${sample}.snp.Somatic.hc.vcf >${outputdir}/${sample}.merge.Somatic.hc.combind.vcf cat ${outputdir}/${sample}.merge.Somatic.hc.nohead_*.vcf_chr.vcf>>${outputdir}/${sample}.merge.Somatic.hc.combind.vcf rm -rf ${outputdir}/${sample}.merge.Somatic.hc.nohead_*.vcf rm -rf ${outputdir}/${sample}.merge.Somatic.hc.nohead_*.vcf_chr.vcf #########################4.filter the false positive######################################################################## grep -v "#" ${outputdir}/${sample}.merge.Somatic.hc.combind.vcf | awk '{a=$2+10;print $1"\t"$2"\t"a}' > ${outputdir}/${sample}.merge.Somatic.hc.combind.bed #将bed分成多个小文件 split -l 3000 ${outputdir}/${sample}.merge.Somatic.hc.combind.bed -d -a 2 ${outputdir}/${sample}.merge.Somatic.hc.combind.bed_ conda deactivate source /home/liuxiangqiong/miniconda3/bin/activate python2 #bam-readcount -q 20 -b 20 -w 100 -f ${hg19} -l ${outputdir}/${sample}.merge.Somatic.hc.combind.bed ${bam_dir}/${tumor}_clean.bam >${outputdir}/${sample}.merge.Somatic.hc.combind.readcount fun() { hg19=/cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta bam-readcount -q 20 -b 20 -w 100 -f ${hg19} -l ${file} ${bam_dir}/${tumor}_clean.bam >${file}.readcount & wait echo "fun is end." } for file in ${outputdir}/${sample}.merge.Somatic.hc.combind.bed_*; do fun ${inputpath} ${tumor} ${file} ${bam_dir} & done wait echo "the bam-readcount is finished" cat ${outputdir}/${sample}.merge.Somatic.hc.combind.bed_*readcount>${outputdir}/${sample}.merge.Somatic.hc.combind.readcount rm -rf ${outputdir}/${sample}.merge.Somatic.hc.combind.bed_*readcount rm -rf ${outputdir}/${sample}.merge.Somatic.hc.combind.bed_* conda deactivate source /home/liuxiangqiong/miniconda3/bin/activate base varscan fpfilter \ ${outputdir}/${sample}.merge.Somatic.hc.combind.vcf \ ${outputdir}/${sample}.merge.Somatic.hc.combind.readcount \ --min-var-count 5 \ --min-var-basequal 20 \ --min-ref-basequal 20 \ --min-var-freq ${min_af} \ --output-file ${outputdir}/${sample}.merge.Somatic.hc.combind.pass.vcf ##########################5.注释######################################################################## #5.1对没有过滤的进行注释对高置信的过滤 #perl /cgdata/CGTools/soft/src/annovar_20200608/table_annovar.pl ${outputdir}/${sample}.merge.Somatic.hc.head.combind.vcf /cgdata/soft/src/annovar.archive/DB/ -buildver hg19 -out ${outputdir}/${sample}.merge.Somatic.hc.head.combind.anno -remove -protocol refGene,popfreq_max_20150413,gnomad_genome,avsnp150,snp138NonFlagged,dbnsfp33a,cosmic94,clinvar_20210501,intervar_20170202,icgc28 -operation g,f,f,f,f,f,f,f,f,f -nastring . -vcfinput --thread 40 #perl /cgdata/CGTools/soft/src/annovar_20200608/table_annovar.pl ${outputdir}/${sample}.snp.Somatic.hc.vcf /cgdata/soft/src/annovar.archive/DB/ -buildver hg19 -out ${outputdir}/${sample}.merge.Somatic.hc.anno -remove -protocol refGene,popfreq_max_20150413,gnomad_genome,avsnp150,snp138NonFlagged,dbnsfp33a,cosmic94,clinvar_20210501,intervar_20170202,icgc28 -operation g,f,f,f,f,f,f,f,f,f -nastring . -vcfinput --thread 40 #5.2先拆分再标准化对齐 #vt decompose ${outputdir}/${sample}.merge.Somatic.hc.combind.pass.vcf |vt normalize - -r ${hg19} -o ${outputdir}/${sample}.merge.Somatic.hc.combind.pass.norm.vcf conda deactivate #5.3对经过标准化的进行vep注释 source /home/liuxiangqiong/miniconda3/bin/activate ensemble-vep PERL5LIB=/home/liuxiangqiong/miniconda3/envs/ensemble-vep/share/ensembl-vep-105.0-1/Bio/EnsEMBL/Variation/Utils/DB/HTS/Faidx export PERL5LIB=$PERL5LIB inputdir0=${outputdir}/${sample}.merge.Somatic.hc.combind.pass.vcf outdir1_0=${outputdir}/${sample}.merge.Somatic.hc.combind.pass.norm.vepanno.vcf vep --cache --fork 16 \ -i ${inputdir0} \ -o ${outdir1_0} \ --assembly GRCh37 \ --cache_version 104 \ --dir_cache /home/liuxiangqiong/miniconda3/share/ensembl-vep-88.9-0/hg19 \ --refseq \ --canonical \ --offline \ --hgvs \ --fasta /cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta \ --hgvsg \ --max_af \ --check_existing \ --numbers \ --symbol \ --canonical \ --offline \ -vcf \ --use_given_ref \ --af_gnomad \ --check_existing #5.4对vep的再进行annovar注释 outdir2_0=${outputdir}/${sample}.merge.Somatic.hc.combind.pass.norm.vepanno.annovar perl /cgdata/CGTools/soft/src/annovar_20200608/table_annovar.pl ${outdir1_0} /cgdata/soft/src/annovar.archive/DB/ -buildver hg19 -out ${outdir2_0} -remove -protocol refGene,popfreq_max_20150413,gnomad_genome,avsnp150,snp138NonFlagged,dbnsfp33a,cosmic94,clinvar_20220829,intervar_20170202,icgc28 -operation g,f,f,f,f,f,f,f,f,f -nastring . -vcfinput --thread 40 ###############################6.删除中间文件 #rm -rf ${outputdir}/${normal}.pileup #rm -rf ${outputdir}/${tumor}.pileup rm -rf ${outputdir}/${sample}.merge.Somatic.hc.readcount conda deactivate source /home/liuxiangqiong/miniconda3/bin/activate base #对varscan的结果进行过滤 python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/20220705/datafile_somatic_filter_varscan_v0_20220929_finish.py -i ${inputpath} -s ${sample} ##对varscan和vardict的方法结合获得最后的突变 #python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/20220705/datafile_somatic_filter_mergeresult_v0_20220929_finish.py -i ${inputpath} -s ${sample} #########################################################################原始的结果进行合并并注释##################### ##对原始的结果进行合并并注释 less ${outputdir}/${sample}.snp.vcf | grep -vE "^#" > ${outputdir}/${sample}.merge.Somatic.vcf less ${outputdir}/${sample}.indel.vcf | grep -vE "^#" >> ${outputdir}/${sample}.merge.Somatic.vcf #3.3 将邻近的位点进行合并#3.3 将邻近的位点进行合并 #python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/complex_variant_new.py -i ${outputdir}/${sample}.merge.Somatic.vcf -b ${bam_dir}/${tumor}_clean.bam -f ${hg19} -mode 2 -n 10 -p 0.5 -o ${outputdir}/${sample}.merge.Somatic.combind1.vcf #grep "^#" ${outputdir}/${sample}.snp.vcf >${outputdir}/${sample}.merge.Somatic.combind.vcf #less ${outputdir}/${sample}.merge.Somatic.combind1.vcf >>${outputdir}/${sample}.merge.Somatic.combind.vcf #rm -rf ${outputdir}/${sample}.merge.Somatic.combind1.vcf ##将VCF按照染色体进行拆分 python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/20220705/vcfsplit_v0_20220929_finish.py -i ${outputdir}/${sample}.merge.Somatic.vcf fun() { hg19=/cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/complex_variant_new.py -i ${file} -b ${bam_dir}/${tumor}_clean.bam -f ${hg19} -mode 2 -n 10 -p 0.5 -o ${file}_chr.vcf & wait echo "fun is end." } for file in ${outputdir}/${sample}.merge.Somatic_*.vcf; do fun ${tumor} ${file} ${bam_dir} & done wait echo "the vcfmergeing is finished" grep "^#" ${outputdir}/${sample}.snp.vcf >${outputdir}/${sample}.merge.Somatic.combind.vcf cat ${outputdir}/${sample}.merge.Somatic_*.vcf_chr.vcf>>${outputdir}/${sample}.merge.Somatic.combind.vcf rm -rf ${outputdir}/${sample}.merge.Somatic_*.vcf rm -rf ${outputdir}/${sample}.merge.Somatic_*.vcf_chr.vcf #perl /cgdata/CGTools/soft/src/annovar_20200608/table_annovar.pl ${outputdir}/${sample}.merge.Somatic.combind.vcf /cgdata/soft/src/annovar.archive/DB/ -buildver hg19 -out ${outputdir}/${sample}.merge.Somatic.combind.anno -remove -protocol refGene,popfreq_max_20150413,gnomad_genome,avsnp150,snp138NonFlagged,dbnsfp33a,cosmic94,clinvar_20210501,intervar_20170202,icgc28 -operation g,f,f,f,f,f,f,f,f,f -nastring . -vcfinput --thread 40 #5.2先拆分再标准化对齐 vt decompose ${outputdir}/${sample}.merge.Somatic.combind.vcf |vt normalize - -r ${hg19} -o ${outputdir}/${sample}.merge.Somatic.combind.norm.vcf conda deactivate #5.3对经过标准化的进行vep注释 source /home/liuxiangqiong/miniconda3/bin/activate ensemble-vep PERL5LIB=/home/liuxiangqiong/miniconda3/envs/ensemble-vep/share/ensembl-vep-105.0-1/Bio/EnsEMBL/Variation/Utils/DB/HTS/Faidx export PERL5LIB=$PERL5LIB inputdir=${outputdir}/${sample}.merge.Somatic.combind.norm.vcf outdir1=${outputdir}/${sample}.merge.Somatic.combind.norm.vepanno.vcf vep --cache --fork 16 \ -i ${inputdir} \ -o ${outdir1} \ --assembly GRCh37 \ --cache_version 104 \ --dir_cache /home/liuxiangqiong/miniconda3/share/ensembl-vep-88.9-0/hg19 \ --refseq \ --canonical \ --offline \ --hgvs \ --fasta /cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta \ --hgvsg \ --max_af \ --check_existing \ --numbers \ --symbol \ --canonical \ --offline \ -vcf \ --use_given_ref \ --af_gnomad \ --check_existing #5.4对vep的再进行annovar注释 outdir2=${outputdir}/${sample}.merge.Somatic.combind.norm.vepanno.annovar perl /cgdata/CGTools/soft/src/annovar_20200608/table_annovar.pl ${outdir1} /cgdata/soft/src/annovar.archive/DB/ -buildver hg19 -out ${outdir2} -remove -protocol refGene,popfreq_max_20150413,gnomad_genome,avsnp150,snp138NonFlagged,dbnsfp33a,cosmic94,clinvar_20220829,intervar_20170202,icgc28 -operation g,f,f,f,f,f,f,f,f,f -nastring . -vcfinput --thread 40 conda deactivate