#!/bin/sh #PBS -N noUMI_varscan_pair_combined #PBS -j oe #PBS -l ncpus=12 #PBS -l nodes=1 #PBS -l mem=20G source /home/liuxiangqiong/miniconda3/bin/activate base #inputpath=/cgdata/bioproject/pancancer602gene/CGB0402_1 #sample=Lib52939T #tumor=Lib52939TTT #bam_dir=/cgdata/pancancer/analyse/CGB0402_1/bamfile #设置相邻位点合并长度 #length=15 #min_af=0.005 for blood,0.01 for tissue outputdir=${inputpath}/1SV_varscan_pair hg19=/cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta target=/cgdata/liuxiangqiong/work62pancancer/Client/v0/script/refdata/NanOnco_Plus_Panel_v2.0_Covered_b37_cg.parY2X.sort.bed target1=/cgdata/liuxiangqiong/work62pancancer/pipeline/v0/refdata/NanOnco_Plus_Panel_v2.0_Covered_b37_cg.parY2X.sort ##将VCF按照染色体进行拆分 python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/20220705/vcfsplit_v0_20220929_finish.py -i ${outputdir}/${sample}.merge.Somatic.hc.nohead.vcf fun() { hg19=/cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/complex_variant_new.py -i ${file} -b ${bam_dir}/${tumor}_clean.bam -f ${hg19} -mode 2 -n ${length} -p 0.5 -o ${file}_chr.vcf & wait echo "fun is end." } for file in ${outputdir}/${sample}.merge.Somatic.hc.nohead_*.vcf; do fun ${tumor} ${file} ${bam_dir} & done wait echo "the vcfmergeing is finished" grep "^#" ${outputdir}/${sample}.snp.Somatic.hc.vcf >${outputdir}/${sample}.merge.Somatic.hc.combind1.vcf cat ${outputdir}/${sample}.merge.Somatic.hc.nohead_*.vcf_chr.vcf>>${outputdir}/${sample}.merge.Somatic.hc.combind1.vcf rm -rf ${outputdir}/${sample}.merge.Somatic.hc.nohead_*.vcf rm -rf ${outputdir}/${sample}.merge.Somatic.hc.nohead_*.vcf_chr.vcf #########################4.filter the false positive######################################################################## grep -v "#" ${outputdir}/${sample}.merge.Somatic.hc.combind1.vcf | awk '{a=$2+10;print $1"\t"$2"\t"a}' > ${outputdir}/${sample}.merge.Somatic.hc.combind1.bed #将bed分成多个小文件 split -l 3000 ${outputdir}/${sample}.merge.Somatic.hc.combind1.bed -d -a 2 ${outputdir}/${sample}.merge.Somatic.hc.combind1.bed_ conda deactivate source /home/liuxiangqiong/miniconda3/bin/activate python2 #bam-readcount -q 20 -b 20 -w 100 -f ${hg19} -l ${outputdir}/${sample}.merge.Somatic.hc.combind1.bed ${bam_dir}/${tumor}_clean.bam >${outputdir}/${sample}.merge.Somatic.hc.combind1.readcount fun() { hg19=/cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta bam-readcount -q 20 -b 20 -w 100 -f ${hg19} -l ${file} ${bam_dir}/${tumor}_clean.bam >${file}.readcount & wait echo "fun is end." } for file in ${outputdir}/${sample}.merge.Somatic.hc.combind1.bed_*; do fun ${inputpath} ${tumor} ${file} ${bam_dir} & done wait echo "the bam-readcount is finished" cat ${outputdir}/${sample}.merge.Somatic.hc.combind1.bed_*readcount>${outputdir}/${sample}.merge.Somatic.hc.combind1.readcount rm -rf ${outputdir}/${sample}.merge.Somatic.hc.combind1.bed_*readcount rm -rf ${outputdir}/${sample}.merge.Somatic.hc.combind1.bed_* conda deactivate source /home/liuxiangqiong/miniconda3/bin/activate base varscan fpfilter \ ${outputdir}/${sample}.merge.Somatic.hc.combind1.vcf \ ${outputdir}/${sample}.merge.Somatic.hc.combind1.readcount \ --min-var-count 5 \ --min-var-basequal 20 \ --min-ref-basequal 20 \ --min-var-freq ${min_af} \ --output-file ${outputdir}/${sample}.merge.Somatic.hc.combind1.pass.vcf ##########################5.注释######################################################################## #5.1对没有过滤的进行注释对高置信的过滤 #perl /cgdata/CGTools/soft/src/annovar_20200608/table_annovar.pl ${outputdir}/${sample}.merge.Somatic.hc.head.combind1.vcf /cgdata/soft/src/annovar.archive/DB/ -buildver hg19 -out ${outputdir}/${sample}.merge.Somatic.hc.head.combind1.anno -remove -protocol refGene,popfreq_max_20150413,gnomad_genome,avsnp150,snp138NonFlagged,dbnsfp33a,cosmic94,clinvar_20210501,intervar_20170202,icgc28 -operation g,f,f,f,f,f,f,f,f,f -nastring . -vcfinput --thread 40 #perl /cgdata/CGTools/soft/src/annovar_20200608/table_annovar.pl ${outputdir}/${sample}.snp.Somatic.hc.vcf /cgdata/soft/src/annovar.archive/DB/ -buildver hg19 -out ${outputdir}/${sample}.merge.Somatic.hc.anno -remove -protocol refGene,popfreq_max_20150413,gnomad_genome,avsnp150,snp138NonFlagged,dbnsfp33a,cosmic94,clinvar_20210501,intervar_20170202,icgc28 -operation g,f,f,f,f,f,f,f,f,f -nastring . -vcfinput --thread 40 #5.2先拆分再标准化对齐 #vt decompose ${outputdir}/${sample}.merge.Somatic.hc.combind1.pass.vcf |vt normalize - -r ${hg19} -o ${outputdir}/${sample}.merge.Somatic.hc.combind1.pass.norm.vcf conda deactivate #5.3对经过标准化的进行vep注释 source /home/liuxiangqiong/miniconda3/bin/activate ensemble-vep PERL5LIB=/home/liuxiangqiong/miniconda3/envs/ensemble-vep/share/ensembl-vep-105.0-1/Bio/EnsEMBL/Variation/Utils/DB/HTS/Faidx export PERL5LIB=$PERL5LIB inputdir0=${outputdir}/${sample}.merge.Somatic.hc.combind1.pass.vcf outdir1_0=${outputdir}/${sample}.merge.Somatic.hc.combind1.pass.norm.vepanno.vcf vep --cache --fork 16 \ -i ${inputdir0} \ -o ${outdir1_0} \ --assembly GRCh37 \ --cache_version 104 \ --dir_cache /home/liuxiangqiong/miniconda3/share/ensembl-vep-88.9-0/hg19 \ --refseq \ --canonical \ --offline \ --hgvs \ --fasta /cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta \ --hgvsg \ --max_af \ --check_existing \ --numbers \ --symbol \ --canonical \ --offline \ -vcf \ --use_given_ref \ --af_gnomad \ --check_existing #5.4对vep的再进行annovar注释 outdir2_0=${outputdir}/${sample}.merge.Somatic.hc.combind1.pass.norm.vepanno.annovar perl /cgdata/CGTools/soft/src/annovar_20200608/table_annovar.pl ${outdir1_0} /cgdata/soft/src/annovar.archive/DB/ -buildver hg19 -out ${outdir2_0} -remove -protocol refGene,popfreq_max_20150413,gnomad_genome,avsnp150,snp138NonFlagged,dbnsfp33a,cosmic94,clinvar_20220829,intervar_20170202,icgc28,exac03,esp6500siv2_all -operation g,f,f,f,f,f,f,f,f,f,f,f -nastring . -vcfinput --thread 40 ###############################6.删除中间文件 #rm -rf ${outputdir}/${normal}.pileup #rm -rf ${outputdir}/${tumor}.pileup rm -rf ${outputdir}/${sample}.merge.Somatic.hc.readcount conda deactivate source /home/liuxiangqiong/miniconda3/bin/activate base #对varscan的结果进行过滤 python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/20220705/datafile_somatic_filter_varscan_v0_20220929_finish.py -i ${inputpath} -s ${sample} ##对varscan和vardict的方法结合获得最后的突变 #python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/20220705/datafile_somatic_filter_mergeresult_v0_20220929_finish.py -i ${inputpath} -s ${sample} #########################################################################原始的结果进行合并并注释##################### ##对原始的结果进行合并并注释 less ${outputdir}/${sample}.snp.vcf | grep -vE "^#" > ${outputdir}/${sample}.merge.Somatic.vcf less ${outputdir}/${sample}.indel.vcf | grep -vE "^#" >> ${outputdir}/${sample}.merge.Somatic.vcf #3.3 将邻近的位点进行合并#3.3 将邻近的位点进行合并 #python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/complex_variant_new.py -i ${outputdir}/${sample}.merge.Somatic.vcf -b ${bam_dir}/${tumor}_clean.bam -f ${hg19} -mode 2 -n 10 -p 0.5 -o ${outputdir}/${sample}.merge.Somatic.combind11.vcf #grep "^#" ${outputdir}/${sample}.snp.vcf >${outputdir}/${sample}.merge.Somatic.combind1.vcf #less ${outputdir}/${sample}.merge.Somatic.combind11.vcf >>${outputdir}/${sample}.merge.Somatic.combind1.vcf #rm -rf ${outputdir}/${sample}.merge.Somatic.combind11.vcf ##将VCF按照染色体进行拆分 python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/20220705/vcfsplit_v0_20220929_finish.py -i ${outputdir}/${sample}.merge.Somatic.vcf fun() { hg19=/cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/complex_variant_new.py -i ${file} -b ${bam_dir}/${tumor}_clean.bam -f ${hg19} -mode 2 -n 10 -p 0.5 -o ${file}_chr.vcf & wait echo "fun is end." } for file in ${outputdir}/${sample}.merge.Somatic_*.vcf; do fun ${tumor} ${file} ${bam_dir} & done wait echo "the vcfmergeing is finished" grep "^#" ${outputdir}/${sample}.snp.vcf >${outputdir}/${sample}.merge.Somatic.combind1.vcf cat ${outputdir}/${sample}.merge.Somatic_*.vcf_chr.vcf>>${outputdir}/${sample}.merge.Somatic.combind1.vcf rm -rf ${outputdir}/${sample}.merge.Somatic_*.vcf rm -rf ${outputdir}/${sample}.merge.Somatic_*.vcf_chr.vcf #perl /cgdata/CGTools/soft/src/annovar_20200608/table_annovar.pl ${outputdir}/${sample}.merge.Somatic.combind1.vcf /cgdata/soft/src/annovar.archive/DB/ -buildver hg19 -out ${outputdir}/${sample}.merge.Somatic.combind1.anno -remove -protocol refGene,popfreq_max_20150413,gnomad_genome,avsnp150,snp138NonFlagged,dbnsfp33a,cosmic94,clinvar_20210501,intervar_20170202,icgc28 -operation g,f,f,f,f,f,f,f,f,f -nastring . -vcfinput --thread 40 #5.2先拆分再标准化对齐 vt decompose ${outputdir}/${sample}.merge.Somatic.combind1.vcf |vt normalize - -r ${hg19} -o ${outputdir}/${sample}.merge.Somatic.combind1.norm.vcf conda deactivate #5.3对经过标准化的进行vep注释 source /home/liuxiangqiong/miniconda3/bin/activate ensemble-vep PERL5LIB=/home/liuxiangqiong/miniconda3/envs/ensemble-vep/share/ensembl-vep-105.0-1/Bio/EnsEMBL/Variation/Utils/DB/HTS/Faidx export PERL5LIB=$PERL5LIB inputdir=${outputdir}/${sample}.merge.Somatic.combind1.norm.vcf outdir1=${outputdir}/${sample}.merge.Somatic.combind1.norm.vepanno.vcf vep --cache --fork 16 \ -i ${inputdir} \ -o ${outdir1} \ --assembly GRCh37 \ --cache_version 104 \ --dir_cache /home/liuxiangqiong/miniconda3/share/ensembl-vep-88.9-0/hg19 \ --refseq \ --canonical \ --offline \ --hgvs \ --fasta /cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta \ --hgvsg \ --max_af \ --check_existing \ --numbers \ --symbol \ --canonical \ --offline \ -vcf \ --use_given_ref \ --af_gnomad \ --check_existing #5.4对vep的再进行annovar注释 outdir2=${outputdir}/${sample}.merge.Somatic.combind1.norm.vepanno.annovar perl /cgdata/CGTools/soft/src/annovar_20200608/table_annovar.pl ${outdir1} /cgdata/soft/src/annovar.archive/DB/ -buildver hg19 -out ${outdir2} -remove -protocol refGene,popfreq_max_20150413,gnomad_genome,avsnp150,snp138NonFlagged,dbnsfp33a,cosmic94,clinvar_20220829,intervar_20170202,icgc28,exac03,esp6500siv2_all -operation g,f,f,f,f,f,f,f,f,f,f,f -nastring . -vcfinput --thread 40 conda deactivate