123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229 |
- #!/bin/sh
- #PBS -N noUMI_varscan_pair
- #PBS -j oe
- #PBS -l ncpus=12
- #PBS -l nodes=1
- #PBS -l mem=20G
- #inputpath=$1
- #sample=$2
- source /home/liuxiangqiong/miniconda3/bin/activate base
- hg19=/cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta
- target=/cgdata/liuxiangqiong/work62pancancer/Client/v0/script/refdata/NanOnco_Plus_Panel_v2.0_Covered_b37_cg.parY2X.sort.bed
- target1=/cgdata/liuxiangqiong/work62pancancer/pipeline/v0/refdata/NanOnco_Plus_Panel_v2.0_Covered_b37_cg.parY2X.sort
- #min_af=0.005 for blood,0.01 for tissue
- outputdir=${inputpath}/1SV_varscan_pair
- #######################1.pileup file########################################################################
- #samtools mpileup -B -q 10 -d 1000000 -f ${hg19} -l $target ${bam_dir}/${tumor}_clean.bam > ${outputdir}/${tumor}.pileup
- #samtools mpileup -B -q 10 -d 1000000 -f ${hg19} -l $target ${bam_dir}/${normal}_clean.bam > ${outputdir}/${normal}.pileup
- ##for tumor
- for i in `seq 1 8`
- do
- samtools mpileup -B -q 10 -d 10000000 -f ${hg19} -l ${target1}-$i.bed ${bam_dir}/${tumor}_clean.bam > ${outputdir}/${tumor}_$i.pileup&
- done
- wait
- cat ${outputdir}/${tumor}_*.pileup >${outputdir}/${tumor}.pileup
- rm -rf ${outputdir}/${tumor}_*.pileup
- #for normal
- for i in `seq 1 8`
- do
- samtools mpileup -B -q 10 -d 10000000 -f ${hg19} -l ${target1}-$i.bed ${bam_dir}/${normal}_clean.bam > ${outputdir}/${normal}_$i.pileup&
- done
- wait
- cat ${outputdir}/${normal}_*.pileup >${outputdir}/${normal}.pileup
- rm -rf ${outputdir}/${normal}_*.pileup
- ########################2.pair########################################################################
- varscan somatic ${outputdir}/${normal}.pileup ${outputdir}/${tumor}.pileup --output-snp ${outputdir}/${sample}.snp --output-indel ${outputdir}/${sample}.indel --tumor-purity 0.01 --output-vcf
- ########################3.Run the processSomatic subcommand to divide the output into separate files based on somatic status and confidence########
- #3.1获得高置信的突变
- varscan processSomatic ${outputdir}/${sample}.snp.vcf --min-tumor-freq 0.001 --max-normal-freq 0.05 --p-value 0.5
- varscan processSomatic ${outputdir}/${sample}.indel.vcf --min-tumor-freq 0.001 --max-normal-freq 0.05 --p-value 0.5
- #3.2merge the hc
- less ${outputdir}/${sample}.snp.Somatic.hc.vcf | grep -vE "^#"> ${outputdir}/${sample}.merge.Somatic.hc.nohead.vcf
- less ${outputdir}/${sample}.indel.Somatic.hc.vcf | grep -vE "^#" >> ${outputdir}/${sample}.merge.Somatic.hc.nohead.vcf
- #3.3 将邻近的位点进行合并#3.3 将邻近的位点进行合并
- #python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/complex_variant_new.py -i ${outputdir}/${sample}.merge.Somatic.hc.nohead.vcf -b ${bam_dir}/${tumor}_clean.bam -f ${hg19} -mode 2 -n 10 -p 0.5 -o ${outputdir}/${sample}.merge.Somatic.hc.nohead.combind.vcf
- #grep "^#" ${outputdir}/${sample}.snp.Somatic.hc.vcf >${outputdir}/${sample}.merge.Somatic.hc.combind.vcf
- #less ${outputdir}/${sample}.merge.Somatic.hc.nohead.combind.vcf >>${outputdir}/${sample}.merge.Somatic.hc.combind.vcf
- #rm -rf ${outputdir}/${sample}.merge.Somatic.hc.nohead.combind.vcf
- ##将VCF按照染色体进行拆分
- python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/20220705/vcfsplit_v0_20220929_finish.py -i ${outputdir}/${sample}.merge.Somatic.hc.nohead.vcf
- fun() {
- hg19=/cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta
- python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/complex_variant_new.py -i ${file} -b ${bam_dir}/${tumor}_clean.bam -f ${hg19} -mode 2 -n 10 -p 0.5 -o ${file}_chr.vcf &
- wait
- echo "fun is end."
- }
- for file in ${outputdir}/${sample}.merge.Somatic.hc.nohead_*.vcf;
- do
- fun ${tumor} ${file} ${bam_dir} &
- done
- wait
- echo "the vcfmergeing is finished"
- grep "^#" ${outputdir}/${sample}.snp.Somatic.hc.vcf >${outputdir}/${sample}.merge.Somatic.hc.combind.vcf
- cat ${outputdir}/${sample}.merge.Somatic.hc.nohead_*.vcf_chr.vcf>>${outputdir}/${sample}.merge.Somatic.hc.combind.vcf
- rm -rf ${outputdir}/${sample}.merge.Somatic.hc.nohead_*.vcf
- rm -rf ${outputdir}/${sample}.merge.Somatic.hc.nohead_*.vcf_chr.vcf
- #########################4.filter the false positive########################################################################
- grep -v "#" ${outputdir}/${sample}.merge.Somatic.hc.combind.vcf | awk '{a=$2+10;print $1"\t"$2"\t"a}' > ${outputdir}/${sample}.merge.Somatic.hc.combind.bed
- #将bed分成多个小文件
- split -l 3000 ${outputdir}/${sample}.merge.Somatic.hc.combind.bed -d -a 2 ${outputdir}/${sample}.merge.Somatic.hc.combind.bed_
- conda deactivate
- source /home/liuxiangqiong/miniconda3/bin/activate python2
- #bam-readcount -q 20 -b 20 -w 100 -f ${hg19} -l ${outputdir}/${sample}.merge.Somatic.hc.combind.bed ${bam_dir}/${tumor}_clean.bam >${outputdir}/${sample}.merge.Somatic.hc.combind.readcount
- fun() {
- hg19=/cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta
- bam-readcount -q 20 -b 20 -w 100 -f ${hg19} -l ${file} ${bam_dir}/${tumor}_clean.bam >${file}.readcount &
- wait
- echo "fun is end."
- }
- for file in ${outputdir}/${sample}.merge.Somatic.hc.combind.bed_*;
- do
- fun ${inputpath} ${tumor} ${file} ${bam_dir} &
- done
- wait
- echo "the bam-readcount is finished"
- cat ${outputdir}/${sample}.merge.Somatic.hc.combind.bed_*readcount>${outputdir}/${sample}.merge.Somatic.hc.combind.readcount
- rm -rf ${outputdir}/${sample}.merge.Somatic.hc.combind.bed_*readcount
- rm -rf ${outputdir}/${sample}.merge.Somatic.hc.combind.bed_*
- conda deactivate
- source /home/liuxiangqiong/miniconda3/bin/activate base
- varscan fpfilter \
- ${outputdir}/${sample}.merge.Somatic.hc.combind.vcf \
- ${outputdir}/${sample}.merge.Somatic.hc.combind.readcount \
- --min-var-count 5 \
- --min-var-basequal 20 \
- --min-ref-basequal 20 \
- --min-var-freq ${min_af} \
- --output-file ${outputdir}/${sample}.merge.Somatic.hc.combind.pass.vcf
- ##########################5.注释########################################################################
- #5.1对没有过滤的进行注释对高置信的过滤
- #perl /cgdata/CGTools/soft/src/annovar_20200608/table_annovar.pl ${outputdir}/${sample}.merge.Somatic.hc.head.combind.vcf /cgdata/soft/src/annovar.archive/DB/ -buildver hg19 -out ${outputdir}/${sample}.merge.Somatic.hc.head.combind.anno -remove -protocol refGene,popfreq_max_20150413,gnomad_genome,avsnp150,snp138NonFlagged,dbnsfp33a,cosmic94,clinvar_20210501,intervar_20170202,icgc28 -operation g,f,f,f,f,f,f,f,f,f -nastring . -vcfinput --thread 40
- #perl /cgdata/CGTools/soft/src/annovar_20200608/table_annovar.pl ${outputdir}/${sample}.snp.Somatic.hc.vcf /cgdata/soft/src/annovar.archive/DB/ -buildver hg19 -out ${outputdir}/${sample}.merge.Somatic.hc.anno -remove -protocol refGene,popfreq_max_20150413,gnomad_genome,avsnp150,snp138NonFlagged,dbnsfp33a,cosmic94,clinvar_20210501,intervar_20170202,icgc28 -operation g,f,f,f,f,f,f,f,f,f -nastring . -vcfinput --thread 40
- #5.2先拆分再标准化对齐
- #vt decompose ${outputdir}/${sample}.merge.Somatic.hc.combind.pass.vcf |vt normalize - -r ${hg19} -o ${outputdir}/${sample}.merge.Somatic.hc.combind.pass.norm.vcf
- conda deactivate
- #5.3对经过标准化的进行vep注释
- source /home/liuxiangqiong/miniconda3/bin/activate ensemble-vep
- PERL5LIB=/home/liuxiangqiong/miniconda3/envs/ensemble-vep/share/ensembl-vep-105.0-1/Bio/EnsEMBL/Variation/Utils/DB/HTS/Faidx
- export PERL5LIB=$PERL5LIB
- inputdir0=${outputdir}/${sample}.merge.Somatic.hc.combind.pass.vcf
- outdir1_0=${outputdir}/${sample}.merge.Somatic.hc.combind.pass.norm.vepanno.vcf
- vep --cache --fork 16 \
- -i ${inputdir0} \
- -o ${outdir1_0} \
- --assembly GRCh37 \
- --cache_version 104 \
- --dir_cache /home/liuxiangqiong/miniconda3/share/ensembl-vep-88.9-0/hg19 \
- --refseq \
- --canonical \
- --offline \
- --hgvs \
- --fasta /cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta \
- --hgvsg \
- --max_af \
- --check_existing \
- --numbers \
- --symbol \
- --canonical \
- --offline \
- -vcf \
- --use_given_ref \
- --af_gnomad \
- --check_existing
- #5.4对vep的再进行annovar注释
- outdir2_0=${outputdir}/${sample}.merge.Somatic.hc.combind.pass.norm.vepanno.annovar
- perl /cgdata/CGTools/soft/src/annovar_20200608/table_annovar.pl ${outdir1_0} /cgdata/soft/src/annovar.archive/DB/ -buildver hg19 -out ${outdir2_0} -remove -protocol refGene,popfreq_max_20150413,gnomad_genome,avsnp150,snp138NonFlagged,dbnsfp33a,cosmic94,clinvar_20220829,intervar_20170202,icgc28,exac03,esp6500siv2_all -operation g,f,f,f,f,f,f,f,f,f,f,f -nastring . -vcfinput --thread 40
- ###############################6.删除中间文件
- #rm -rf ${outputdir}/${normal}.pileup
- #rm -rf ${outputdir}/${tumor}.pileup
- rm -rf ${outputdir}/${sample}.merge.Somatic.hc.readcount
- conda deactivate
- source /home/liuxiangqiong/miniconda3/bin/activate base
- #对varscan的结果进行过滤
- python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/20220705/datafile_somatic_filter_varscan_v0_20220929_finish.py -i ${inputpath} -s ${sample}
- ##对varscan和vardict的方法结合获得最后的突变
- #python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/20220705/datafile_somatic_filter_mergeresult_v0_20220929_finish.py -i ${inputpath} -s ${sample}
- #########################################################################原始的结果进行合并并注释#####################
- ##对原始的结果进行合并并注释
- less ${outputdir}/${sample}.snp.vcf | grep -vE "^#" > ${outputdir}/${sample}.merge.Somatic.vcf
- less ${outputdir}/${sample}.indel.vcf | grep -vE "^#" >> ${outputdir}/${sample}.merge.Somatic.vcf
- #3.3 将邻近的位点进行合并#3.3 将邻近的位点进行合并
- #python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/complex_variant_new.py -i ${outputdir}/${sample}.merge.Somatic.vcf -b ${bam_dir}/${tumor}_clean.bam -f ${hg19} -mode 2 -n 10 -p 0.5 -o ${outputdir}/${sample}.merge.Somatic.combind1.vcf
- #grep "^#" ${outputdir}/${sample}.snp.vcf >${outputdir}/${sample}.merge.Somatic.combind.vcf
- #less ${outputdir}/${sample}.merge.Somatic.combind1.vcf >>${outputdir}/${sample}.merge.Somatic.combind.vcf
- #rm -rf ${outputdir}/${sample}.merge.Somatic.combind1.vcf
- ##将VCF按照染色体进行拆分
- python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/20220705/vcfsplit_v0_20220929_finish.py -i ${outputdir}/${sample}.merge.Somatic.vcf
- fun() {
- hg19=/cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta
- python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/complex_variant_new.py -i ${file} -b ${bam_dir}/${tumor}_clean.bam -f ${hg19} -mode 2 -n 10 -p 0.5 -o ${file}_chr.vcf &
- wait
- echo "fun is end."
- }
- for file in ${outputdir}/${sample}.merge.Somatic_*.vcf;
- do
- fun ${tumor} ${file} ${bam_dir} &
- done
- wait
- echo "the vcfmergeing is finished"
- grep "^#" ${outputdir}/${sample}.snp.vcf >${outputdir}/${sample}.merge.Somatic.combind.vcf
- cat ${outputdir}/${sample}.merge.Somatic_*.vcf_chr.vcf>>${outputdir}/${sample}.merge.Somatic.combind.vcf
- rm -rf ${outputdir}/${sample}.merge.Somatic_*.vcf
- rm -rf ${outputdir}/${sample}.merge.Somatic_*.vcf_chr.vcf
- #perl /cgdata/CGTools/soft/src/annovar_20200608/table_annovar.pl ${outputdir}/${sample}.merge.Somatic.combind.vcf /cgdata/soft/src/annovar.archive/DB/ -buildver hg19 -out ${outputdir}/${sample}.merge.Somatic.combind.anno -remove -protocol refGene,popfreq_max_20150413,gnomad_genome,avsnp150,snp138NonFlagged,dbnsfp33a,cosmic94,clinvar_20210501,intervar_20170202,icgc28 -operation g,f,f,f,f,f,f,f,f,f -nastring . -vcfinput --thread 40
- #5.2先拆分再标准化对齐
- vt decompose ${outputdir}/${sample}.merge.Somatic.combind.vcf |vt normalize - -r ${hg19} -o ${outputdir}/${sample}.merge.Somatic.combind.norm.vcf
- conda deactivate
- #5.3对经过标准化的进行vep注释
- source /home/liuxiangqiong/miniconda3/bin/activate ensemble-vep
- PERL5LIB=/home/liuxiangqiong/miniconda3/envs/ensemble-vep/share/ensembl-vep-105.0-1/Bio/EnsEMBL/Variation/Utils/DB/HTS/Faidx
- export PERL5LIB=$PERL5LIB
- inputdir=${outputdir}/${sample}.merge.Somatic.combind.norm.vcf
- outdir1=${outputdir}/${sample}.merge.Somatic.combind.norm.vepanno.vcf
- vep --cache --fork 16 \
- -i ${inputdir} \
- -o ${outdir1} \
- --assembly GRCh37 \
- --cache_version 104 \
- --dir_cache /home/liuxiangqiong/miniconda3/share/ensembl-vep-88.9-0/hg19 \
- --refseq \
- --canonical \
- --offline \
- --hgvs \
- --fasta /cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta \
- --hgvsg \
- --max_af \
- --check_existing \
- --numbers \
- --symbol \
- --canonical \
- --offline \
- -vcf \
- --use_given_ref \
- --af_gnomad \
- --check_existing
- #5.4对vep的再进行annovar注释
- outdir2=${outputdir}/${sample}.merge.Somatic.combind.norm.vepanno.annovar
- perl /cgdata/CGTools/soft/src/annovar_20200608/table_annovar.pl ${outdir1} /cgdata/soft/src/annovar.archive/DB/ -buildver hg19 -out ${outdir2} -remove -protocol refGene,popfreq_max_20150413,gnomad_genome,avsnp150,snp138NonFlagged,dbnsfp33a,cosmic94,clinvar_20220829,intervar_20170202,icgc28,exac03,esp6500siv2_all -operation g,f,f,f,f,f,f,f,f,f,f,f -nastring . -vcfinput --thread 40
- conda deactivate
|