s2_noUMI_sv_calling_varscan_pair_20220705.pbs 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
  1. #!/bin/sh
  2. #PBS -N noUMI_varscan_pair
  3. #PBS -j oe
  4. #PBS -l ncpus=12
  5. #PBS -l nodes=1
  6. #PBS -l mem=20G
  7. #inputpath=$1
  8. #sample=$2
  9. source /home/liuxiangqiong/miniconda3/bin/activate base
  10. hg19=/cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta
  11. target=/cgdata/liuxiangqiong/work62pancancer/Client/v0/script/refdata/NanOnco_Plus_Panel_v2.0_Covered_b37_cg.parY2X.sort.bed
  12. #min_af=0.005 for blood,0.01 for tissue
  13. outputdir=${inputpath}/1SV_varscan_pair
  14. #######################1.pileup file########################################################################
  15. samtools mpileup -B -q 10 -d 1000000 -f ${hg19} -l $target ${bam_dir}/${tumor}_clean.bam > ${outputdir}/${tumor}.pileup
  16. samtools mpileup -B -q 10 -d 1000000 -f ${hg19} -l $target ${bam_dir}/${normal}_clean.bam > ${outputdir}/${normal}.pileup
  17. ########################2.pair########################################################################
  18. varscan somatic ${outputdir}/${normal}.pileup ${outputdir}/${tumor}.pileup --output-snp ${outputdir}/${sample}.snp --output-indel ${outputdir}/${sample}.indel --tumor-purity 0.01 --output-vcf
  19. ##对原始的结果进行合并并注释
  20. less ${outputdir}/${sample}.snp.vcf | grep -vE "^#" > ${outputdir}/${sample}.merge.Somatic.vcf
  21. less ${outputdir}/${sample}.indel.vcf | grep -vE "^#" >> ${outputdir}/${sample}.merge.Somatic.vcf
  22. #3.3 将邻近的位点进行合并#3.3 将邻近的位点进行合并
  23. python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/complex_variant_new.py -i ${outputdir}/${sample}.merge.Somatic.vcf -b ${bam_dir}/${tumor}_clean.bam -f ${hg19} -mode 2 -n 10 -p 0.5 -o ${outputdir}/${sample}.merge.Somatic.combind1.vcf
  24. grep "^#" ${outputdir}/${sample}.snp.vcf >${outputdir}/${sample}.merge.Somatic.combind.vcf
  25. less ${outputdir}/${sample}.merge.Somatic.combind1.vcf >>${outputdir}/${sample}.merge.Somatic.combind.vcf
  26. rm -rf ${outputdir}/${sample}.merge.Somatic.combind1.vcf
  27. #perl /cgdata/CGTools/soft/src/annovar_20200608/table_annovar.pl ${outputdir}/${sample}.merge.Somatic.combind.vcf /cgdata/soft/src/annovar.archive/DB/ -buildver hg19 -out ${outputdir}/${sample}.merge.Somatic.combind.anno -remove -protocol refGene,popfreq_max_20150413,gnomad_genome,avsnp150,snp138NonFlagged,dbnsfp33a,cosmic94,clinvar_20210501,intervar_20170202,icgc28 -operation g,f,f,f,f,f,f,f,f,f -nastring . -vcfinput --thread 40
  28. #5.2先拆分再标准化对齐
  29. vt decompose ${outputdir}/${sample}.merge.Somatic.combind.vcf |vt normalize - -r ${hg19} -o ${outputdir}/${sample}.merge.Somatic.combind.norm.vcf
  30. conda deactivate
  31. #5.3对经过标准化的进行vep注释
  32. source /home/liuxiangqiong/miniconda3/bin/activate ensemble-vep
  33. PERL5LIB=/home/liuxiangqiong/miniconda3/envs/ensemble-vep/share/ensembl-vep-105.0-1/Bio/EnsEMBL/Variation/Utils/DB/HTS/Faidx
  34. export PERL5LIB=$PERL5LIB
  35. inputdir=${outputdir}/${sample}.merge.Somatic.combind.norm.vcf
  36. outdir1=${outputdir}/${sample}.merge.Somatic.combind.norm.vepanno.vcf
  37. vep --cache --fork 16 \
  38. -i ${inputdir} \
  39. -o ${outdir1} \
  40. --assembly GRCh37 \
  41. --cache_version 104 \
  42. --dir_cache /home/liuxiangqiong/miniconda3/share/ensembl-vep-88.9-0/hg19 \
  43. --refseq \
  44. --canonical \
  45. --offline \
  46. --hgvs \
  47. --fasta /cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta \
  48. --hgvsg \
  49. --max_af \
  50. --check_existing \
  51. --numbers \
  52. --symbol \
  53. --canonical \
  54. --offline \
  55. -vcf \
  56. --use_given_ref \
  57. --af_gnomad \
  58. --check_existing
  59. #5.4对vep的再进行annovar注释
  60. outdir2=${outputdir}/${sample}.merge.Somatic.combind.norm.vepanno.annovar
  61. perl /cgdata/CGTools/soft/src/annovar_20200608/table_annovar.pl ${outdir1} /cgdata/soft/src/annovar.archive/DB/ -buildver hg19 -out ${outdir2} -remove -protocol refGene,popfreq_max_20150413,gnomad_genome,avsnp150,snp138NonFlagged,dbnsfp33a,cosmic94,clinvar_20210501,intervar_20170202,icgc28 -operation g,f,f,f,f,f,f,f,f,f -nastring . -vcfinput --thread 40
  62. conda deactivate
  63. source /home/liuxiangqiong/miniconda3/bin/activate base
  64. ########################3.Run the processSomatic subcommand to divide the output into separate files based on somatic status and confidence########
  65. #3.1获得高置信的突变
  66. varscan processSomatic ${outputdir}/${sample}.snp.vcf --min-tumor-freq 0.001 --max-normal-freq 0.05 --p-value 0.5
  67. varscan processSomatic ${outputdir}/${sample}.indel.vcf --min-tumor-freq 0.001 --max-normal-freq 0.05 --p-value 0.5
  68. #3.2merge the hc
  69. less ${outputdir}/${sample}.snp.Somatic.hc.vcf | grep -vE "^#"> ${outputdir}/${sample}.merge.Somatic.hc.nohead.vcf
  70. less ${outputdir}/${sample}.indel.Somatic.hc.vcf | grep -vE "^#" >> ${outputdir}/${sample}.merge.Somatic.hc.nohead.vcf
  71. #3.3 将邻近的位点进行合并#3.3 将邻近的位点进行合并
  72. python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/complex_variant_new.py -i ${outputdir}/${sample}.merge.Somatic.hc.nohead.vcf -b ${bam_dir}/${tumor}_clean.bam -f ${hg19} -mode 2 -n 10 -p 0.5 -o ${outputdir}/${sample}.merge.Somatic.hc.nohead.combind.vcf
  73. grep "^#" ${outputdir}/${sample}.snp.Somatic.hc.vcf >${outputdir}/${sample}.merge.Somatic.hc.combind.vcf
  74. less ${outputdir}/${sample}.merge.Somatic.hc.nohead.combind.vcf >>${outputdir}/${sample}.merge.Somatic.hc.combind.vcf
  75. #rm -rf ${outputdir}/${sample}.merge.Somatic.hc.nohead.combind.vcf
  76. #########################4.filter the false positive########################################################################
  77. grep -v "#" ${outputdir}/${sample}.merge.Somatic.hc.combind.vcf | awk '{a=$2+10;print $1"\t"$2"\t"a}' > ${outputdir}/${sample}.merge.Somatic.hc.combind.bed
  78. conda deactivate
  79. source /home/liuxiangqiong/miniconda3/bin/activate python2
  80. bam-readcount -q 20 -b 20 -w 100 -f ${hg19} -l ${outputdir}/${sample}.merge.Somatic.hc.combind.bed ${bam_dir}/${tumor}_clean.bam >${outputdir}/${sample}.merge.Somatic.hc.combind.readcount
  81. conda deactivate
  82. source /home/liuxiangqiong/miniconda3/bin/activate base
  83. varscan fpfilter \
  84. ${outputdir}/${sample}.merge.Somatic.hc.combind.vcf \
  85. ${outputdir}/${sample}.merge.Somatic.hc.combind.readcount \
  86. --min-var-count 5 \
  87. --min-var-basequal 20 \
  88. --min-ref-basequal 20 \
  89. --min-var-freq ${min_af} \
  90. --output-file ${outputdir}/${sample}.merge.Somatic.hc.combind.pass.vcf
  91. ##########################5.注释########################################################################
  92. #5.1对没有过滤的进行注释对高置信的过滤
  93. #perl /cgdata/CGTools/soft/src/annovar_20200608/table_annovar.pl ${outputdir}/${sample}.merge.Somatic.hc.head.combind.vcf /cgdata/soft/src/annovar.archive/DB/ -buildver hg19 -out ${outputdir}/${sample}.merge.Somatic.hc.head.combind.anno -remove -protocol refGene,popfreq_max_20150413,gnomad_genome,avsnp150,snp138NonFlagged,dbnsfp33a,cosmic94,clinvar_20210501,intervar_20170202,icgc28 -operation g,f,f,f,f,f,f,f,f,f -nastring . -vcfinput --thread 40
  94. #perl /cgdata/CGTools/soft/src/annovar_20200608/table_annovar.pl ${outputdir}/${sample}.snp.Somatic.hc.vcf /cgdata/soft/src/annovar.archive/DB/ -buildver hg19 -out ${outputdir}/${sample}.merge.Somatic.hc.anno -remove -protocol refGene,popfreq_max_20150413,gnomad_genome,avsnp150,snp138NonFlagged,dbnsfp33a,cosmic94,clinvar_20210501,intervar_20170202,icgc28 -operation g,f,f,f,f,f,f,f,f,f -nastring . -vcfinput --thread 40
  95. #5.2先拆分再标准化对齐
  96. vt decompose ${outputdir}/${sample}.merge.Somatic.hc.combind.pass.vcf |vt normalize - -r ${hg19} -o ${outputdir}/${sample}.merge.Somatic.hc.combind.pass.norm.vcf
  97. conda deactivate
  98. #5.3对经过标准化的进行vep注释
  99. source /home/liuxiangqiong/miniconda3/bin/activate ensemble-vep
  100. PERL5LIB=/home/liuxiangqiong/miniconda3/envs/ensemble-vep/share/ensembl-vep-105.0-1/Bio/EnsEMBL/Variation/Utils/DB/HTS/Faidx
  101. export PERL5LIB=$PERL5LIB
  102. inputdir0=${outputdir}/${sample}.merge.Somatic.hc.combind.pass.norm.vcf
  103. outdir1_0=${outputdir}/${sample}.merge.Somatic.hc.combind.pass.norm.vepanno.vcf
  104. vep --cache --fork 16 \
  105. -i ${inputdir0} \
  106. -o ${outdir1_0} \
  107. --assembly GRCh37 \
  108. --cache_version 104 \
  109. --dir_cache /home/liuxiangqiong/miniconda3/share/ensembl-vep-88.9-0/hg19 \
  110. --refseq \
  111. --canonical \
  112. --offline \
  113. --hgvs \
  114. --fasta /cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta \
  115. --hgvsg \
  116. --max_af \
  117. --check_existing \
  118. --numbers \
  119. --symbol \
  120. --canonical \
  121. --offline \
  122. -vcf \
  123. --use_given_ref \
  124. --af_gnomad \
  125. --check_existing
  126. #5.4对vep的再进行annovar注释
  127. outdir2_0=${outputdir}/${sample}.merge.Somatic.hc.combind.pass.norm.vepanno.annovar
  128. perl /cgdata/CGTools/soft/src/annovar_20200608/table_annovar.pl ${outdir1_0} /cgdata/soft/src/annovar.archive/DB/ -buildver hg19 -out ${outdir2_0} -remove -protocol refGene,popfreq_max_20150413,gnomad_genome,avsnp150,snp138NonFlagged,dbnsfp33a,cosmic94,clinvar_20210501,intervar_20170202,icgc28 -operation g,f,f,f,f,f,f,f,f,f -nastring . -vcfinput --thread 40
  129. ###############################6.删除中间文件
  130. #rm -rf ${outputdir}/${normal}.pileup
  131. #rm -rf ${outputdir}/${tumor}.pileup
  132. rm -rf ${outputdir}/${sample}.merge.Somatic.hc.readcount