s2_noUMI_sv_calling_varscan_pair_20221027.pbs 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208
  1. #!/bin/sh
  2. #PBS -N noUMI_varscan_pair
  3. #PBS -j oe
  4. #PBS -l ncpus=12
  5. #PBS -l nodes=1
  6. #PBS -l mem=20G
  7. #inputpath=$1
  8. #sample=$2
  9. source /home/liuxiangqiong/miniconda3/bin/activate base
  10. hg19=/cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta
  11. target=/cgdata/liuxiangqiong/work62pancancer/Client/v0/script/refdata/NanOnco_Plus_Panel_v2.0_Covered_b37_cg.parY2X.sort.bed
  12. #min_af=0.005 for blood,0.01 for tissue
  13. outputdir=${inputpath}/1SV_varscan_pair
  14. #######################1.pileup file########################################################################
  15. samtools mpileup -B -q 10 -d 1000000 -f ${hg19} -l $target ${bam_dir}/${tumor}_clean.bam > ${outputdir}/${tumor}.pileup
  16. samtools mpileup -B -q 10 -d 1000000 -f ${hg19} -l $target ${bam_dir}/${normal}_clean.bam > ${outputdir}/${normal}.pileup
  17. ########################2.pair########################################################################
  18. varscan somatic ${outputdir}/${normal}.pileup ${outputdir}/${tumor}.pileup --output-snp ${outputdir}/${sample}.snp --output-indel ${outputdir}/${sample}.indel --tumor-purity 0.01 --output-vcf
  19. ########################3.Run the processSomatic subcommand to divide the output into separate files based on somatic status and confidence########
  20. #3.1获得高置信的突变
  21. varscan processSomatic ${outputdir}/${sample}.snp.vcf --min-tumor-freq 0.001 --max-normal-freq 0.05 --p-value 0.5
  22. varscan processSomatic ${outputdir}/${sample}.indel.vcf --min-tumor-freq 0.001 --max-normal-freq 0.05 --p-value 0.5
  23. #3.2merge the hc
  24. less ${outputdir}/${sample}.snp.Somatic.hc.vcf | grep -vE "^#"> ${outputdir}/${sample}.merge.Somatic.hc.nohead.vcf
  25. less ${outputdir}/${sample}.indel.Somatic.hc.vcf | grep -vE "^#" >> ${outputdir}/${sample}.merge.Somatic.hc.nohead.vcf
  26. #3.3 将邻近的位点进行合并#3.3 将邻近的位点进行合并
  27. #python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/complex_variant_new.py -i ${outputdir}/${sample}.merge.Somatic.hc.nohead.vcf -b ${bam_dir}/${tumor}_clean.bam -f ${hg19} -mode 2 -n 10 -p 0.5 -o ${outputdir}/${sample}.merge.Somatic.hc.nohead.combind.vcf
  28. #grep "^#" ${outputdir}/${sample}.snp.Somatic.hc.vcf >${outputdir}/${sample}.merge.Somatic.hc.combind.vcf
  29. #less ${outputdir}/${sample}.merge.Somatic.hc.nohead.combind.vcf >>${outputdir}/${sample}.merge.Somatic.hc.combind.vcf
  30. #rm -rf ${outputdir}/${sample}.merge.Somatic.hc.nohead.combind.vcf
  31. ##将VCF按照染色体进行拆分
  32. python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/20220705/vcfsplit_v0_20220929_finish.py -i ${outputdir}/${sample}.merge.Somatic.hc.nohead.vcf
  33. fun() {
  34. hg19=/cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta
  35. python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/complex_variant_new.py -i ${file} -b ${bam_dir}/${tumor}_clean.bam -f ${hg19} -mode 2 -n 10 -p 0.5 -o ${file}_chr.vcf &
  36. wait
  37. echo "fun is end."
  38. }
  39. for file in ${outputdir}/${sample}.merge.Somatic.hc.nohead_*.vcf;
  40. do
  41. fun ${tumor} ${file} ${bam_dir} &
  42. done
  43. wait
  44. echo "the vcfmergeing is finished"
  45. grep "^#" ${outputdir}/${sample}.snp.Somatic.hc.vcf >${outputdir}/${sample}.merge.Somatic.hc.combind.vcf
  46. cat ${outputdir}/${sample}.merge.Somatic.hc.nohead_*.vcf_chr.vcf>>${outputdir}/${sample}.merge.Somatic.hc.combind.vcf
  47. rm -rf ${outputdir}/${sample}.merge.Somatic.hc.nohead_*.vcf
  48. rm -rf ${outputdir}/${sample}.merge.Somatic.hc.nohead_*.vcf_chr.vcf
  49. #########################4.filter the false positive########################################################################
  50. grep -v "#" ${outputdir}/${sample}.merge.Somatic.hc.combind.vcf | awk '{a=$2+10;print $1"\t"$2"\t"a}' > ${outputdir}/${sample}.merge.Somatic.hc.combind.bed
  51. #将bed分成多个小文件
  52. split -l 3000 ${outputdir}/${sample}.merge.Somatic.hc.combind.bed -d -a 2 ${outputdir}/${sample}.merge.Somatic.hc.combind.bed_
  53. conda deactivate
  54. source /home/liuxiangqiong/miniconda3/bin/activate python2
  55. #bam-readcount -q 20 -b 20 -w 100 -f ${hg19} -l ${outputdir}/${sample}.merge.Somatic.hc.combind.bed ${bam_dir}/${tumor}_clean.bam >${outputdir}/${sample}.merge.Somatic.hc.combind.readcount
  56. fun() {
  57. hg19=/cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta
  58. bam-readcount -q 20 -b 20 -w 100 -f ${hg19} -l ${file} ${bam_dir}/${tumor}_clean.bam >${file}.readcount &
  59. wait
  60. echo "fun is end."
  61. }
  62. for file in ${outputdir}/${sample}.merge.Somatic.hc.combind.bed_*;
  63. do
  64. fun ${inputpath} ${tumor} ${file} ${bam_dir} &
  65. done
  66. wait
  67. echo "the bam-readcount is finished"
  68. cat ${outputdir}/${sample}.merge.Somatic.hc.combind.bed_*readcount>${outputdir}/${sample}.merge.Somatic.hc.combind.readcount
  69. rm -rf ${outputdir}/${sample}.merge.Somatic.hc.combind.bed_*readcount
  70. rm -rf ${outputdir}/${sample}.merge.Somatic.hc.combind.bed_*
  71. conda deactivate
  72. source /home/liuxiangqiong/miniconda3/bin/activate base
  73. varscan fpfilter \
  74. ${outputdir}/${sample}.merge.Somatic.hc.combind.vcf \
  75. ${outputdir}/${sample}.merge.Somatic.hc.combind.readcount \
  76. --min-var-count 5 \
  77. --min-var-basequal 20 \
  78. --min-ref-basequal 20 \
  79. --min-var-freq ${min_af} \
  80. --output-file ${outputdir}/${sample}.merge.Somatic.hc.combind.pass.vcf
  81. ##########################5.注释########################################################################
  82. #5.1对没有过滤的进行注释对高置信的过滤
  83. #perl /cgdata/CGTools/soft/src/annovar_20200608/table_annovar.pl ${outputdir}/${sample}.merge.Somatic.hc.head.combind.vcf /cgdata/soft/src/annovar.archive/DB/ -buildver hg19 -out ${outputdir}/${sample}.merge.Somatic.hc.head.combind.anno -remove -protocol refGene,popfreq_max_20150413,gnomad_genome,avsnp150,snp138NonFlagged,dbnsfp33a,cosmic94,clinvar_20210501,intervar_20170202,icgc28 -operation g,f,f,f,f,f,f,f,f,f -nastring . -vcfinput --thread 40
  84. #perl /cgdata/CGTools/soft/src/annovar_20200608/table_annovar.pl ${outputdir}/${sample}.snp.Somatic.hc.vcf /cgdata/soft/src/annovar.archive/DB/ -buildver hg19 -out ${outputdir}/${sample}.merge.Somatic.hc.anno -remove -protocol refGene,popfreq_max_20150413,gnomad_genome,avsnp150,snp138NonFlagged,dbnsfp33a,cosmic94,clinvar_20210501,intervar_20170202,icgc28 -operation g,f,f,f,f,f,f,f,f,f -nastring . -vcfinput --thread 40
  85. #5.2先拆分再标准化对齐
  86. #vt decompose ${outputdir}/${sample}.merge.Somatic.hc.combind.pass.vcf |vt normalize - -r ${hg19} -o ${outputdir}/${sample}.merge.Somatic.hc.combind.pass.norm.vcf
  87. conda deactivate
  88. #5.3对经过标准化的进行vep注释
  89. source /home/liuxiangqiong/miniconda3/bin/activate ensemble-vep
  90. PERL5LIB=/home/liuxiangqiong/miniconda3/envs/ensemble-vep/share/ensembl-vep-105.0-1/Bio/EnsEMBL/Variation/Utils/DB/HTS/Faidx
  91. export PERL5LIB=$PERL5LIB
  92. inputdir0=${outputdir}/${sample}.merge.Somatic.hc.combind.pass.vcf
  93. outdir1_0=${outputdir}/${sample}.merge.Somatic.hc.combind.pass.norm.vepanno.vcf
  94. vep --cache --fork 16 \
  95. -i ${inputdir0} \
  96. -o ${outdir1_0} \
  97. --assembly GRCh37 \
  98. --cache_version 104 \
  99. --dir_cache /home/liuxiangqiong/miniconda3/share/ensembl-vep-88.9-0/hg19 \
  100. --refseq \
  101. --canonical \
  102. --offline \
  103. --hgvs \
  104. --fasta /cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta \
  105. --hgvsg \
  106. --max_af \
  107. --check_existing \
  108. --numbers \
  109. --symbol \
  110. --canonical \
  111. --offline \
  112. -vcf \
  113. --use_given_ref \
  114. --af_gnomad \
  115. --check_existing
  116. #5.4对vep的再进行annovar注释
  117. outdir2_0=${outputdir}/${sample}.merge.Somatic.hc.combind.pass.norm.vepanno.annovar
  118. perl /cgdata/CGTools/soft/src/annovar_20200608/table_annovar.pl ${outdir1_0} /cgdata/soft/src/annovar.archive/DB/ -buildver hg19 -out ${outdir2_0} -remove -protocol refGene,popfreq_max_20150413,gnomad_genome,avsnp150,snp138NonFlagged,dbnsfp33a,cosmic94,clinvar_20220829,intervar_20170202,icgc28,exac03,esp6500siv2_all -operation g,f,f,f,f,f,f,f,f,f,f,f -nastring . -vcfinput --thread 40
  119. ###############################6.删除中间文件
  120. #rm -rf ${outputdir}/${normal}.pileup
  121. #rm -rf ${outputdir}/${tumor}.pileup
  122. rm -rf ${outputdir}/${sample}.merge.Somatic.hc.readcount
  123. conda deactivate
  124. source /home/liuxiangqiong/miniconda3/bin/activate base
  125. #对varscan的结果进行过滤
  126. python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/20220705/datafile_somatic_filter_varscan_v0_20220929_finish.py -i ${inputpath} -s ${sample}
  127. ##对varscan和vardict的方法结合获得最后的突变
  128. #python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/20220705/datafile_somatic_filter_mergeresult_v0_20220929_finish.py -i ${inputpath} -s ${sample}
  129. #########################################################################原始的结果进行合并并注释#####################
  130. ##对原始的结果进行合并并注释
  131. less ${outputdir}/${sample}.snp.vcf | grep -vE "^#" > ${outputdir}/${sample}.merge.Somatic.vcf
  132. less ${outputdir}/${sample}.indel.vcf | grep -vE "^#" >> ${outputdir}/${sample}.merge.Somatic.vcf
  133. #3.3 将邻近的位点进行合并#3.3 将邻近的位点进行合并
  134. #python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/complex_variant_new.py -i ${outputdir}/${sample}.merge.Somatic.vcf -b ${bam_dir}/${tumor}_clean.bam -f ${hg19} -mode 2 -n 10 -p 0.5 -o ${outputdir}/${sample}.merge.Somatic.combind1.vcf
  135. #grep "^#" ${outputdir}/${sample}.snp.vcf >${outputdir}/${sample}.merge.Somatic.combind.vcf
  136. #less ${outputdir}/${sample}.merge.Somatic.combind1.vcf >>${outputdir}/${sample}.merge.Somatic.combind.vcf
  137. #rm -rf ${outputdir}/${sample}.merge.Somatic.combind1.vcf
  138. ##将VCF按照染色体进行拆分
  139. python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/20220705/vcfsplit_v0_20220929_finish.py -i ${outputdir}/${sample}.merge.Somatic.vcf
  140. fun() {
  141. hg19=/cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta
  142. python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/complex_variant_new.py -i ${file} -b ${bam_dir}/${tumor}_clean.bam -f ${hg19} -mode 2 -n 10 -p 0.5 -o ${file}_chr.vcf &
  143. wait
  144. echo "fun is end."
  145. }
  146. for file in ${outputdir}/${sample}.merge.Somatic_*.vcf;
  147. do
  148. fun ${tumor} ${file} ${bam_dir} &
  149. done
  150. wait
  151. echo "the vcfmergeing is finished"
  152. grep "^#" ${outputdir}/${sample}.snp.vcf >${outputdir}/${sample}.merge.Somatic.combind.vcf
  153. cat ${outputdir}/${sample}.merge.Somatic_*.vcf_chr.vcf>>${outputdir}/${sample}.merge.Somatic.combind.vcf
  154. rm -rf ${outputdir}/${sample}.merge.Somatic_*.vcf
  155. rm -rf ${outputdir}/${sample}.merge.Somatic_*.vcf_chr.vcf
  156. #perl /cgdata/CGTools/soft/src/annovar_20200608/table_annovar.pl ${outputdir}/${sample}.merge.Somatic.combind.vcf /cgdata/soft/src/annovar.archive/DB/ -buildver hg19 -out ${outputdir}/${sample}.merge.Somatic.combind.anno -remove -protocol refGene,popfreq_max_20150413,gnomad_genome,avsnp150,snp138NonFlagged,dbnsfp33a,cosmic94,clinvar_20210501,intervar_20170202,icgc28 -operation g,f,f,f,f,f,f,f,f,f -nastring . -vcfinput --thread 40
  157. #5.2先拆分再标准化对齐
  158. vt decompose ${outputdir}/${sample}.merge.Somatic.combind.vcf |vt normalize - -r ${hg19} -o ${outputdir}/${sample}.merge.Somatic.combind.norm.vcf
  159. conda deactivate
  160. #5.3对经过标准化的进行vep注释
  161. source /home/liuxiangqiong/miniconda3/bin/activate ensemble-vep
  162. PERL5LIB=/home/liuxiangqiong/miniconda3/envs/ensemble-vep/share/ensembl-vep-105.0-1/Bio/EnsEMBL/Variation/Utils/DB/HTS/Faidx
  163. export PERL5LIB=$PERL5LIB
  164. inputdir=${outputdir}/${sample}.merge.Somatic.combind.norm.vcf
  165. outdir1=${outputdir}/${sample}.merge.Somatic.combind.norm.vepanno.vcf
  166. vep --cache --fork 16 \
  167. -i ${inputdir} \
  168. -o ${outdir1} \
  169. --assembly GRCh37 \
  170. --cache_version 104 \
  171. --dir_cache /home/liuxiangqiong/miniconda3/share/ensembl-vep-88.9-0/hg19 \
  172. --refseq \
  173. --canonical \
  174. --offline \
  175. --hgvs \
  176. --fasta /cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta \
  177. --hgvsg \
  178. --max_af \
  179. --check_existing \
  180. --numbers \
  181. --symbol \
  182. --canonical \
  183. --offline \
  184. -vcf \
  185. --use_given_ref \
  186. --af_gnomad \
  187. --check_existing
  188. #5.4对vep的再进行annovar注释
  189. outdir2=${outputdir}/${sample}.merge.Somatic.combind.norm.vepanno.annovar
  190. perl /cgdata/CGTools/soft/src/annovar_20200608/table_annovar.pl ${outdir1} /cgdata/soft/src/annovar.archive/DB/ -buildver hg19 -out ${outdir2} -remove -protocol refGene,popfreq_max_20150413,gnomad_genome,avsnp150,snp138NonFlagged,dbnsfp33a,cosmic94,clinvar_20220829,intervar_20170202,icgc28,exac03,esp6500siv2_all -operation g,f,f,f,f,f,f,f,f,f,f,f -nastring . -vcfinput --thread 40
  191. conda deactivate