tumor_only_sv_calling_vardict_unpair_20230330.pbs 5.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  1. #!/bin/sh
  2. #PBS -N noUMI_varscan_unpair
  3. #PBS -j oe
  4. #PBS -l ncpus=12
  5. #PBS -l nodes=1
  6. #PBS -l mem=20G
  7. #inputpath=$1
  8. #tumor=$2
  9. #bam_dir=$3
  10. #min_af=0.001
  11. source /home/liuxiangqiong/miniconda3/bin/activate base
  12. hg19=/cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta
  13. target=/cgdata/liuxiangqiong/work62pancancer/pipeline/v0/refdata/NanOnco_Plus_Panel_v2.0_Covered_b37_cg.parY2X.sort
  14. Vardict_Dir=/cgdata/liuxiangqiong/software/VarDict-1.5.7
  15. outputdir=${inputpath}/1SV_vardict_unpair
  16. tumorbam=${bam_dir}/${tumor}_clean.bam
  17. #normalbam=${bam_dir}/${normal}_clean.bam
  18. #unpair
  19. for i in `seq 1 8`
  20. do
  21. ${Vardict_Dir}/vardict -G ${hg19} -f ${min_af} -N ${tumor} -b ${tumorbam} -z -c 1 -S 2 -E 3 -g 4 --nosv ${target}-$i.bed | ${Vardict_Dir}/teststrandbias.R|${Vardict_Dir}/var2vcf_valid.pl -N ${tumor} -E -f ${min_af}|awk '{if ($q ~ /^#/) print; else if ($4 !=5) print}' >${outputdir}/${tumor}_unpair.vd_$i.vcf&
  22. done
  23. wait
  24. #bgzip
  25. bcftools view ${outputdir}/${tumor}_unpair.vd_1.vcf -Oz -o ${outputdir}/${tumor}_unpair.vd_1.vcf.gz
  26. bcftools view ${outputdir}/${tumor}_unpair.vd_2.vcf -Oz -o ${outputdir}/${tumor}_unpair.vd_2.vcf.gz
  27. bcftools view ${outputdir}/${tumor}_unpair.vd_3.vcf -Oz -o ${outputdir}/${tumor}_unpair.vd_3.vcf.gz
  28. bcftools view ${outputdir}/${tumor}_unpair.vd_4.vcf -Oz -o ${outputdir}/${tumor}_unpair.vd_4.vcf.gz
  29. bcftools view ${outputdir}/${tumor}_unpair.vd_5.vcf -Oz -o ${outputdir}/${tumor}_unpair.vd_5.vcf.gz
  30. bcftools view ${outputdir}/${tumor}_unpair.vd_6.vcf -Oz -o ${outputdir}/${tumor}_unpair.vd_6.vcf.gz
  31. bcftools view ${outputdir}/${tumor}_unpair.vd_7.vcf -Oz -o ${outputdir}/${tumor}_unpair.vd_7.vcf.gz
  32. bcftools view ${outputdir}/${tumor}_unpair.vd_8.vcf -Oz -o ${outputdir}/${tumor}_unpair.vd_8.vcf.gz
  33. #index
  34. bcftools index ${outputdir}/${tumor}_unpair.vd_1.vcf.gz
  35. bcftools index ${outputdir}/${tumor}_unpair.vd_2.vcf.gz
  36. bcftools index ${outputdir}/${tumor}_unpair.vd_3.vcf.gz
  37. bcftools index ${outputdir}/${tumor}_unpair.vd_4.vcf.gz
  38. bcftools index ${outputdir}/${tumor}_unpair.vd_5.vcf.gz
  39. bcftools index ${outputdir}/${tumor}_unpair.vd_6.vcf.gz
  40. bcftools index ${outputdir}/${tumor}_unpair.vd_7.vcf.gz
  41. bcftools index ${outputdir}/${tumor}_unpair.vd_8.vcf.gz
  42. #
  43. bcftools concat -a ${outputdir}/${tumor}_unpair.vd_1.vcf.gz ${outputdir}/${tumor}_unpair.vd_2.vcf.gz ${outputdir}/${tumor}_unpair.vd_3.vcf.gz ${outputdir}/${tumor}_unpair.vd_4.vcf.gz ${outputdir}/${tumor}_unpair.vd_5.vcf.gz ${outputdir}/${tumor}_unpair.vd_6.vcf.gz ${outputdir}/${tumor}_unpair.vd_7.vcf.gz ${outputdir}/${tumor}_unpair.vd_8.vcf.gz | bcftools sort - -O z -o ${outputdir}/${tumor}_unpair.vd.vcf
  44. #标准化对齐
  45. vt decompose ${outputdir}/${tumor}_unpair.vd.vcf |vt normalize - -r ${hg19} -o ${outputdir}/${tumor}_unpair.vd.normalized.vcf
  46. #less ${outputdir}/${tumor}_unpair.vd.normalized.vcf | grep -vE "^#" > ${outputdir}/${tumor}_unpair.vd.normalized.nohead.vcf
  47. #3.3 将邻近的位点进行合并#3.3 将邻近的位点进行合并
  48. #python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/complex_variant_new.py -i ${outputdir}/${tumor}_unpair.vd.normalized.nohead.vcf -b ${bam_dir}/${tumor}_clean.bam -f ${hg19} -mode 2 -n 10 -p 0.5 -o ${outputdir}/${tumor}_unpair.vd.normalized.nohead.combind.vcf
  49. #grep "^#" ${outputdir}/${tumor}_unpair.vd.normalized.vcf >${outputdir}/${tumor}_unpair.vd.normalized.head.combind.vcf
  50. #less ${outputdir}/${tumor}_unpair.vd.normalized.nohead.combind.vcf >>${outputdir}/${tumor}_unpair.vd.normalized.head.combind.vcf
  51. conda deactivate
  52. #5.3对经过标准化的进行vep注释
  53. source /home/liuxiangqiong/miniconda3/bin/activate ensemble-vep
  54. PERL5LIB=/home/liuxiangqiong/miniconda3/envs/ensemble-vep/share/ensembl-vep-105.0-1/Bio/EnsEMBL/Variation/Utils/DB/HTS/Faidx
  55. export PERL5LIB=$PERL5LIB
  56. inputdir=${outputdir}/${tumor}_unpair.vd.normalized.vcf
  57. outdir1=${outputdir}/${tumor}_unpair.vd.normalized.vepanno.vcf
  58. vep --cache --fork 16 \
  59. -i ${inputdir} \
  60. -o ${outdir1} \
  61. --assembly GRCh37 \
  62. --cache_version 104 \
  63. --dir_cache /home/liuxiangqiong/miniconda3/share/ensembl-vep-88.9-0/hg19 \
  64. --refseq \
  65. --canonical \
  66. --offline \
  67. --hgvs \
  68. --fasta /cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta \
  69. --hgvsg \
  70. --max_af \
  71. --check_existing \
  72. --numbers \
  73. --symbol \
  74. --canonical \
  75. --offline \
  76. -vcf \
  77. --use_given_ref \
  78. --af_gnomad \
  79. --check_existing
  80. #5.4对vep的再进行annovar注释
  81. outdir2=${outputdir}/${tumor}_unpair.vd.normalized.vepanno.annovar
  82. perl /cgdata/CGTools/soft/src/annovar_20200608/table_annovar.pl ${outdir1} /cgdata/soft/src/annovar.archive/DB/ -buildver hg19 -out ${outdir2} -remove -protocol refGene,popfreq_max_20150413,gnomad_genome,avsnp150,snp138NonFlagged,dbnsfp33a,cosmic94,clinvar_20220829,intervar_20170202,icgc28,exac03,esp6500siv2_all -operation g,f,f,f,f,f,f,f,f,f,f,f -nastring . -vcfinput --thread 40
  83. #remove
  84. rm ${outputdir}/${tumor}_unpair.vd_?.vcf*
  85. ###对vardict的结果进行过滤
  86. conda deactivate
  87. source /home/liuxiangqiong/miniconda3/bin/activate base
  88. python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/20220705/datafile_somatic_filter_vardict_v0_20220929_finish.py -i ${inputpath} -s ${tumor}