#!/bin/bash
#PBS -N Germline
#PBS -j oe
#PBS -l ncpus=12
#PBS -l nodes=1
#PBS -l mem=20G
#
# s5_noUMI_Germline_gatk_unpair_20230522.sh
#
# Germline (unpaired, no-UMI) variant calling and annotation job.
# Pipeline: GATK HaplotypeCaller -> complex-variant merging -> VEP
#           -> ANNOVAR -> germ_filter.pl -> final report script.
#
# Required variables. They are not assigned anywhere in this script, so they
# must already be present in the job environment (presumably passed with
# `qsub -v inputpath=...,sample=...,bam_dir=...` -- TODO confirm against the
# submitting wrapper; an earlier revision read inputpath/sample from $1/$2):
#   inputpath  project directory; results go to ${inputpath}/4Germline_unpair
#   sample     sample name, used as the prefix of every input/output file
#   bam_dir    directory that contains ${sample}_clean.bam

# Fail fast with a clear message instead of running the tools on empty paths.
: "${inputpath:?inputpath must be set in the job environment}"
: "${sample:?sample must be set in the job environment}"
: "${bam_dir:?bam_dir must be set in the job environment}"

source /home/liuxiangqiong/miniconda3/bin/activate base

# ---- Tool and reference locations ------------------------------------------
gatk4=/cgdata/CGTools/soft/tools/gatk4/gatk-4.0.8.1/gatk
b37=/cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta
target=/cgdata/liuxiangqiong/work62pancancer/Client/v0/script/refdata/NanOnco_Plus_Panel_v2.0_Covered_b37_cg.parY2X.sort_NOhap.bed
annovar_dir=/cgdata/soft/src/annovar.archive/annovar_v20200608
annovar_db=/cgdata/soft/src/annovar.archive/annovar_v20200608/DB
script_dir=/cgdata/liuxiangqiong/work62pancancer/Client/v0/script

outdir="${inputpath}/4Germline_unpair"
# The output directory is not created by any of the tools below.
mkdir -p -- "${outdir}" || exit 1

# ---- 1. Variant calling -----------------------------------------------------
# Every later step consumes this VCF, so stop here if the caller fails.
"${gatk4}" HaplotypeCaller \
  -R "${b37}" \
  -I "${bam_dir}/${sample}_clean.bam" \
  -ip 50 \
  -L "${target}" \
  -O "${outdir}/${sample}.vcf.gz" \
  || { echo "HaplotypeCaller failed for ${sample}" >&2; exit 1; }

# Disabled: direct ANNOVAR annotation of the raw HaplotypeCaller VCF.
# Every line of the command is commented; previously only the first line was,
# so the continuation lines were executed as a command named "-buildver".
#perl $annovar_dir/table_annovar.pl ${outdir}/${sample}.vcf.gz $annovar_db \
#  -buildver hg19 -out ${outdir}/${sample} -remove \
#  -protocol refGene,genomicSuperDups,phastConsElements46way,omim,avsnp150,clinvar,icgc28,cosmic94,gnomad_exome,1000g2015aug_all,hgmd-pro,intervar_20180118,dbscsnv11,dbnsfp41a,regsnpintron \
#  -operation g,r,r,r,f,f,f,f,f,f,f,f,f,f,f \
#  -nastring . -vcfinput -polish
#perl /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/germ_filter.pl -i ${outdir}/${sample}.hg19_multianno.txt -s /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/refdata/Select_RefSeq_HGNC_MANE.txt ${outdir}/${sample}.germ.xls

# ---- 2. Merge adjacent variants into complex variants -----------------------
# Strip the VCF header (lines starting with '#'). gunzip is used rather than
# `less`, which only decompresses when a lesspipe filter is configured.
gunzip -c "${outdir}/${sample}.vcf.gz" \
  | grep -v '^#' > "${outdir}/${sample}.nohead.vcf"

# Merge sites.
python "${script_dir}/complex_variant_germline_20220824.py" \
  -i "${outdir}/${sample}.nohead.vcf" \
  -b "${bam_dir}/${sample}_clean.bam" \
  -f "${b37}" \
  -mode 2 -n 10 -p 0.5 \
  -o "${outdir}/${sample}.nohead.combind.vcf"

# Put the original header back in front of the merged records.
gunzip -c "${outdir}/${sample}.vcf.gz" \
  | grep '^#' > "${outdir}/${sample}.head.combind.vcf"
cat "${outdir}/${sample}.nohead.combind.vcf" >> "${outdir}/${sample}.head.combind.vcf"

conda deactivate

# ---- 3. VEP annotation ------------------------------------------------------
source /home/liuxiangqiong/miniconda3/bin/activate ensemble-vep
export PERL5LIB=/home/liuxiangqiong/miniconda3/envs/ensemble-vep/share/ensembl-vep-105.0-1/Bio/EnsEMBL/Variation/Utils/DB/HTS/Faidx

vep_input="${outdir}/${sample}.head.combind.vcf"
vep_output="${outdir}/${sample}.head.combind.vepanno.vcf"

# NOTE(review): --fork 16 exceeds the ncpus=12 requested in the job header;
# left unchanged -- confirm whether the scheduler enforces the CPU limit.
# NOTE(review): cache_version 104 is read from a directory named
# ensembl-vep-88.9-0 while the environment path says 105 -- confirm the cache.
# Duplicate flags (--canonical, --offline, --check_existing) are passed once.
vep --cache --fork 16 \
  -i "${vep_input}" \
  -o "${vep_output}" \
  --assembly GRCh37 \
  --cache_version 104 \
  --dir_cache /home/liuxiangqiong/miniconda3/share/ensembl-vep-88.9-0/hg19 \
  --refseq \
  --canonical \
  --offline \
  --hgvs \
  --fasta /cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta \
  --hgvsg \
  --max_af \
  --check_existing \
  --numbers \
  --symbol \
  --vcf \
  --use_given_ref \
  --af_gnomad

# ---- 4. ANNOVAR annotation of the VEP-annotated VCF -------------------------
# 19 protocols, 19 matching operations (1 gene-based, 3 region, 15 filter).
perl "${annovar_dir}/table_annovar.pl" "${vep_output}" "${annovar_db}" \
  -buildver hg19 -out "${outdir}/${sample}" -remove \
  -protocol refGene,genomicSuperDups,phastConsElements46way,omim,avsnp150,clinvar,icgc28,cosmic94,gnomad_exome,1000g2015aug_all,hgmd-pro,intervar_20180118,dbscsnv11,dbnsfp41a,regsnpintron,exac03,esp6500siv2_all,WBBC_v211129,inhouse_IDTWES_v2 \
  -operation g,r,r,r,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f \
  -nastring . -vcfinput -polish

# ---- 5. Extract and correct annotation fields -------------------------------
# NOTE(review): the output path is given as a bare positional argument (no
# option flag, no redirection); kept as in the original -- confirm that
# germ_filter.pl expects it this way.
perl "${script_dir}/germ_filter.pl" \
  -i "${outdir}/${sample}.hg19_multianno.txt" \
  -s "${script_dir}/refdata/Select_RefSeq_HGNC_MANE.txt" \
  "${outdir}/${sample}.germ.xls"

conda deactivate
source /home/liuxiangqiong/miniconda3/bin/activate base

# ---- 6. Write and filter the final results ----------------------------------
# Earlier, now-disabled versions of this report script in the same directory:
# datafile_germline_v0_{20220926,20230221,20230301,20230303,20230304,
# 20230308,20230403}_finish.py
python3 "${script_dir}/20220705/datafile_germline_v0_20230522_finish.py" \
  -i "${inputpath}" -s "${sample}"