#!/bin/bash
#PBS -N Germline
#PBS -j oe
#PBS -l ncpus=12
#PBS -l nodes=1
#PBS -l mem=20G
#
# Germline (unpaired) variant calling and annotation for a single sample.
#
# Steps, in order:
#   1. GATK HaplotypeCaller on ${bam_dir}/${sample}_clean.bam over the panel BED
#   2. strip the VCF header, merge variants with the complex-variant script,
#      then put the header back
#   3. annotate the merged VCF with VEP
#   4. annotate the VEP output with ANNOVAR (table_annovar.pl)
#   5. run germ_filter.pl and the final datafile_germline python script
#
# Required inputs (not assigned anywhere in this script, so they have to come
# from the environment, e.g. `qsub -v inputpath=...,sample=...,bam_dir=...`):
#   inputpath  project directory; results go to ${inputpath}/4Germline_unpair
#   sample     sample name used as the file-name prefix
#   bam_dir    directory that holds ${sample}_clean.bam
# inputpath and sample fall back to positional arguments 1 and 2 when the
# script is run directly.
#
# bash is required: `source` and the conda activate script are not POSIX sh.

inputpath=${inputpath:-${1:-}}
sample=${sample:-${2:-}}
# Abort early instead of silently writing to "/4Germline_unpair" or reading
# "/_clean.bam" when an input is missing.
: "${inputpath:?inputpath is not set (environment variable or argument 1)}"
: "${sample:?sample is not set (environment variable or argument 2)}"
: "${bam_dir:?bam_dir is not set (environment variable)}"

source /home/liuxiangqiong/miniconda3/bin/activate base

gatk4=/cgdata/CGTools/soft/tools/gatk4/gatk-4.0.8.1/gatk
b37=/cgdata/Database/GATK/b37/human_g1k_v37_decoy.fasta
target=/cgdata/liuxiangqiong/work62pancancer/Client/v0/script/refdata/NanOnco_Plus_Panel_v2.0_Covered_b37_cg.parY2X.sort_NOhap.bed
annovar_dir=/cgdata/soft/src/annovar.archive/annovar_v20200608
annovar_db=/cgdata/soft/src/annovar.archive/annovar_v20200608/DB
script_dir=/cgdata/liuxiangqiong/work62pancancer/Client/v0/script
outdir=${inputpath}/4Germline_unpair
# Number of VEP worker processes; kept equal to the "#PBS -l ncpus" request.
vep_forks=12

bam=${bam_dir}/${sample}_clean.bam
raw_vcf=${outdir}/${sample}.vcf.gz
nohead_vcf=${outdir}/${sample}.nohead.vcf
nohead_combined_vcf=${outdir}/${sample}.nohead.combind.vcf
combined_vcf=${outdir}/${sample}.head.combind.vcf
vep_vcf=${outdir}/${sample}.head.combind.vepanno.vcf

mkdir -p "${outdir}"

# --- 1. Variant calling -----------------------------------------------------
"$gatk4" HaplotypeCaller -R "$b37" -I "${bam}" -ip 50 -L "${target}" -O "${raw_vcf}" \
    || { echo "HaplotypeCaller failed for ${sample}" >&2; exit 1; }

# Earlier pipeline version (ANNOVAR directly on the raw VCF), kept for
# reference. Every line is commented so the option lines are not executed as
# commands.
#perl $annovar_dir/table_annovar.pl ${outdir}/${sample}.vcf.gz $annovar_db \
#  -buildver hg19 -out ${outdir}/${sample} -remove \
#  -protocol refGene,genomicSuperDups,phastConsElements46way,omim,avsnp150,clinvar,icgc28,cosmic94,gnomad_exome,1000g2015aug_all,hgmd-pro,intervar_20180118,dbscsnv11,dbnsfp41a,regsnpintron \
#  -operation g,r,r,r,f,f,f,f,f,f,f,f,f,f,f \
#  -nastring . -vcfinput -polish
#perl /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/germ_filter.pl -i ${outdir}/${sample}.hg19_multianno.txt -s /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/refdata/Select_RefSeq_HGNC_MANE.txt ${outdir}/${sample}.germ.xls

# --- 2. Merge variants ------------------------------------------------------
# Drop the VCF header lines (those starting with '#'). gunzip is used rather
# than `less`, which only decompresses when a lesspipe preprocessor is set up.
gunzip -dc "${raw_vcf}" | grep -vE "^#" > "${nohead_vcf}"

# Merge sites with the complex-variant script.
python "${script_dir}/complex_variant_germline_20220824.py" -i "${nohead_vcf}" -b "${bam}" -f "${b37}" -mode 2 -n 10 -p 0.5 -o "${nohead_combined_vcf}"

# Put the original header back in front of the merged records.
gunzip -dc "${raw_vcf}" | grep "^#" > "${combined_vcf}"
cat "${nohead_combined_vcf}" >> "${combined_vcf}"
conda deactivate

# --- 3. VEP annotation ------------------------------------------------------
source /home/liuxiangqiong/miniconda3/bin/activate ensemble-vep
export PERL5LIB=/home/liuxiangqiong/miniconda3/envs/ensemble-vep/share/ensembl-vep-105.0-1/Bio/EnsEMBL/Variation/Utils/DB/HTS/Faidx

# Flags that were listed twice in the original command (--canonical,
# --offline, --check_existing) appear once here; the option set is unchanged.
vep --cache --fork "${vep_forks}" \
    -i "${combined_vcf}" \
    -o "${vep_vcf}" \
    --assembly GRCh37 \
    --cache_version 104 \
    --dir_cache /home/liuxiangqiong/miniconda3/share/ensembl-vep-88.9-0/hg19 \
    --refseq \
    --canonical \
    --offline \
    --hgvs \
    --fasta "${b37}" \
    --hgvsg \
    --max_af \
    --check_existing \
    --numbers \
    --symbol \
    --vcf \
    --use_given_ref \
    --af_gnomad \
    || { echo "VEP failed for ${sample}" >&2; exit 1; }

# --- 4. ANNOVAR annotation --------------------------------------------------
perl "$annovar_dir/table_annovar.pl" "${vep_vcf}" "$annovar_db" \
    -buildver hg19 -out "${outdir}/${sample}" -remove \
    -protocol refGene,genomicSuperDups,phastConsElements46way,omim,avsnp150,clinvar,icgc28,cosmic94,gnomad_exome,1000g2015aug_all,hgmd-pro,intervar_20180118,dbscsnv11,dbnsfp41a,regsnpintron,exac03,esp6500siv2_all \
    -operation g,r,r,r,f,f,f,f,f,f,f,f,f,f,f,f,f \
    -nastring . -vcfinput -polish \
    || { echo "table_annovar.pl failed for ${sample}" >&2; exit 1; }

# --- 5. Extraction, correction and final report -----------------------------
# Extract and correct the annotation fields.
# NOTE(review): the .germ.xls path is passed without an option flag, exactly as
# in the original command; confirm germ_filter.pl takes it positionally.
perl "${script_dir}/germ_filter.pl" -i "${outdir}/${sample}.hg19_multianno.txt" -s "${script_dir}/refdata/Select_RefSeq_HGNC_MANE.txt" "${outdir}/${sample}.germ.xls"
conda deactivate
source /home/liuxiangqiong/miniconda3/bin/activate base

# Write and filter the final results. Superseded script versions are kept
# below for reference.
#python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/20220705/datafile_germline_v0_20220926_finish.py -i ${inputpath} -s ${sample}
#python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/20220705/datafile_germline_v0_20230221_finish.py -i ${inputpath} -s ${sample}
#python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/20220705/datafile_germline_v0_20230301_finish.py -i ${inputpath} -s ${sample}
#python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/20220705/datafile_germline_v0_20230303_finish.py -i ${inputpath} -s ${sample}
#python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/20220705/datafile_germline_v0_20230304_finish.py -i ${inputpath} -s ${sample}
#python3 /cgdata/liuxiangqiong/work62pancancer/Client/v0/script/20220705/datafile_germline_v0_20230308_finish.py -i ${inputpath} -s ${sample}
python3 "${script_dir}/20220705/datafile_germline_v0_20230403_finish.py" -i "${inputpath}" -s "${sample}"