run_FeBY2_single_L2.sh.bak 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139
  1. #!/usr/bin/env bash
  2. ##Author: guochangquan
  3. ##Copyright 20210929
  4. ##company: NuProbe
  5. set -e
  6. function usage() {
  7. echo "Usage: $0 [ -s <sample> ] [ -i <fq_dir> ] [ -o <analysis_dir> ] [ -x <script_dir> ] [ -c <FeBY2_CNV_outdir> ]"
  8. echo "-s sample name"
  9. echo "-i input fastq dir"
  10. echo "-x script_dir"
  11. echo "-o analysis_dir"
  12. echo "-c FeBY2 CNV outdir"
  13. }
  14. if [ $# -eq 0 ]
  15. then
  16. usage
  17. exit
  18. fi
  19. feby2_script=$(cd "$(readlink -f "$0" | xargs dirname)"; pwd)
  20. while getopts ":s:c:x:o:i:h" opt; do
  21. case $opt in
  22. i)
  23. fq_dir=$OPTARG
  24. ;;
  25. o)
  26. out_dir=$OPTARG
  27. ;;
  28. s)
  29. sample=$OPTARG
  30. ;;
  31. c)
  32. cnv_dir=$OPTARG
  33. ;;
  34. x)
  35. feby2_script=$OPTARG
  36. ;;
  37. h)
  38. usage
  39. exit
  40. ;;
  41. \?)
  42. echo "Invalid option: -$OPTARG" > /dev/stderr
  43. usage
  44. exit
  45. ;;
  46. esac
  47. done
  48. echo "Input FASTQ dir:" $fq_dir
  49. echo "Output BAM dir:" $out_dir
  50. echo "Sample Name:" $sample
  51. echo "CNV Analysis dir:" $cnv_dir
  52. echo $feby2_script
  53. . $feby2_script/conf/feby2.cfg
  54. ### FASTQ TRIM and Mapping
  55. test -d $out_dir/${sample} || mkdir ${outdir}/${sample}
  56. cd $out_dir/${sample}
  57. $fastp -i ${fq_dir}/${sample}/${sample}_combined_R1.fastq.gz -I ${fq_dir}/${sample}/${sample}_combined_R2.fastq.gz -o ${sample}_1.fq.gz -O ${sample}_2.fq.gz -c --length_required 50 -q 20 -j ${sample}.fastp.json -h ${sample}.fastp.html
  58. $speedseq_dir/speedseq align -o ${sample} -M 20 -t 8 -R "@RG\tID:${sample}\tSM:${sample}\tLB:${sample}\tPL:ILLUMINA" $ref ${sample}_1.fq.gz ${sample}_2.fq.gz && rm ${sample}_1.fq.gz ${sample}_2.fq.gz
  59. ### SNV/INDEL CAll and annotation
  60. $gatk4 --java-options "-Xmx10g" HaplotypeCaller -R $ref -I ${sample}.bam -ip 20 -L $target_call -O ${sample}.gatk.vcf.gz
  61. /cgdata/CGTools/CGTools/wrapper/S14-annovar-snv-annotation.sh -w . -s ${sample}.gatk -v ${sample}.gatk.vcf.gz
  62. perl $exe_dir/snp_stat.pl $sample.gatk.hg19_multianno.txt
  63. python /cgdata/CGTools/CGTools/tools/T07-var-filter.py -i $sample.gatk.hg19_multianno.txt -o $sample.gatk.hg19_multianno.xlsx
  64. ### QC
  65. test -d conest/ || mkdir conest/
  66. $qualimap_dir/qualimap bamqc -bam ${sample}.bam -c --java-mem-size=20G -sd -gff ${target}
  67. $verify --vcf $bia_snp --bam ${sample}.bam --out conest/${sample}.verify --verbose --ignoreRG &
  68. $gatk4 CollectHsMetrics -I ${sample}.bam -BI $interval -TI $interval -O ${sample}_hs_metrics.txt -R $ref --COVERAGE_CAP 6000 -MQ 0 --CLIP_OVERLAPPING_READS false &
  69. $gatk4 GetPileupSummaries -I ${sample}.bam -L $target -V $bia_snp -max-af 0.99 -min-af 0.01 -O conest/${sample}.ps.tab
  70. $gatk4 CalculateContamination -I conest/${sample}.ps.tab -O conest/${sample}.contEST.tab
  71. wait
  72. ### yesuan
  73. $gatk4 HaplotypeCaller -R $ref -I ${sample}.bam --genotyping-mode GENOTYPE_GIVEN_ALLELES -L $ye_target --alleles $ye_site -O ${sample}.ye.vcf
  74. bcftools norm -m-both -o ${sample}.step1.vcf ${sample}.ye.vcf
  75. bcftools norm -f $ref -o ${sample}.step2.vcf ${sample}.step1.vcf
  76. $gatk4 VariantsToTable -V ${sample}.step2.vcf -O ${sample}.ye.norm.tab -F CHROM -F POS -F TYPE -GF GT -GF GQ -GF AD -F REF -F ALT --show-filtered && rm ${sample}.step2.vcf ${sample}.step1.vcf
  77. perl $ye_home/prep_FeBY2_plus.pl ${sample} FeBY2 $ye_ref
  78. ### prepare for CNV
  79. bamf=$out_dir/$sample/$sample.bam
  80. cd $cnv_dir
  81. samtools view -f 2 -F 1024 -F 2048 -hub $bamf \
  82. | samtools sort -n -l 0 -m 20G - |bamToBed -bedpe \
  83. | awk '$1!~/M/ && $1==$4 && $2!=-1 && $6-$2>0 && $6-$2<1000{print $1"\t"$2"\t"$6"\t"$7}' \
  84. | intersectBed -a - -b $cnv_target -loj -wb \
  85. | perl $feby2_script/script/get_GCcor2cov.pl $sample $ref $target_GCref $cnv_target b37 > $sample.gc.xls
  86. test -s $sample_?_QC_plot.pdf || Rscript $feby2_script/script/MBY2_CNV_stat.r $sample
  87. samtools view -hb -L $target $bamf > ${sample}.target.bam && samtools index ${sample}.target.bam && samtools idxstats ${sample}.target.bam > ${sample}.idxstats && rm ${sample}.target.bam*
  88. ## for CNV
  89. perl $exe_dir/com_cov_norm.pl FeBY2.exon.cov.xls
  90. perl $exe_dir/stats_chrs.pl FeBY2.chr.cov.xls && rm *.idxstats
  91. cp $feby2_script/dat/*.pcov $cnv_dir/
  92. cp $feby2_script/dat/ref.list $cnv_dir/
  93. sex=`echo ${sample}_?_QC_plot.pdf |sed "s/${sample}_//;s/_QC_plot.pdf//" `
  94. echo sex is $sex.
  95. test -d ${sample}_cnv || mkdir ${sample}_cnv
  96. test -e ${sample}_cnv/$sample.normals.txt && rm ${sample}_cnv/$sample.normals.txt
  97. for sam in ` cat ref.list |grep -v $sample `
  98. do
  99. echo $sam.pcov | cat >> ${sample}_cnv/$sample.normals.txt
  100. done
  101. $gatkb CombineReadCounts -inputList ${sample}_cnv/$sample.normals.txt -O ${sample}_cnv/$sample.combined-normals.tsv
  102. $gatkb CreatePanelOfNormals -I ${sample}_cnv/$sample.combined-normals.tsv -O ${sample}_cnv/$sample.normals.pon -noQC --disableSpark --minimumTargetFactorPercentileThreshold 0.2
  103. ref_cov=${sample}_cnv/$sample.normals.pon
  104. echo $ref_cov
  105. $gatkb NormalizeSomaticReadCounts -I ${sample}.pcov -PON ${ref_cov} -PTN ${sample}_cnv/${sample}.ptn.tsv -TN ${sample}_cnv/${sample}.tn.tsv
  106. $gatkb PerformSegmentation -TN ${sample}_cnv/${sample}.tn.tsv -O ${sample}_cnv/${sample}.seg -LOG
  107. $gatkb CallSegments -TN ${sample}_cnv/${sample}.tn.tsv -S ${sample}_cnv/${sample}.seg -O ${sample}_cnv/${sample}.called
  108. $gatkb PlotSegmentedCopyRatio -TN ${sample}_cnv/${sample}.tn.tsv -PTN ${sample}_cnv/${sample}.ptn.tsv -S ${sample}_cnv/${sample}.seg -O ${sample}_cnv -pre ${sample} -SD ${b37_dict} -LOG
  109. perl $exe_dir/seg_merge.pl ${sample}_cnv/${sample}.seg
  110. awk 'NR>1 && ($6 > 1.38 || $6 < 0.62) ' ${sample}_cnv/${sample}.seg.xls |awk -v sex=$sex '{print $1"\t"sex"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6}' > ${sample}-cnv.input.tsv
  111. echo -e "Sample\tGender\tChr\tStart\tEnd\tLength\tCopy-ratio\tBand_region" |cat - ${sample}-cnv.input.tsv > ${sample}-${sex}-cnv.input.xls && rm ${sample}-cnv.input.tsv
  112. $anno_cnv -name ${sample} -outdir . -clean ${sample}-${sex}-cnv.input.xls
  113. perl $exe_dir/trim_cnv_result.pl $sample && mv $sample.cnv.anno.xls $sample-?-cnv.input.xls ${sample}_cnv/
  114. ## plot
  115. cp ${sample}_cnv/$sample.ptn.tsv .
  116. cp ${sample}_cnv/$sample.tn.tsv .
  117. cp ${sample}_cnv/$sample.seg .
  118. cp $out_dir/${sample}/conest/${sample}.ps.tab .
  119. perl $exe_dir/pre_plot.pl ${sample}
  120. /cgdata/guocq/soft/R-3.6.0/bin/Rscript $exe_dir/plot_panel_CR.r ${sample}
  121. convert -density 300 $sample.CR_VAF.pdf $sample.CR_VAF.png && rm -r $sample.ptn.tsv $sample.tn.tsv $sample.seg ${sample}.ps.tab $sample.CR_VAF.pdf ${sample}_plot.tsv