run_lcWGS_call_L2.sh 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. #!/usr/bin/env bash
  2. ##Author: guochangquan
  3. ##Copyright 20210907
  4. ##company: NuProbe
  5. set -e
  6. lcwgs_path=$(cd "$(readlink -f "$0" | xargs dirname)"; pwd)
  7. echo $0
  8. function usage() {
  9. echo "Usage: $0 [-r <Seq_length>] [ -i <rawdata_dir> ] [ -s <sample> ] [ -d <analysis_dir> ] [ -x <script_dir> ]"
  10. echo "-r reads长度,75 or >75bp"
  11. echo "-i rawdata_dir.File structure follow:indir/sample/sample_combined_R?.fastq.gz"
  12. echo "-s sample name"
  13. echo "-x script_dir"
  14. echo "-d analysis outdir"
  15. }
  16. if [ $# -eq 0 ]
  17. then
  18. usage
  19. exit
  20. fi
  21. while getopts ":r:i:s:d:x:h" opt; do
  22. case $opt in
  23. r)
  24. SeqLen=$OPTARG
  25. ;;
  26. i)
  27. indir=$OPTARG
  28. ;;
  29. s)
  30. sample=$OPTARG
  31. ;;
  32. d)
  33. out=$OPTARG
  34. ;;
  35. x)
  36. lcwgs_path=$OPTARG
  37. ;;
  38. h)
  39. usage
  40. exit
  41. ;;
  42. \?)
  43. echo "Invalid option: -$OPTARG" > /dev/stderr
  44. usage
  45. exit
  46. ;;
  47. esac
  48. done
  49. echo "SeqLen:" $SeqLen
  50. echo "Rawdata dir:" $indir
  51. echo "Sample Name:" $sample
  52. echo "Analysis dir:" $out
  53. echo $lcwgs_path
  54. . $lcwgs_path/conf/lcwgs.cfg
  55. export SAM=$sample
  56. fq1=`find $indir -name "*${sample}*.gz" |perl -ne 'print if /$ENV{SAM}(_S\d+)?(_L\d+)?(_combined)?_R?1(_\d+)?.f(ast)?q.gz/' `
  57. fq2=`find $indir -name "*${sample}*.gz" |perl -ne 'print if /$ENV{SAM}(_S\d+)?(_L\d+)?(_combined)?_R?2(_\d+)?.f(ast)?q.gz/' `
  58. cd $out/${sample}
  59. if [ $SeqLen -eq 75 ]
  60. then
  61. echo "Sequence Length is:" $SeqLen
  62. $bwa_dir/run-bwamem -o $sample -ds -t 12 -R "@RG\tID:$sample\tSM:$sample" $ref $fq1 $fq2 | sh
  63. else
  64. echo "Sequence Length is:" $SeqLen
  65. gzip -dc $fq1 | $fastx_trimmer -l 75 -z -o ${sample}_75_R1.fq.gz &
  66. gzip -dc $fq2 | $fastx_trimmer -l 75 -z -o ${sample}_75_R2.fq.gz
  67. wait
  68. #$fastp -i $indir/${sample}/${sample}_combined_R1.fastq.gz -I $indir/${sample}/${sample}_combined_R2.fastq.gz -o ${sample}_75_R1.fq.gz -O ${sample}_75_R2.fq.gz -b 75 -l 50 -q 20 -j ${sample}.fastp.json -h ${sample}.fastp.html
  69. $bwa_dir/run-bwamem -o $sample -ds -t 12 -R "@RG\tID:$sample\tSM:$sample" $ref ${sample}_75_R1.fq.gz ${sample}_75_R2.fq.gz | sh
  70. fi
  71. samtools index ${sample}.aln.bam
  72. test -f ${sample}_75_R1.fq.gz && rm ${sample}_75_R1.fq.gz
  73. test -f ${sample}_75_R2.fq.gz && rm ${sample}_75_R2.fq.gz
  74. samtools flagstat ${sample}.aln.bam > ${sample}.aln.flagstat
  75. $gatk4 CollectGcBiasMetrics -CHART $sample.gc.pdf -I $sample.aln.bam -O $sample.gc_bias.txt -S $sample.gc_summary.txt -R $ref -WINDOW_SIZE $window
  76. grep -v '^#' $sample.gc_summary.txt |cut -f 4-12 | \
  77. awk -v sam=$sample 'NF==9{if($1~/TOTAL_CLUSTERS/){print "Sample\tMap(%)\t"$0}
  78. else{map=sprintf("%.3f",$2/(2*$1));if(map>1){map=1};print sam"\t"map"\t"$0} } ' > $sample.lcWGS.QC.xls && rm $sample.gc_summary.txt
  79. samtools view -b -F 1024 -F 2048 -F 256 -f 64 $sample.aln.bam | $bedtools_dir/bamToBed | \
  80. awk '$3-$2>70{print $1"\t"$2"\t"$2+1}' | $bedtools_dir/coverageBed -a ${target_bed} -b - > $sample.target.cov
  81. Rscript $exe_dir/run_hmmcopy-wes.r $sample
  82. perl $exe_dir/hmm2pcov.pl $sample.normal.tsv
  83. Rscript $exe_dir/lcWGS_CNV_call_v1.r ${target_hmm_ref} $sample
  84. sex=`echo ${sample}_?_dnacopy.out.tsv |sed "s/${sample}_//;s/_dnacopy.out.tsv//" `
  85. perl $exe_dir/get_lcWGS_chr_CR.pl ${sample}_${sex}_dnacopy.out.tsv > ${sample}.chr_CR.xls
  86. awk 'NR>1 && ($6 > 1.37 || $6 < 0.63) && $1 != "Control"' ${sample}_${sex}_dnacopy.out.tsv |awk -v sex=$sex -v win=$window '{print $1"\t"sex"\t"$2"\t"$3"\t"$4+win"\t"$5"\t"$6}' > ${sample}-cnv.input.tsv
  87. if [ $sex == 'F' ]; then
  88. echo "$sample is Female, removing Y calls."
  89. awk '$3!~"Y" ' ${sample}-cnv.input.tsv > ${sample}-cnv.input.noY.tsv
  90. mv ${sample}-cnv.input.noY.tsv ${sample}-cnv.input.tsv
  91. fi
  92. echo -e "Sample\tGender\tChr\tStart\tEnd\tLength\tCopy-ratio\tBand_region" |cat - ${sample}-cnv.input.tsv > ${sample}-${sex}-cnv.input.xls && rm ${sample}-cnv.input.tsv
  93. perl $exe_dir/cnv_trim_gap.pl $ref_gap $sample
  94. mv ${sample}-${sex}-cnv.input.xls.xls ${sample}-${sex}-cnv.input.xls
  95. $Annotate_cnv -name ${sample} -outdir . -clean ${sample}-${sex}-cnv.input.xls
  96. perl $exe_dir/trim_cnv_result.pl ${sample} && rm $sample.cnv.anno.xls
  97. . $lcwgs_path/run_lcWGS_100k.sh