123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139 |
- #!/usr/bin/env bash
- ##Author: guochangquan
- ##Copyright 20210929
- ##company: NuProbe
- set -e
- function usage() {
- echo "Usage: $0 [ -s <sample> ] [ -i <fq_dir> ] [ -o <analysis_dir> ] [ -x <script_dir> ] [ -c <FeBY2_CNV_outdir> ]"
- echo "-s sample name"
- echo "-i input fastq dir"
- echo "-x script_dir"
- echo "-o analysis_dir"
- echo "-c FeBY2 CNV outdir"
- }
- if [ $# -eq 0 ]
- then
- usage
- exit
- fi
- feby2_script=$(cd "$(readlink -f "$0" | xargs dirname)"; pwd)
- while getopts ":s:c:x:o:i:h" opt; do
- case $opt in
- i)
- fq_dir=$OPTARG
- ;;
- o)
- out_dir=$OPTARG
- ;;
- s)
- sample=$OPTARG
- ;;
- c)
- cnv_dir=$OPTARG
- ;;
- x)
- feby2_script=$OPTARG
- ;;
- h)
- usage
- exit
- ;;
- \?)
- echo "Invalid option: -$OPTARG" > /dev/stderr
- usage
- exit
- ;;
- esac
- done
- echo "Input FASTQ dir:" $fq_dir
- echo "Output BAM dir:" $out_dir
- echo "Sample Name:" $sample
- echo "CNV Analysis dir:" $cnv_dir
- echo $feby2_script
- . $feby2_script/conf/feby2.cfg
- ### FASTQ TRIM and Mapping
- test -d $out_dir/${sample} || mkdir ${outdir}/${sample}
- cd $out_dir/${sample}
- $fastp -i ${fq_dir}/${sample}/${sample}_combined_R1.fastq.gz -I ${fq_dir}/${sample}/${sample}_combined_R2.fastq.gz -o ${sample}_1.fq.gz -O ${sample}_2.fq.gz -c --length_required 50 -q 20 -j ${sample}.fastp.json -h ${sample}.fastp.html
- $speedseq_dir/speedseq align -o ${sample} -M 20 -t 8 -R "@RG\tID:${sample}\tSM:${sample}\tLB:${sample}\tPL:ILLUMINA" $ref ${sample}_1.fq.gz ${sample}_2.fq.gz && rm ${sample}_1.fq.gz ${sample}_2.fq.gz
- ### SNV/INDEL CAll and annotation
- $gatk4 --java-options "-Xmx10g" HaplotypeCaller -R $ref -I ${sample}.bam -ip 20 -L $target_call -O ${sample}.gatk.vcf.gz
- /cgdata/CGTools/CGTools/wrapper/S14-annovar-snv-annotation.sh -w . -s ${sample}.gatk -v ${sample}.gatk.vcf.gz
- perl $exe_dir/snp_stat.pl $sample.gatk.hg19_multianno.txt
- python /cgdata/CGTools/CGTools/tools/T07-var-filter.py -i $sample.gatk.hg19_multianno.txt -o $sample.gatk.hg19_multianno.xlsx
- ### QC
- test -d conest/ || mkdir conest/
- $qualimap_dir/qualimap bamqc -bam ${sample}.bam -c --java-mem-size=20G -sd -gff ${target}
- $verify --vcf $bia_snp --bam ${sample}.bam --out conest/${sample}.verify --verbose --ignoreRG &
- $gatk4 CollectHsMetrics -I ${sample}.bam -BI $interval -TI $interval -O ${sample}_hs_metrics.txt -R $ref --COVERAGE_CAP 6000 -MQ 0 --CLIP_OVERLAPPING_READS false &
- $gatk4 GetPileupSummaries -I ${sample}.bam -L $target -V $bia_snp -max-af 0.99 -min-af 0.01 -O conest/${sample}.ps.tab
- $gatk4 CalculateContamination -I conest/${sample}.ps.tab -O conest/${sample}.contEST.tab
- wait
- ### yesuan
- $gatk4 HaplotypeCaller -R $ref -I ${sample}.bam --genotyping-mode GENOTYPE_GIVEN_ALLELES -L $ye_target --alleles $ye_site -O ${sample}.ye.vcf
- bcftools norm -m-both -o ${sample}.step1.vcf ${sample}.ye.vcf
- bcftools norm -f $ref -o ${sample}.step2.vcf ${sample}.step1.vcf
- $gatk4 VariantsToTable -V ${sample}.step2.vcf -O ${sample}.ye.norm.tab -F CHROM -F POS -F TYPE -GF GT -GF GQ -GF AD -F REF -F ALT --show-filtered && rm ${sample}.step2.vcf ${sample}.step1.vcf
- perl $ye_home/prep_FeBY2_plus.pl ${sample} FeBY2 $ye_ref
- ### prepare for CNV
- bamf=$out_dir/$sample/$sample.bam
- cd $cnv_dir
- samtools view -f 2 -F 1024 -F 2048 -hub $bamf \
- | samtools sort -n -l 0 -m 20G - |bamToBed -bedpe \
- | awk '$1!~/M/ && $1==$4 && $2!=-1 && $6-$2>0 && $6-$2<1000{print $1"\t"$2"\t"$6"\t"$7}' \
- | intersectBed -a - -b $cnv_target -loj -wb \
- | perl $feby2_script/script/get_GCcor2cov.pl $sample $ref $target_GCref $cnv_target b37 > $sample.gc.xls
- test -s $sample_?_QC_plot.pdf || Rscript $feby2_script/script/MBY2_CNV_stat.r $sample
- samtools view -hb -L $target $bamf > ${sample}.target.bam && samtools index ${sample}.target.bam && samtools idxstats ${sample}.target.bam > ${sample}.idxstats && rm ${sample}.target.bam*
- ## for CNV
- perl $exe_dir/com_cov_norm.pl FeBY2.exon.cov.xls
- perl $exe_dir/stats_chrs.pl FeBY2.chr.cov.xls && rm *.idxstats
- cp $feby2_script/dat/*.pcov $cnv_dir/
- cp $feby2_script/dat/ref.list $cnv_dir/
- sex=`echo ${sample}_?_QC_plot.pdf |sed "s/${sample}_//;s/_QC_plot.pdf//" `
- echo sex is $sex.
- test -d ${sample}_cnv || mkdir ${sample}_cnv
- test -e ${sample}_cnv/$sample.normals.txt && rm ${sample}_cnv/$sample.normals.txt
- for sam in ` cat ref.list |grep -v $sample `
- do
- echo $sam.pcov | cat >> ${sample}_cnv/$sample.normals.txt
- done
- $gatkb CombineReadCounts -inputList ${sample}_cnv/$sample.normals.txt -O ${sample}_cnv/$sample.combined-normals.tsv
- $gatkb CreatePanelOfNormals -I ${sample}_cnv/$sample.combined-normals.tsv -O ${sample}_cnv/$sample.normals.pon -noQC --disableSpark --minimumTargetFactorPercentileThreshold 0.2
- ref_cov=${sample}_cnv/$sample.normals.pon
- echo $ref_cov
- $gatkb NormalizeSomaticReadCounts -I ${sample}.pcov -PON ${ref_cov} -PTN ${sample}_cnv/${sample}.ptn.tsv -TN ${sample}_cnv/${sample}.tn.tsv
- $gatkb PerformSegmentation -TN ${sample}_cnv/${sample}.tn.tsv -O ${sample}_cnv/${sample}.seg -LOG
- $gatkb CallSegments -TN ${sample}_cnv/${sample}.tn.tsv -S ${sample}_cnv/${sample}.seg -O ${sample}_cnv/${sample}.called
- $gatkb PlotSegmentedCopyRatio -TN ${sample}_cnv/${sample}.tn.tsv -PTN ${sample}_cnv/${sample}.ptn.tsv -S ${sample}_cnv/${sample}.seg -O ${sample}_cnv -pre ${sample} -SD ${b37_dict} -LOG
- perl $exe_dir/seg_merge.pl ${sample}_cnv/${sample}.seg
- awk 'NR>1 && ($6 > 1.38 || $6 < 0.62) ' ${sample}_cnv/${sample}.seg.xls |awk -v sex=$sex '{print $1"\t"sex"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6}' > ${sample}-cnv.input.tsv
- echo -e "Sample\tGender\tChr\tStart\tEnd\tLength\tCopy-ratio\tBand_region" |cat - ${sample}-cnv.input.tsv > ${sample}-${sex}-cnv.input.xls && rm ${sample}-cnv.input.tsv
- $anno_cnv -name ${sample} -outdir . -clean ${sample}-${sex}-cnv.input.xls
- perl $exe_dir/trim_cnv_result.pl $sample && mv $sample.cnv.anno.xls $sample-?-cnv.input.xls ${sample}_cnv/
- ## plot
- cp ${sample}_cnv/$sample.ptn.tsv .
- cp ${sample}_cnv/$sample.tn.tsv .
- cp ${sample}_cnv/$sample.seg .
- cp $out_dir/${sample}/conest/${sample}.ps.tab .
- perl $exe_dir/pre_plot.pl ${sample}
- /cgdata/guocq/soft/R-3.6.0/bin/Rscript $exe_dir/plot_panel_CR.r ${sample}
- convert -density 300 $sample.CR_VAF.pdf $sample.CR_VAF.png && rm -r $sample.ptn.tsv $sample.tn.tsv $sample.seg ${sample}.ps.tab $sample.CR_VAF.pdf ${sample}_plot.tsv
|