1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798 |
- #!/usr/bin/env bash
- ##Author: guochangquan
- ##Copyright 20210929
- ##company: NuProbe
- set -e
- function usage() {
- echo "Usage: $0 [ -s <sample> ] [ -i <fq_dir> ] [ -o <analysis_dir> ] [ -x <script_dir> ] [ -c <FeBY2_CNV_outdir> ]"
- echo "-s sample name"
- echo "-i input fastq dir"
- echo "-x script_dir"
- echo "-o analysis_dir"
- echo "-c FeBY2 CNV outdir"
- }
- if [ $# -eq 0 ]
- then
- usage
- exit
- fi
- while getopts ":s:c:x:o:i:h" opt; do
- case $opt in
- i)
- fq_dir=$OPTARG
- ;;
- o)
- out_dir=$OPTARG
- ;;
- s)
- sample=$OPTARG
- ;;
- c)
- cnv_dir=$OPTARG
- ;;
- x)
- feby2_script=$OPTARG
- ;;
- h)
- usage
- exit
- ;;
- \?)
- echo "Invalid option: -$OPTARG" > /dev/stderr
- usage
- exit
- ;;
- esac
- done
- echo "Input FASTQ dir:" $fq_dir
- echo "Output BAM dir:" $out_dir
- echo "Sample Name:" $sample
- echo "CNV Analysis dir:" $cnv_dir
- echo $feby2_script
- . $feby2_script/conf/feby2.cfg
- ### FASTQ TRIM and Mapping
- test -d $out_dir/${sample} || mkdir ${outdir}/${sample}
- cd $out_dir/${sample}
- $fastp -i ${fq_dir}/${sample}/${sample}_combined_R1.fastq.gz -I ${fq_dir}/${sample}/${sample}_combined_R2.fastq.gz -o ${sample}_1.fq.gz -O ${sample}_2.fq.gz -c --length_required 50 -q 20 -j ${sample}.fastp.json -h ${sample}.fastp.html
- $speedseq_dir/speedseq align -o ${sample} -M 20 -t 8 -R "@RG\tID:${sample}\tSM:${sample}\tLB:${sample}\tPL:ILLUMINA" $ref ${sample}_1.fq.gz ${sample}_2.fq.gz && rm ${sample}_1.fq.gz ${sample}_2.fq.gz
- ### SNV/INDEL CAll and annotation
- $gatk4 --java-options "-Xmx10g" HaplotypeCaller -R $ref -I ${sample}.bam -ip 20 -L $target_call -O ${sample}.gatk.vcf.gz
- /cgdata/CGTools/CGTools/wrapper/S14-annovar-snv-annotation.sh -w . -s ${sample}.gatk -v ${sample}.gatk.vcf.gz
- perl $exe_dir/snp_stat.pl $sample.gatk.hg19_multianno.txt
- python /cgdata/CGTools/CGTools/tools/T07-var-filter.py -i $sample.gatk.hg19_multianno.txt -o $sample.gatk.hg19_multianno.xlsx
- ### QC
- test -d conest/ || mkdir conest/
- $qualimap_dir/qualimap bamqc -bam ${sample}.bam -c --java-mem-size=20G -sd -gff ${target}
- $verify --vcf $bia_snp --bam ${sample}.bam --out conest/${sample}.verify --verbose --ignoreRG &
- $gatk4 CollectHsMetrics -I ${sample}.bam -BI $interval -TI $interval -O ${sample}_hs_metrics.txt -R $ref --COVERAGE_CAP 6000 -MQ 0 --CLIP_OVERLAPPING_READS false &
- $gatk4 GetPileupSummaries -I ${sample}.bam -L $target -V $bia_snp -max-af 0.99 -min-af 0.01 -O conest/${sample}.ps.tab
- $gatk4 CalculateContamination -I conest/${sample}.ps.tab -O conest/${sample}.contEST.tab
- wait
- ### yesuan
- $gatk4 HaplotypeCaller -R $ref -I ${sample}.bam --genotyping-mode GENOTYPE_GIVEN_ALLELES -L $ye_target --alleles $ye_site -O ${sample}.ye.vcf
- bcftools norm -m-both -o ${sample}.step1.vcf ${sample}.ye.vcf
- bcftools norm -f $ref -o ${sample}.step2.vcf ${sample}.step1.vcf
- $gatk4 VariantsToTable -V ${sample}.step2.vcf -O ${sample}.ye.norm.tab -F CHROM -F POS -F TYPE -GF GT -GF GQ -GF AD -F REF -F ALT --show-filtered && rm ${sample}.step2.vcf ${sample}.step1.vcf
- perl $ye_home/prep_FeBY2_plus.pl ${sample} FeBY2 $ye_ref
- ### prepare for CNV
- bamf=$out_dir/$sample/$sample.bam
- cd $cnv_dir
- samtools view -f 2 -F 1024 -F 2048 -hub $bamf \
- | samtools sort -n -l 0 -m 20G - |bamToBed -bedpe \
- | awk '$1!~/M/ && $1==$4 && $2!=-1 && $6-$2>0 && $6-$2<1000{print $1"\t"$2"\t"$6"\t"$7}' \
- | intersectBed -a - -b $cnv_target -loj -wb \
- | perl $feby2_script/script/get_GCcor2cov.pl $sample $ref $target_GCref $cnv_target b37 > $sample.gc.xls
- test -s $sample_?_QC_plot.pdf || Rscript $feby2_script/script/MBY2_CNV_stat.r $sample
- samtools view -hb -L $target $bamf > ${sample}.target.bam && samtools index ${sample}.target.bam && samtools idxstats ${sample}.target.bam > ${sample}.idxstats && rm ${sample}.target.bam*
|