#!/usr/bin/env bash ##Author: guochangquan ##Copyright 20210929 ##company: NuProbe set -e function usage() { echo "Usage: $0 [ -s ] [ -i ] [ -o ] [ -x ] [ -c ]" echo "-s sample name" echo "-i input fastq dir" echo "-x script_dir" echo "-o analysis_dir" echo "-c FeBY2 CNV outdir" } if [ $# -eq 0 ] then usage exit fi while getopts ":s:c:x:o:i:h" opt; do case $opt in i) fq_dir=$OPTARG ;; o) out_dir=$OPTARG ;; s) sample=$OPTARG ;; c) cnv_dir=$OPTARG ;; x) feby2_script=$OPTARG ;; h) usage exit ;; \?) echo "Invalid option: -$OPTARG" > /dev/stderr usage exit ;; esac done echo "Input FASTQ dir:" $fq_dir echo "Output BAM dir:" $out_dir echo "Sample Name:" $sample echo "CNV Analysis dir:" $cnv_dir echo $feby2_script . $feby2_script/conf/feby2.cfg ### FASTQ TRIM and Mapping test -d $out_dir/${sample} || mkdir ${outdir}/${sample} cd $out_dir/${sample} $fastp -i ${fq_dir}/${sample}/${sample}_combined_R1.fastq.gz -I ${fq_dir}/${sample}/${sample}_combined_R2.fastq.gz -o ${sample}_1.fq.gz -O ${sample}_2.fq.gz -c --length_required 50 -q 20 -j ${sample}.fastp.json -h ${sample}.fastp.html $speedseq_dir/speedseq align -o ${sample} -M 20 -t 8 -R "@RG\tID:${sample}\tSM:${sample}\tLB:${sample}\tPL:ILLUMINA" $ref ${sample}_1.fq.gz ${sample}_2.fq.gz && rm ${sample}_1.fq.gz ${sample}_2.fq.gz ### SNV/INDEL CAll and annotation $gatk4 --java-options "-Xmx10g" HaplotypeCaller -R $ref -I ${sample}.bam -ip 20 -L $target_call -O ${sample}.gatk.vcf.gz /cgdata/CGTools/CGTools/wrapper/S14-annovar-snv-annotation.sh -w . -s ${sample}.gatk -v ${sample}.gatk.vcf.gz perl $exe_dir/snp_stat.pl $sample.gatk.hg19_multianno.txt python /cgdata/CGTools/CGTools/tools/T07-var-filter.py -i $sample.gatk.hg19_multianno.txt -o $sample.gatk.hg19_multianno.xlsx ### QC test -d conest/ || mkdir conest/ $qualimap_dir/qualimap bamqc -bam ${sample}.bam -c --java-mem-size=20G -sd -gff ${target} $verify --vcf $bia_snp --bam ${sample}.bam --out conest/${sample}.verify --verbose --ignoreRG & $gatk4 CollectHsMetrics -I ${sample}.bam -BI $interval -TI $interval -O ${sample}_hs_metrics.txt -R $ref --COVERAGE_CAP 6000 -MQ 0 --CLIP_OVERLAPPING_READS false & $gatk4 GetPileupSummaries -I ${sample}.bam -L $target -V $bia_snp -max-af 0.99 -min-af 0.01 -O conest/${sample}.ps.tab $gatk4 CalculateContamination -I conest/${sample}.ps.tab -O conest/${sample}.contEST.tab wait ### yesuan $gatk4 HaplotypeCaller -R $ref -I ${sample}.bam --genotyping-mode GENOTYPE_GIVEN_ALLELES -L $ye_target --alleles $ye_site -O ${sample}.ye.vcf bcftools norm -m-both -o ${sample}.step1.vcf ${sample}.ye.vcf bcftools norm -f $ref -o ${sample}.step2.vcf ${sample}.step1.vcf $gatk4 VariantsToTable -V ${sample}.step2.vcf -O ${sample}.ye.norm.tab -F CHROM -F POS -F TYPE -GF GT -GF GQ -GF AD -F REF -F ALT --show-filtered && rm ${sample}.step2.vcf ${sample}.step1.vcf perl $ye_home/prep_FeBY2_plus.pl ${sample} FeBY2 $ye_ref ### prepare for CNV bamf=$out_dir/$sample/$sample.bam cd $cnv_dir samtools view -f 2 -F 1024 -F 2048 -hub $bamf \ | samtools sort -n -l 0 -m 20G - |bamToBed -bedpe \ | awk '$1!~/M/ && $1==$4 && $2!=-1 && $6-$2>0 && $6-$2<1000{print $1"\t"$2"\t"$6"\t"$7}' \ | intersectBed -a - -b $cnv_target -loj -wb \ | perl $feby2_script/script/get_GCcor2cov.pl $sample $ref $target_GCref $cnv_target b37 > $sample.gc.xls test -s $sample_?_QC_plot.pdf || Rscript $feby2_script/script/MBY2_CNV_stat.r $sample samtools view -hb -L $target $bamf > ${sample}.target.bam && samtools index ${sample}.target.bam && samtools idxstats ${sample}.target.bam > ${sample}.idxstats && rm ${sample}.target.bam*