123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106 |
- #!/usr/bin/env bash
- ##Author: guochangquan
- ##Copyright 20210907
- ##company: NuProbe
- set -e
- lcwgs_path=$(cd "$(readlink -f "$0" | xargs dirname)"; pwd)
- echo $0
- function usage() {
- echo "Usage: $0 [-r <Seq_length>] [ -i <rawdata_dir> ] [ -s <sample> ] [ -d <analysis_dir> ] [ -x <script_dir> ]"
- echo "-r reads长度,75 or >75bp"
- echo "-i rawdata_dir.File structure follow:indir/sample/sample_combined_R?.fastq.gz"
- echo "-s sample name"
- echo "-x script_dir"
- echo "-d analysis outdir"
- }
- if [ $# -eq 0 ]
- then
- usage
- exit
- fi
- while getopts ":r:i:s:d:x:h" opt; do
- case $opt in
- r)
- SeqLen=$OPTARG
- ;;
- i)
- indir=$OPTARG
- ;;
- s)
- sample=$OPTARG
- ;;
- d)
- out=$OPTARG
- ;;
- x)
- lcwgs_path=$OPTARG
- ;;
- h)
- usage
- exit
- ;;
- \?)
- echo "Invalid option: -$OPTARG" > /dev/stderr
- usage
- exit
- ;;
- esac
- done
- echo "SeqLen:" $SeqLen
- echo "Rawdata dir:" $indir
- echo "Sample Name:" $sample
- echo "Analysis dir:" $out
- echo $lcwgs_path
- . $lcwgs_path/conf/lcwgs.cfg
- export SAM=$sample
- fq1=`find $indir -name "*${sample}*.gz" |perl -ne 'print if /$ENV{SAM}(_S\d+)?(_L\d+)?(_combined)?_R?1(_\d+)?.f(ast)?q.gz/' `
- fq2=`find $indir -name "*${sample}*.gz" |perl -ne 'print if /$ENV{SAM}(_S\d+)?(_L\d+)?(_combined)?_R?2(_\d+)?.f(ast)?q.gz/' `
- cd $out/${sample}
- if [ $SeqLen -eq 75 ]
- then
- echo "Sequence Length is:" $SeqLen
- $bwa_dir/run-bwamem -o $sample -ds -t 12 -R "@RG\tID:$sample\tSM:$sample" $ref $fq1 $fq2 | sh
- else
- echo "Sequence Length is:" $SeqLen
- gzip -dc $fq1 | $fastx_trimmer -l 75 -z -o ${sample}_75_R1.fq.gz &
- gzip -dc $fq2 | $fastx_trimmer -l 75 -z -o ${sample}_75_R2.fq.gz
- wait
- #$fastp -i $indir/${sample}/${sample}_combined_R1.fastq.gz -I $indir/${sample}/${sample}_combined_R2.fastq.gz -o ${sample}_75_R1.fq.gz -O ${sample}_75_R2.fq.gz -b 75 -l 50 -q 20 -j ${sample}.fastp.json -h ${sample}.fastp.html
- $bwa_dir/run-bwamem -o $sample -ds -t 12 -R "@RG\tID:$sample\tSM:$sample" $ref ${sample}_75_R1.fq.gz ${sample}_75_R2.fq.gz | sh
- fi
- samtools index ${sample}.aln.bam
- test -f ${sample}_75_R1.fq.gz && rm ${sample}_75_R1.fq.gz
- test -f ${sample}_75_R2.fq.gz && rm ${sample}_75_R2.fq.gz
- samtools flagstat ${sample}.aln.bam > ${sample}.aln.flagstat
- $gatk4 CollectGcBiasMetrics -CHART $sample.gc.pdf -I $sample.aln.bam -O $sample.gc_bias.txt -S $sample.gc_summary.txt -R $ref -WINDOW_SIZE $window
- grep -v '^#' $sample.gc_summary.txt |cut -f 4-12 | \
- awk -v sam=$sample 'NF==9{if($1~/TOTAL_CLUSTERS/){print "Sample\tMap(%)\t"$0}
- else{map=sprintf("%.3f",$2/(2*$1));if(map>1){map=1};print sam"\t"map"\t"$0} } ' > $sample.lcWGS.QC.xls && rm $sample.gc_summary.txt
- samtools view -b -F 1024 -F 2048 -F 256 -f 64 $sample.aln.bam | $bedtools_dir/bamToBed | \
- awk '$3-$2>70{print $1"\t"$2"\t"$2+1}' | $bedtools_dir/coverageBed -a ${target_bed} -b - > $sample.target.cov
- Rscript $exe_dir/run_hmmcopy-wes.r $sample
- perl $exe_dir/hmm2pcov.pl $sample.normal.tsv
- Rscript $exe_dir/lcWGS_CNV_call_v1.r ${target_hmm_ref} $sample
- sex=`echo ${sample}_?_dnacopy.out.tsv |sed "s/${sample}_//;s/_dnacopy.out.tsv//" `
- perl $exe_dir/get_lcWGS_chr_CR.pl ${sample}_${sex}_dnacopy.out.tsv > ${sample}.chr_CR.xls
- awk 'NR>1 && ($6 > 1.37 || $6 < 0.63) && $1 != "Control"' ${sample}_${sex}_dnacopy.out.tsv |awk -v sex=$sex -v win=$window '{print $1"\t"sex"\t"$2"\t"$3"\t"$4+win"\t"$5"\t"$6}' > ${sample}-cnv.input.tsv
- if [ $sex == 'F' ]; then
- echo "$sample is Female, removing Y calls."
- awk '$3!~"Y" ' ${sample}-cnv.input.tsv > ${sample}-cnv.input.noY.tsv
- mv ${sample}-cnv.input.noY.tsv ${sample}-cnv.input.tsv
- fi
- echo -e "Sample\tGender\tChr\tStart\tEnd\tLength\tCopy-ratio\tBand_region" |cat - ${sample}-cnv.input.tsv > ${sample}-${sex}-cnv.input.xls && rm ${sample}-cnv.input.tsv
- perl $exe_dir/cnv_trim_gap.pl $ref_gap $sample
- mv ${sample}-${sex}-cnv.input.xls.xls ${sample}-${sex}-cnv.input.xls
- $Annotate_cnv -name ${sample} -outdir . -clean ${sample}-${sex}-cnv.input.xls
- perl $exe_dir/trim_cnv_result.pl ${sample} && rm $sample.cnv.anno.xls
- . $lcwgs_path/run_lcWGS_100k.sh
|