#!/usr/bin/perl use strict; use warnings; use Getopt::Std; use File::Basename; use File::Find; my $usage="Usage: trim verifyBam and gatk4 CalculateContamination result. result files name like \$sample.verify.selfSM and \$sample.contEST.tab. \tperl $0 outfile Options: -v verifyBam output directory. default '.' . -g gatk4 CalculateContamination output directory. default '.'.\n"; my ($g_dir,$v_dir,@samples,%data); my %opts = (v => '.', g => '.', ); my %opts1; getopts(':v:g:', \%opts1); unless (@ARGV){ print $usage; exit; } for my $key (keys %opts1){ my $val=$opts1{$key}; $opts{$key}=$val; } my $out=shift; for my $key (keys %opts){ my $val=$opts{$key}; if($key eq 'g'){ $g_dir=$val; print "gatk4 output directory: $val .\n"; } if($key eq 'v' ){ $v_dir= $val; print "verify output directory: $val .\n"; } } print "\$g_dir:$g_dir;\$v_dir:$v_dir\n"; my @gfiles; find(sub { push @gfiles,$File::Find::name if -f and /\.contEST.tab$/; }, $g_dir); die "No *.contEST.tab file under $g_dir,Please check.\n" unless @gfiles; @gfiles=sort @gfiles; for my $gfile(@gfiles){ open my $gfh,'<',$gfile or die "Cannot open file:$!"; my $sample=basename($gfile); $sample=~s/.contEST.tab//; push @samples,$sample; my $col; while(<$gfh>){ chomp; if($.==1){ my @conts=split /\t/; for my $i (0..$#conts){ $col=$i if $conts[$i] eq 'contamination'; } next; } my ($cont)=(split /\t/)[$col]; $data{g}{$sample}=sprintf "%.4f",$cont; } close($gfh); } my @vfiles; find(sub { push @vfiles,$File::Find::name if -f and /\.verify.selfSM$/; }, $v_dir); die "No *.verify.selfSM file under $v_dir,Please check.\n" unless @vfiles; for my $vfile(@vfiles){ open my $vfh,'<',$vfile or die "Cannot open file:$!"; my $sample=basename($vfile); $sample=~s/.verify.selfSM//; push @samples,$sample unless is_elmt($sample,@samples); while(<$vfh>){ next if /^#/; my ($id,$nsnp,$read,$avg_dp,$cont,$val1,$val2)=(split /\t/)[0,3..8] ; my $conts=sprintf("%.4f",($val2-$val1)/$val2); $data{v}{$sample}=join("\t",$nsnp,$avg_dp,sprintf("%.4f",$cont),$conts ); } close($vfh); } open my $wfh,'>',$out or die "Cannot write file:$!\n"; print $wfh join("\t","#Sample","SNPS","AVG_DP","Cont1","Cont2","Cont3"),"\n"; for my $sample(@samples){ my $g_result=exists $data{g}{$sample} ? $data{g}{$sample} :" "; my $v_result=exists $data{v}{$sample} ? $data{v}{$sample} :"\t"x3; print $wfh join("\t",$sample,$v_result,$g_result),"\n"; } ; sub is_elmt{ my ($var,@lists)=@_; for my $list(@lists){ return 1 if $var eq $list; } return 0; }