#!/usr/bin/perl use strict; use warnings; use Getopt::Std; use File::Basename; use File::Find; my $usage="Usage:\n\tperl $0 outfile\n Options:\n -d input directory. default '.' . -s If Scan is required.True for YES,False for NO. default 1 (YES) . \n"; my ($in_dir,$scan); my %opts = (d => '.', s=>1); my %opts1; unless (@ARGV){ print $usage; exit; } getopts(':d:s:', \%opts1); my $OUT=shift; for my $key (keys %opts1){ my $val=$opts1{$key}; $opts{$key}=$val; } for my $key (keys %opts){ my $val=$opts{$key}; if($key eq 'd'){ $in_dir=$val; print "Input directory: $val .\n"; } if($key eq 's'){ $scan=$val; print "Directory Scan is required.\n" if $scan; print "Sinple glob $in_dir for *_stats.\n" unless $scan; } } my @files; if($scan){ find(sub { push @files,$File::Find::name if -f and /_hs_metrics.txt$/; }, $in_dir); print "sanning...\n"; }else{ @files=<$in_dir/*_hs_metrics.txt>; print "No sanning...\n"; } my (@samples,%data); my @keys=qw(TARGET_TERRITORY TOTAL_READS PCT_PF_UQ_READS PCT_PF_UQ_READS_ALIGNED PCT_SELECTED_BASES MEAN_TARGET_COVERAGE PCT_USABLE_BASES_ON_TARGET FOLD_80_BASE_PENALTY PCT_TARGET_BASES_1X PCT_TARGET_BASES_2X PCT_TARGET_BASES_10X PCT_TARGET_BASES_20X PCT_TARGET_BASES_30X PCT_TARGET_BASES_40X PCT_TARGET_BASES_50X PCT_TARGET_BASES_100X AT_DROPOUT GC_DROPOUT); for my $file (sort @files){ open my $fh, '<', $file or die "Cannot open file:$!"; my $sam=basename $file; $sam=~s/_hs_metrics.txt//; push @samples,$sam; my $flag=0; my @head; while(<$fh>){ chomp; next if /^#/ or /^\s*$/; if(/^BAIT_SET/){ $flag=1; @head=split /\t/; next; } next unless $flag; my @conts=split /\t/; my ($cov30,$cov1); for my $i (0..$#conts){ my $key=$head[$i]; my $val=$conts[$i]; $cov30=$val if $key eq 'PCT_TARGET_BASES_30X'; $cov1=$val if $key eq 'PCT_TARGET_BASES_1X'; if($val=~/^\d+\.\d+$/ ){ $val= $val<100? sprintf("%.3f",$val):sprintf("%.0f",$val); } $data{$sam}{$key}=$val; } my $adj30=$cov1 ? sprintf("%.4f",$cov30/$cov1) : 0; $data{$sam}{Con}=$adj30; $flag=0; } } open my $wfh,'>',$OUT or die "Cannot write file:$OUT $!\n"; print $wfh join("\t",'#Sample',@keys),"\tAdjust_30X\n"; for my $sam (sort {$data{$a}{TARGET_TERRITORY}<=> $data{$b}{TARGET_TERRITORY} } @samples){ print $wfh $sam; for my $key(@keys){ print $wfh "\t",$data{$sam}{$key}; } print $wfh "\t",$data{$sam}{Con},"\n"; }