123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102 |
- #!/usr/bin/perl
- use strict;
- use warnings;
- use Getopt::Std;
- use File::Basename;
- use File::Find;
- my $usage="Usage:\n\tperl $0 outfile\n
- Options:\n
- -d input directory. default '.' .
- -s If Scan is required.True for YES,False for NO. default 1 (YES) .
- \n";
- my ($in_dir,$scan);
- my %opts = (d => '.', s=>1);
- my %opts1;
- unless (@ARGV){
- print $usage;
- exit;
- }
- getopts(':d:s:', \%opts1);
- my $OUT=shift;
- for my $key (keys %opts1){
- my $val=$opts1{$key};
- $opts{$key}=$val;
- }
- for my $key (keys %opts){
- my $val=$opts{$key};
- if($key eq 'd'){
- $in_dir=$val;
- print "Input directory: $val .\n";
- }
- if($key eq 's'){
- $scan=$val;
- print "Directory Scan is required.\n" if $scan;
- print "Sinple glob $in_dir for *_stats.\n" unless $scan;
- }
- }
- my @files;
- if($scan){
- find(sub {
- push @files,$File::Find::name if -f and /_hs_metrics.txt$/;
- }, $in_dir);
- print "sanning...\n";
- }else{
- @files=<$in_dir/*_hs_metrics.txt>;
- print "No sanning...\n";
- }
- my (@samples,%data);
- my @keys=qw(TARGET_TERRITORY TOTAL_READS PCT_PF_UQ_READS PCT_PF_UQ_READS_ALIGNED
- PCT_SELECTED_BASES MEAN_TARGET_COVERAGE PCT_USABLE_BASES_ON_TARGET FOLD_80_BASE_PENALTY
- PCT_TARGET_BASES_1X PCT_TARGET_BASES_2X PCT_TARGET_BASES_10X PCT_TARGET_BASES_20X PCT_TARGET_BASES_30X
- PCT_TARGET_BASES_40X PCT_TARGET_BASES_50X PCT_TARGET_BASES_100X AT_DROPOUT GC_DROPOUT);
- for my $file (sort @files){
- open my $fh, '<', $file or die "Cannot open file:$!";
- my $sam=basename $file;
- $sam=~s/_hs_metrics.txt//;
- push @samples,$sam;
- my $flag=0;
- my @head;
- while(<$fh>){
- chomp;
- next if /^#/ or /^\s*$/;
- if(/^BAIT_SET/){
- $flag=1;
- @head=split /\t/;
- next;
- }
- next unless $flag;
- my @conts=split /\t/;
- my ($cov30,$cov1);
- for my $i (0..$#conts){
- my $key=$head[$i];
- my $val=$conts[$i];
- $cov30=$val if $key eq 'PCT_TARGET_BASES_30X';
- $cov1=$val if $key eq 'PCT_TARGET_BASES_1X';
- if($val=~/^\d+\.\d+$/ ){
- $val= $val<100? sprintf("%.3f",$val):sprintf("%.0f",$val);
- }
- $data{$sam}{$key}=$val;
- }
- my $adj30=$cov1 ? sprintf("%.4f",$cov30/$cov1) : 0;
- $data{$sam}{Con}=$adj30;
- $flag=0;
- }
- }
- open my $wfh,'>',$OUT or die "Cannot write file:$OUT $!\n";
- print $wfh join("\t",'#Sample',@keys),"\tAdjust_30X\n";
- for my $sam (sort {$data{$a}{TARGET_TERRITORY}<=> $data{$b}{TARGET_TERRITORY} } @samples){
- print $wfh $sam;
- for my $key(@keys){
- print $wfh "\t",$data{$sam}{$key};
- }
- print $wfh "\t",$data{$sam}{Con},"\n";
- }
|