trim_picard_hs.pl 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  1. #!/usr/bin/perl
  2. use strict;
  3. use warnings;
  4. use Getopt::Std;
  5. use File::Basename;
  6. use File::Find;
  7. my $usage="Usage:\n\tperl $0 outfile\n
  8. Options:\n
  9. -d input directory. default '.' .
  10. -s If Scan is required.True for YES,False for NO. default 1 (YES) .
  11. \n";
  12. my ($in_dir,$scan);
  13. my %opts = (d => '.', s=>1);
  14. my %opts1;
  15. unless (@ARGV){
  16. print $usage;
  17. exit;
  18. }
  19. getopts(':d:s:', \%opts1);
  20. my $OUT=shift;
  21. for my $key (keys %opts1){
  22. my $val=$opts1{$key};
  23. $opts{$key}=$val;
  24. }
  25. for my $key (keys %opts){
  26. my $val=$opts{$key};
  27. if($key eq 'd'){
  28. $in_dir=$val;
  29. print "Input directory: $val .\n";
  30. }
  31. if($key eq 's'){
  32. $scan=$val;
  33. print "Directory Scan is required.\n" if $scan;
  34. print "Sinple glob $in_dir for *_stats.\n" unless $scan;
  35. }
  36. }
  37. my @files;
  38. if($scan){
  39. find(sub {
  40. push @files,$File::Find::name if -f and /_hs_metrics.txt$/;
  41. }, $in_dir);
  42. print "sanning...\n";
  43. }else{
  44. @files=<$in_dir/*_hs_metrics.txt>;
  45. print "No sanning...\n";
  46. }
  47. my (@samples,%data);
  48. my @keys=qw(TARGET_TERRITORY TOTAL_READS PCT_PF_UQ_READS PCT_PF_UQ_READS_ALIGNED
  49. PCT_SELECTED_BASES MEAN_TARGET_COVERAGE PCT_USABLE_BASES_ON_TARGET FOLD_80_BASE_PENALTY
  50. PCT_TARGET_BASES_1X PCT_TARGET_BASES_2X PCT_TARGET_BASES_10X PCT_TARGET_BASES_20X PCT_TARGET_BASES_30X
  51. PCT_TARGET_BASES_40X PCT_TARGET_BASES_50X PCT_TARGET_BASES_100X AT_DROPOUT GC_DROPOUT);
  52. for my $file (sort @files){
  53. open my $fh, '<', $file or die "Cannot open file:$!";
  54. my $sam=basename $file;
  55. $sam=~s/_hs_metrics.txt//;
  56. push @samples,$sam;
  57. my $flag=0;
  58. my @head;
  59. while(<$fh>){
  60. chomp;
  61. next if /^#/ or /^\s*$/;
  62. if(/^BAIT_SET/){
  63. $flag=1;
  64. @head=split /\t/;
  65. next;
  66. }
  67. next unless $flag;
  68. my @conts=split /\t/;
  69. my ($cov30,$cov1);
  70. for my $i (0..$#conts){
  71. my $key=$head[$i];
  72. my $val=$conts[$i];
  73. $cov30=$val if $key eq 'PCT_TARGET_BASES_30X';
  74. $cov1=$val if $key eq 'PCT_TARGET_BASES_1X';
  75. if($val=~/^\d+\.\d+$/ ){
  76. $val= $val<100? sprintf("%.3f",$val):sprintf("%.0f",$val);
  77. }
  78. $data{$sam}{$key}=$val;
  79. }
  80. my $adj30=$cov1 ? sprintf("%.4f",$cov30/$cov1) : 0;
  81. $data{$sam}{Con}=$adj30;
  82. $flag=0;
  83. }
  84. }
  85. open my $wfh,'>',$OUT or die "Cannot write file:$OUT $!\n";
  86. print $wfh join("\t",'#Sample',@keys),"\tAdjust_30X\n";
  87. for my $sam (sort {$data{$a}{TARGET_TERRITORY}<=> $data{$b}{TARGET_TERRITORY} } @samples){
  88. print $wfh $sam;
  89. for my $key(@keys){
  90. print $wfh "\t",$data{$sam}{$key};
  91. }
  92. print $wfh "\t",$data{$sam}{Con},"\n";
  93. }