fastp2tab.pl 2.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. #!/usr/bin/perl
  2. use strict;
  3. use warnings;
  4. use JSON::Parse 'json_file_to_perl';
  5. use File::Find;
  6. use Getopt::Std;
  7. use File::Basename;
  # fastp2tab: gather the "summary" section of fastp JSON reports into a single
  # tab-separated table, one row per report.
  #
  # Command line:  perl fastp2tab.pl [-d dir] [-x suffix] [-s 0|1] outfile
  #   -d  input directory (or, with scanning disabled, a single report file)
  #   -x  file-name suffix identifying the reports; it is also stripped from
  #       the file name to form the sample name
  #   -s  whether to search the input directory recursively
  #   -q  accepted (with a value) for backward compatibility but ignored

  my $usage = join "\n",
      "Usage:",
      "\tperl $0 [options] outfile",
      "",
      "Options:",
      "",
      "-d input directory. default '.' .",
      "-x suffix,fastp JSON report file ending with. default .fastp.json.",
      "-s If Scan is required.True/1 for YES,False/0 for NO. default 1 (YES) .",
      "   Without scan, -d is listed non-recursively (or read as a single file).",
      "",
      "";

  # Defaults first, then whatever was supplied on the command line.
  my %opts = (d => '.', s => 1, x => '.fastp.json');
  my %opts1;
  getopts(':d:q:s:x:', \%opts1) or die $usage;
  for my $key (keys %opts1) {
      # Getopt::Std stores undef when a switch is the last word on the command
      # line; refuse that instead of silently scanning '/' or similar.
      die "Option -$key requires a value.\n\n$usage"
          unless defined $opts1{$key};
      $opts{$key} = $opts1{$key};
  }

  # Check for the output file before doing any (possibly expensive) scanning.
  unless (@ARGV) {
      print $usage;
      exit;
  }
  my $out = shift;

  # Read the options in a fixed order; iterating over the hash keys would make
  # the message order (and the availability of $in_dir) depend on hash order.
  my $in_dir = $opts{d};
  my $suffix = $opts{x};
  my $scan   = _is_enabled($opts{s});

  print "Input directory: $in_dir .\n";
  print "Input files suffix : $suffix .\n";
  print "Directory Scan is required.\n" if $scan;
  print "Simple listing of $in_dir for fastp JSON reports.\n" unless $scan;

  # The suffix is a literal string, not a pattern: quote it and anchor at the end.
  my $suffix_re = qr/\Q$suffix\E\z/;

  my @files;
  if ($scan) {
      # Recursive search below the input directory.
      find(sub {
          push @files, $File::Find::name if -f $_ && $_ =~ $suffix_re;
      }, $in_dir . '/');
  }
  elsif (-d $in_dir) {
      # Non-recursive: only the matching plain files directly inside the directory.
      opendir my $dh, $in_dir or die "Cannot open directory $in_dir: $!\n";
      @files = grep { -f $_ }
               map  { "$in_dir/$_" }
               grep { $_ =~ $suffix_re } readdir $dh;
      closedir $dh;
  }
  else {
      # Historical behaviour: with scanning off, -d may name one report file.
      @files = ($in_dir);
  }
  @files = sort @files;
  warn "No input files ending with '$suffix' found in $in_dir .\n" unless @files;

  open my $wfh, '>', $out or die "Cannot open file $out: $!\n";

  # Header: raw columns, then the same columns for the filtered ("clean") data.
  my @columns = ('total_reads (M)', 'total_bases (G)', 'q20', 'q30', 'GC');
  my @clean_columns = map {
      my $name = $_;
      ($name =~ s/total/clean/) ? $name : "clean_$name";
  } @columns;
  print $wfh join("\t", 'Sample', @columns, @clean_columns,
      'Clean_rate_read', 'Clean_rate_base'), "\n";

  for my $file (@files) {
      # Sample name = file name without the configured suffix.
      my $sample = basename $file;
      $sample =~ s/\Q$suffix\E\z//;

      my $report  = json_file_to_perl($file);
      my $summary = ref($report) eq 'HASH' ? $report->{summary} : undef;
      unless (ref($summary) eq 'HASH'
          && ref($summary->{before_filtering}) eq 'HASH'
          && ref($summary->{after_filtering}) eq 'HASH') {
          warn "Skipping $file: no before/after filtering summary found.\n";
          next;
      }

      my $before = $summary->{before_filtering};
      my $after  = $summary->{after_filtering};
      print $wfh join("\t",
          $sample,
          _stage_columns($before),
          _stage_columns($after),
          _ratio($after->{total_reads}, $before->{total_reads}),
          _ratio($after->{total_bases}, $before->{total_bases}),
      ), "\n";
  }

  # Buffered write errors only surface at close time.
  close $wfh or die "Cannot close file $out: $!\n";

  # Interpret the value of -s: undef, '', 0, false, f, no and n (any case)
  # disable scanning; every other value enables it.
  sub _is_enabled {
      my ($value) = @_;
      return 0 unless defined $value;
      return $value =~ /\A(?:|0|false|f|no|n)\z/i ? 0 : 1;
  }

  # Format one filtering stage (a hash with total_reads, total_bases, q20_rate,
  # q30_rate and gc_content) as the five table cells: reads divided by 1e6,
  # bases divided by 1e9, then the q20, q30 and GC values.
  sub _stage_columns {
      my ($stage) = @_;
      return (
          sprintf('%.2f', $stage->{total_reads} / 1000000),
          sprintf('%.2f', $stage->{total_bases} / 1000000000),
          sprintf('%.2f', $stage->{q20_rate}),
          sprintf('%.2f', $stage->{q30_rate}),
          sprintf('%.3f', $stage->{gc_content}),
      );
  }

  # Return numerator/denominator with two decimals, or 'NA' when the
  # denominator is missing or zero (e.g. an empty input FASTQ).
  sub _ratio {
      my ($numerator, $denominator) = @_;
      return 'NA' unless $denominator;
      return sprintf '%.2f', $numerator / $denominator;
  }