#!/usr/bin/perl
# Summarize fastp JSON reports into one tab-separated table: per sample,
# before/after-filtering read and base totals, Q20/Q30 rates, GC content,
# and clean-rate ratios (after / before).
use strict;
use warnings;
use JSON::PP qw(decode_json);    # core module; replaces non-core JSON::Parse
use File::Find;
use Getopt::Std;
use File::Basename;

my $usage = "Usage:\n\tperl $0 outfile\n Options:\n"
    . " -d input directory. default '.' ."
    . " -x suffix, fastp JSON file ending with. default .fastp.json."
    . " -s If Scan is required. True for YES, False for NO. default 1 (YES) . \n\n";

# Defaults, overridden by any command-line switches.
my %opts = ( d => '.', s => 1, x => '.fastp.json' );
my %cli;
getopts( 'd:q:s:x:', \%cli );    # -q is accepted for backward compat but unused
$opts{$_} = $cli{$_} for keys %cli;

my $in_dir = $opts{d};
my $suffix = $opts{x};
my $scan   = $opts{s};
print "Input directory: $in_dir .\n";
print "Input files suffix : $suffix .\n";
print $scan
    ? "Directory Scan is required.\n"
    : "Simple glob $in_dir for VariantsToTable outputs.\n";

# Require the output-file argument up front, before any directory scanning.
unless (@ARGV) {
    print $usage;
    exit;
}
my $out = shift @ARGV;

# Collect input files: either scan the tree for files ending in $suffix,
# or (with -s 0) treat the -d argument itself as the single input file.
my @files;
if ($scan) {
    my $suffix_re = qr/\Q$suffix\E\z/;    # quotemeta: suffix contains '.'
    find(
        sub { push @files, $File::Find::name if -f && /$suffix_re/ },
        "$in_dir/"
    );
}
else {
    @files = ($in_dir);
}
@files = sort @files;

open my $wfh, '>', $out or die "Cannot open file $out: $!\n";

# Header: raw fastp keys, with unit labels appended to the count columns,
# then the same columns renamed clean_* for the after-filtering values.
my @key   = qw(total_reads total_bases q20 q30 GC);
my @n_key = map { /total_reads/ ? "$_ (M)" : $_ } @key;
@n_key = map { /total_bases/ ? "$_ (G)" : $_ } @n_key;
my @clean_key = map {
    if (/total/) { ( my $k = $_ ) =~ s/total/clean/; $k }
    else         { "clean_$_" }
} @n_key;
print {$wfh} join( "\t",
    'Sample', @n_key, @clean_key, 'Clean_rate_read', 'Clean_rate_base' ),
    "\n";

for my $file (@files) {
    my $sample = basename $file;
    $sample =~ s/\Q$suffix\E\z//;    # strip the configured suffix, anchored

    # Slurp and decode the fastp JSON report (UTF-8 octets).
    my $decoded = do {
        open my $jfh, '<', $file or die "Cannot open file $file: $!\n";
        local $/;
        decode_json(<$jfh>);
    };
    my $sum = $decoded->{summary} or next;    # skip reports without a summary
    my $before = $sum->{before_filtering};
    my $after  = $sum->{after_filtering};

    my $b_reads = sprintf "%.2f", $before->{total_reads} / 1_000_000;
    my $b_bases = sprintf "%.2f", $before->{total_bases} / 1_000_000_000;
    my $b_q20   = sprintf "%.2f", $before->{q20_rate};
    my $b_q30   = sprintf "%.2f", $before->{q30_rate};
    my $b_gc    = sprintf "%.3f", $before->{gc_content};
    my $a_reads = sprintf "%.2f", $after->{total_reads} / 1_000_000;
    my $a_bases = sprintf "%.2f", $after->{total_bases} / 1_000_000_000;
    my $a_q20   = sprintf "%.2f", $after->{q20_rate};
    my $a_q30   = sprintf "%.2f", $after->{q30_rate};
    my $a_gc    = sprintf "%.3f", $after->{gc_content};

    # Guard against zero totals so a degenerate report cannot crash the run.
    my $clean_r = $before->{total_reads}
        ? sprintf( "%.2f", $after->{total_reads} / $before->{total_reads} )
        : 'NA';
    my $clean_b = $before->{total_bases}
        ? sprintf( "%.2f", $after->{total_bases} / $before->{total_bases} )
        : 'NA';

    print {$wfh} join( "\t",
        $sample,  $b_reads, $b_bases, $b_q20, $b_q30, $b_gc,
        $a_reads, $a_bases, $a_q20,   $a_q30, $a_gc,
        $clean_r, $clean_b ),
        "\n";
}

# Buffered write errors surface at close — check it.
close $wfh or die "Cannot close file $out: $!\n";