yanyan.lv
/
prod_reproduction


			
				
					
						
						
							12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
							#!/usr/bin/perl

use strict;
use warnings;
use JSON::Parse 'json_file_to_perl';
use File::Find;
use Getopt::Std;
use File::Basename;

# Help text printed when no output file is given on the command line.
# Options must precede the output file name because getopts stops at the
# first non-option argument.
my $usage="Usage:\n\tperl $0 [options] outfile\n
	Options:\n
    -d	input directory. default '.' .
    -x	suffix,fastp JSON file ending with. default .fastp.json.
    -s  If Scan is required.True for YES,False for NO. default 1 (YES) . \n\n";

# Interpret the value given to -s as a boolean.
# Returns 0 for undef, the empty string, '0' and the case-insensitive words
# false/no/off/n/f; returns 1 for anything else.  A plain truth test is not
# enough here because the string 'False' is a true value in Perl.
sub _is_enabled {
    my ($value) = @_;
    return 0 unless defined $value;
    return $value =~ /\A\s*(?:0|false|no|off|n|f)?\s*\z/i ? 0 : 1;
}

# Effective settings used by the rest of the script.
my ($in_dir,$scan,$suffix);
my %opts = (d => '.', s => 1, x => '.fastp.json');   # built-in defaults
my %opts1;                                           # raw command-line values
getopts(':d:q:s:x:', \%opts1);

# Command-line values override the defaults.  An option given without a
# value is stored as undef by getopts; keep the default in that case.
for my $flag (keys %opts1) {
    $opts{$flag} = $opts1{$flag} if defined $opts1{$flag};
}

# Assign in a fixed order: hash iteration order is random, so reporting from
# inside a loop over the keys could reference $in_dir before it was set.
$in_dir = $opts{d};
$suffix = $opts{x};
$scan   = _is_enabled($opts{s});

print "Input directory: $in_dir .\n";
print "Input files suffix : $suffix .\n";
if ($scan) {
    print "Directory Scan is required.\n";
}
else {
    print "Simple glob $in_dir for fastp JSON reports.\n";
}
# Build the sorted list of report files to summarise.
my @files;

# Match the suffix literally and only at the very end of the name; without
# \Q...\E the dots in '.fastp.json' would match any character.
my $suffix_re = qr/\Q$suffix\E\z/;

if ($scan) {
    # Recursive search below $in_dir.
    find(sub {
        push @files, $File::Find::name if -f $_ and $_ =~ $suffix_re;
    }, $in_dir.'/');
}
elsif (-d $in_dir) {
    # No recursion: take matching plain files directly inside $in_dir.
    # readdir is used instead of glob() so directory names containing
    # spaces or glob metacharacters are handled verbatim.
    opendir my $dh, $in_dir or die "Cannot open directory $in_dir: $!\n";
    @files = map  { "$in_dir/$_" }
             grep { $_ =~ $suffix_re and -f "$in_dir/$_" }
             readdir $dh;
    closedir $dh;
}
else {
    # -d names a single file: process just that one.
    @files = ($in_dir);
}
@files = sort @files;
# The output file is the one mandatory positional argument; show the help
# text and stop when it is missing.
if (!@ARGV) {
    print $usage;
    exit;
}
my $out = shift @ARGV;
open my $wfh, '>', $out or die "Cannot open file:$!\n";

# Column names for the pre-filter statistics; the two totals carry a unit.
my %unit_for = ('total_reads' => ' (M)', 'total_bases' => ' (G)');
my @key = ('total_reads', 'total_bases', 'q20', 'q30', 'GC');
my @n_key = map { exists $unit_for{$_} ? $_ . $unit_for{$_} : $_ } @key;

# Post-filter column names: "total" becomes "clean", every other column
# gets a "clean_" prefix.
my @clean_key;
for my $column (@n_key) {
    (my $renamed = $column) =~ s/total/clean/;
    push @clean_key, ($renamed eq $column ? 'clean_' . $column : $renamed);
}

# Header line of the tab-separated report.
my @header = ('Sample', @n_key, @clean_key, 'Clean_rate_read', 'Clean_rate_base');
print $wfh join("\t", @header), "\n";

# Fraction of the pre-filter count that survived filtering, formatted with
# two decimals.  Returns 'NA' when the pre-filter count is zero or missing,
# because dividing by it would abort the whole run.
sub _clean_rate {
    my ($after, $before) = @_;
    return 'NA' unless $before;
    return sprintf '%.2f', $after / $before;
}

# Write one table row per report file, using its 'summary' section.
for my $file (@files) {
    # Sample name = file name without directory and without the configured
    # suffix (matched literally, at the end of the name only).
    my $sample = basename $file;
    $sample =~ s/\Q$suffix\E\z//;

    my $decoded_json = json_file_to_perl($file);
    my $sum = ref($decoded_json) eq 'HASH' ? $decoded_json->{'summary'} : undef;
    unless (ref($sum) eq 'HASH') {
        warn "No 'summary' section in $file, skipped.\n";
        next;
    }

    # Same five statistics for both stages, before-filtering columns first:
    # reads in millions, bases in billions, q20/q30 rates and GC content.
    my @row = ($sample);
    for my $stage ('before_filtering', 'after_filtering') {
        my $stat = $sum->{$stage};
        push @row,
            sprintf('%.2f', $stat->{'total_reads'} / 1000000),
            sprintf('%.2f', $stat->{'total_bases'} / 1000000000),
            sprintf('%.2f', $stat->{'q20_rate'}),
            sprintf('%.2f', $stat->{'q30_rate'}),
            sprintf('%.3f', $stat->{'gc_content'});
    }

    # Share of reads and bases kept by filtering.
    push @row,
        _clean_rate($sum->{'after_filtering'}{'total_reads'}, $sum->{'before_filtering'}{'total_reads'}),
        _clean_rate($sum->{'after_filtering'}{'total_bases'}, $sum->{'before_filtering'}{'total_bases'});

    print $wfh join("\t", @row), "\n";
}

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so check the result.
close $wfh or die "Cannot close $out: $!\n";