chaowen.xi
/
602panel_test


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
							#!/usr/bin/perl

use strict;
use warnings;
use JSON::Parse 'json_file_to_perl';
use File::Find;
use List::Util qw(sum0);
use Getopt::Std;
use File::Basename;

my $usage="Usage:\n\tperl $0 outfile\n
	Options:\n
    -d	input directory. default '.' .
    -x	suffix,iSNP tab file ending with. default .fastp.json.
    -s  If Scan is required.True for YES,False for NO. default 1 (YES) . \n\n"; 
my ($in_dir,$scan,$suffix);  
my %opts = (d => '.', s=>1, x=> '.fastp.json');
my %opts1;
getopts(':d:q:s:x:', \%opts1);

for my $key (keys %opts1){
	my $val=$opts1{$key};
	$opts{$key}=$val;
}   
for my $key (keys %opts){
	my $val=$opts{$key};
	if($key eq 'd'){
		$in_dir=$val;
		print "Input directory: $val .\n";
		}
	if($key eq 'x'){
		$suffix=$val;
		print "Input files suffix : $val .\n";
		}  	    
    if($key eq 's'){
        $scan=$val;
        print "Directory Scan is required.\n" if $scan;
        print "Sinple glob $in_dir for VariantsToTable outputs.\n" unless $scan;
        }
}     
my @files;
if($scan){
    find(sub {
		push @files,$File::Find::name if  -f and /$suffix$/;
		}, $in_dir.'/');
    }else{
    @files=($in_dir);
}
@files=sort @files;
unless (@ARGV){
	print $usage;
	exit;
}
my $out=shift;
open my $wfh,'>' ,$out or die "Cannot open file:$!\n";

my @key=('total_reads','total_bases','q20','q30','GC');
my @n_key=map {/total_reads/ ? $_.' (M)': $_ } @key;
@n_key=map {/total_bases/ ? $_.' (G)': $_ } @n_key;
print $wfh join("\t",'Sample',@n_key , (map {if(/total/){
    my $k=$_;
	$k=~s/total/clean/;
    $k;
	}else{'clean_'.$_
	}  } @n_key) ,'F1_AT_Dev','F2_AT_Dev','Clean_rate_read','Clean_rate_base'),"\n";

for my $file(@files){
	my $json_f=$file;
	my $sample=basename $file;
	$sample=~s/.fastp.json//;
	my $decoded_json = json_file_to_perl( $json_f );
    my ($f1_gc_dev,$f2_gc_dev)=(0,0);
	for my $key(sort keys %{$decoded_json}){
        if ($key eq 'read1_after_filtering'){
            my $sum=${$decoded_json}{$key};
            $f1_gc_dev=get_dev($sum);
        }
        if ($key eq 'read2_after_filtering'){
            my $sum=${$decoded_json}{$key};
            $f2_gc_dev=get_dev($sum);
        }
		next unless $key eq 'summary';
		my $sum=${$decoded_json}{$key};

		my $b_reads=sprintf "%.2f", ${$sum}{'before_filtering'}{'total_reads'}/1000000;
		my $b_bases=sprintf "%.2f", ${$sum}{'before_filtering'}{'total_bases'}/1000000000;
		my $b_q20=sprintf "%.2f", ${$sum}{'before_filtering'}{'q20_rate'};
		my $b_q30=sprintf "%.2f", ${$sum}{'before_filtering'}{'q30_rate'};
		my $b_gc=sprintf "%.3f", ${$sum}{'before_filtering'}{'gc_content'};
		my $a_reads=sprintf "%.2f", ${$sum}{'after_filtering'}{'total_reads'}/1000000;
		my $a_bases=sprintf "%.2f", ${$sum}{'after_filtering'}{'total_bases'}/1000000000;
		my $a_q20=sprintf "%.2f", ${$sum}{'after_filtering'}{'q20_rate'};
		my $a_q30=sprintf "%.2f", ${$sum}{'after_filtering'}{'q30_rate'};
		my $a_gc=sprintf "%.3f", ${$sum}{'after_filtering'}{'gc_content'};
		my $clean_r=sprintf "%.2f",${$sum}{'after_filtering'}{'total_reads'}/${$sum}{'before_filtering'}{'total_reads'};
		my $clean_b=sprintf "%.2f",${$sum}{'after_filtering'}{'total_bases'}/${$sum}{'before_filtering'}{'total_bases'};
		print $wfh join("\t",$sample,$b_reads,$b_bases,$b_q20,$b_q30,$b_gc,
                             $a_reads,$a_bases,$a_q20,$a_q30,$a_gc,$f1_gc_dev,$f2_gc_dev,$clean_r,$clean_b),"\n";
	}
}

sub get_dev{
    my $sum=shift;
    my @G_conts=@{${$sum}{'content_curves'}{'T'}};
    my @C_conts=@{${$sum}{'content_curves'}{'A'}};
    my $G_avg=sum0(@G_conts[10..$#G_conts])/($#G_conts-10);
    my $C_avg=sum0(@C_conts[10..$#C_conts])/($#C_conts-10);
    my $deno=$G_avg>$C_avg ? $G_avg:$C_avg;
    my $f1_gc=sprintf("%.3f",abs(($G_avg-$C_avg)/$deno ) );
    return $f1_gc;
}