yanyan.lv
/
prod_reproduction


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
							#!/usr/bin/perl

use strict;
use warnings;
use File::Basename;

my $usage="\n\tUsage: perl $0 output_file [input_dir] [sample.list]
\tdefault input directory '.'; If sample.list file provided, Only
\tsample in sample.list included; otherwise all file ending with
\t*.pcov under directory input_dir are included.\n";

if(@ARGV<1){
    print $usage,"\n";
    exit;
}

my $outf=shift;
my $dir=shift;
my $in_dir=defined $dir ? $dir : '.';
my $samf=shift;

my @files;
if(defined $samf){
    open my $samfh, '<' , $samf or die "Cannot open file: $samf $!\n";
    while(<$samfh>){
        chomp;
        next if /^REF/;
        next if /^NA/;
        push @files, $in_dir.'/'.$_.'.pcov';
    }
}else{
    @files=<$in_dir/*.pcov>;
}

my (%data,@samples,%cnts);

for my $file (@files){
	next if $file =~ /\.ns\.pcov/;
	my $sam=basename $file;
	$sam=~s/\.pcov//;
	push @samples,$sam;
	open my $fh, '<', $file or die "Cannot open file:$! $file.\n";
	while(<$fh>){
		next if /^#/;
		next if /^contig/;
		chomp;
		my ($chr,$start,$end,$info,$val)=split /\t/;		
		my $gene=get_uniq($info);
		my $id=join("\t",$chr,$start,$end,' '.$gene);
		$data{$id}{$sam}=$val;
		$cnts{line}{$sam}++;
		$cnts{total}{$sam} += $val;
	}
	close($fh);
}
open my $wfh,'>', $outf or die "Cannot open $outf.\n";

{
	print $wfh join("\t",'chrom', 'start','end','name');
	for my $sam(@samples){
		print $wfh "\t",$sam;
	}
	print $wfh "\n";
}

for my $id(map { $_->[0] }
		sort{
			$a->[1] cmp $b->[1] or
			$a->[2] <=> $b->[2] 
			}
		(map [$_, (split /\t/)[0], (split /\t/)[1] ], keys %data) ){
	print $wfh $id;
	for my $sam(@samples){
		my $nline=$cnts{line}{$sam};
		my $total=$cnts{total}{$sam};
		my $val=exists $data{$id}{$sam}? $data{$id}{$sam} : 0;
		print $wfh "\t",sprintf("%.2f",$nline*$val/$total)
	}
	print $wfh "\n";
}

;
sub get_uniq{
	my $str=shift;
	my (%hash);
	my $flag=0;
	while($str=~/\(([^)]+)\)/g){
		$hash{$1}=undef;
		$flag=1;
	}
	if($flag==1){
		return join(",",sort keys %hash);
	}else{
		my $new=$str;
		$new=~s/_\d+$//;
		$new=~/ref\|([^,]+),/;
		if($1){
			return $1;
		}else{
		return $new;
		
		}
	}
}