1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283 |
- #!/usr/bin/perl
- use strict;
- use warnings;
- use File::Basename;
- my $usage="\n\tUsage: perl $0 output_file [input_dir] [M.sample.list]
- \tdefault input directory '.'; If sample.list file provided, Only
- \tsample in sample.list included; otherwise all file ending with
- \t*.pcov under directory input_dir are included.\n";
- if(@ARGV<1){
- print $usage,"\n";
- exit;
- }
- my $outf=shift;
- my $dir=shift;
- my $in_dir=defined $dir ? $dir : '.';
- my $samf=shift;
- my @files;
- if(defined $samf){
- open my $samfh, '<' , $samf or die "Cannot open file: $samf $!\n";
- while(<$samfh>){
- chomp;
- next if /^REF/;
- next if /^NA/;
- push @files, $in_dir.'/'.$_.'.pcov';
- }
- }else{
- @files=<$in_dir/*.pcov>;
- }
- my (%data,@samples,%cnts);
- for my $file (@files){
- next if $file =~ /\.ns\.pcov/;
- my $sam=basename $file;
- $sam=~s/\.pcov//;
- push @samples,$sam;
- open my $fh, '<', $file or die "Cannot open file:$! $file.\n";
- while(<$fh>){
- next if /^#/;
- next if /^contig/;
- chomp;
- my ($chr,$start,$end,$info,$val)=split /\t/;
- $cnts{$sam}{line}++;
- $cnts{$sam}{total} += $val;
- if($chr=~/X/){
- if($start > 2700000 and $start < 154000000){
- $data{$sam}{X_total}+=$val;
- $data{$sam}{X_line}++;
- }
- }elsif($chr=~/Y/){
- if( ($start>14300000 and $start< 15200000 ) or
- ($start>19300000 and $start< 24000000 ) ){
- $data{$sam}{AZFab_total}+=$val;
- $data{$sam}{AZFab_line}++;
- }elsif( $start>24000000 and $start< 28500000 ){
- $data{$sam}{AZFc_total}+=$val;
- $data{$sam}{AZFc_line}++;
- }else{
- $data{$sam}{Y_total}+=$val;
- $data{$sam}{Y_line}++;
- }
- }
- }
- close($fh);
- }
- open my $wfh,'>', $outf or die "Cannot open $outf.\n";
- print $wfh join("\t",'Sample', 'X','AZFab','AZFc','Y'),"\n";
- for my $sam(@samples){
- my $avg=$cnts{$sam}{total}/$cnts{$sam}{line};
- my $val_X=$data{$sam}{X_total}/$data{$sam}{X_line};
- my $val_AZFab=$data{$sam}{AZFab_total}/$data{$sam}{AZFab_line};
- my $val_AZFc=$data{$sam}{AZFc_total}/$data{$sam}{AZFc_line};
- my $val_Y=$data{$sam}{Y_total}/$data{$sam}{Y_line};
- print $wfh $sam,"\t",sprintf("%.2f",$val_X/$avg),"\t",sprintf("%.2f",$val_AZFab/$avg),
- "\t",sprintf("%.2f",$val_AZFc/$avg),"\t",sprintf("%.2f",$val_Y/$avg),"\n";
- }
|