com_cov_norm.pl 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. #!/usr/bin/perl
  2. use strict;
  3. use warnings;
  4. use File::Basename;
  5. my $usage="\n\tUsage: perl $0 output_file [input_dir] [sample.list]
  6. \tdefault input directory '.'; If sample.list file provided, Only
  7. \tsample in sample.list included; otherwise all file ending with
  8. \t*.pcov under directory input_dir are included.\n";
  9. if(@ARGV<1){
  10. print $usage,"\n";
  11. exit;
  12. }
  13. my $outf=shift;
  14. my $dir=shift;
  15. my $in_dir=defined $dir ? $dir : '.';
  16. my $samf=shift;
  17. my @files;
  18. if(defined $samf){
  19. open my $samfh, '<' , $samf or die "Cannot open file: $samf $!\n";
  20. while(<$samfh>){
  21. chomp;
  22. next if /^REF/;
  23. next if /^NA/;
  24. push @files, $in_dir.'/'.$_.'.pcov';
  25. }
  26. }else{
  27. @files=<$in_dir/*.pcov>;
  28. }
  29. my (%data,@samples,%cnts);
  30. for my $file (@files){
  31. next if $file =~ /\.ns\.pcov/;
  32. my $sam=basename $file;
  33. $sam=~s/\.pcov//;
  34. push @samples,$sam;
  35. open my $fh, '<', $file or die "Cannot open file:$! $file.\n";
  36. while(<$fh>){
  37. next if /^#/;
  38. next if /^contig/;
  39. chomp;
  40. my ($chr,$start,$end,$info,$val)=split /\t/;
  41. my $gene=get_uniq($info);
  42. my $id=join("\t",$chr,$start,$end,' '.$gene);
  43. $data{$id}{$sam}=$val;
  44. $cnts{line}{$sam}++;
  45. $cnts{total}{$sam} += $val;
  46. }
  47. close($fh);
  48. }
  49. open my $wfh,'>', $outf or die "Cannot open $outf.\n";
  50. {
  51. print $wfh join("\t",'chrom', 'start','end','name');
  52. for my $sam(@samples){
  53. print $wfh "\t",$sam;
  54. }
  55. print $wfh "\n";
  56. }
  57. for my $id(map { $_->[0] }
  58. sort{
  59. $a->[1] cmp $b->[1] or
  60. $a->[2] <=> $b->[2]
  61. }
  62. (map [$_, (split /\t/)[0], (split /\t/)[1] ], keys %data) ){
  63. print $wfh $id;
  64. for my $sam(@samples){
  65. my $nline=$cnts{line}{$sam};
  66. my $total=$cnts{total}{$sam};
  67. my $val=exists $data{$id}{$sam}? $data{$id}{$sam} : 0;
  68. print $wfh "\t",sprintf("%.2f",$nline*$val/$total)
  69. }
  70. print $wfh "\n";
  71. }
  72. ;
  73. sub get_uniq{
  74. my $str=shift;
  75. my (%hash);
  76. my $flag=0;
  77. while($str=~/\(([^)]+)\)/g){
  78. $hash{$1}=undef;
  79. $flag=1;
  80. }
  81. if($flag==1){
  82. return join(",",sort keys %hash);
  83. }else{
  84. my $new=$str;
  85. $new=~s/_\d+$//;
  86. $new=~/ref\|([^,]+),/;
  87. if($1){
  88. return $1;
  89. }else{
  90. return $new;
  91. }
  92. }
  93. }