prep_cnv_Mref.pl 2.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. #!/usr/bin/perl
  2. use strict;
  3. use warnings;
  4. use File::Basename;
  5. my $usage="\n\tUsage: perl $0 output_file [input_dir] [M.sample.list]
  6. \tdefault input directory '.'; If sample.list file provided, Only
  7. \tsample in sample.list included; otherwise all file ending with
  8. \t*.pcov under directory input_dir are included.\n";
  9. if(@ARGV<1){
  10. print $usage,"\n";
  11. exit;
  12. }
  13. my $outf=shift;
  14. my $dir=shift;
  15. my $in_dir=defined $dir ? $dir : '.';
  16. my $samf=shift;
  17. my @files;
  18. if(defined $samf){
  19. open my $samfh, '<' , $samf or die "Cannot open file: $samf $!\n";
  20. while(<$samfh>){
  21. chomp;
  22. next if /^REF/;
  23. next if /^NA/;
  24. push @files, $in_dir.'/'.$_.'.pcov';
  25. }
  26. }else{
  27. @files=<$in_dir/*.pcov>;
  28. }
  29. my (%data,@samples,%cnts);
  30. for my $file (@files){
  31. next if $file =~ /\.ns\.pcov/;
  32. my $sam=basename $file;
  33. $sam=~s/\.pcov//;
  34. push @samples,$sam;
  35. open my $fh, '<', $file or die "Cannot open file:$! $file.\n";
  36. while(<$fh>){
  37. next if /^#/;
  38. next if /^contig/;
  39. chomp;
  40. my ($chr,$start,$end,$info,$val)=split /\t/;
  41. $cnts{$sam}{line}++;
  42. $cnts{$sam}{total} += $val;
  43. if($chr=~/X/){
  44. if($start > 2700000 and $start < 154000000){
  45. $data{$sam}{X_total}+=$val;
  46. $data{$sam}{X_line}++;
  47. }
  48. }elsif($chr=~/Y/){
  49. if( ($start>14300000 and $start< 15200000 ) or
  50. ($start>19300000 and $start< 24000000 ) ){
  51. $data{$sam}{AZFab_total}+=$val;
  52. $data{$sam}{AZFab_line}++;
  53. }elsif( $start>24000000 and $start< 28500000 ){
  54. $data{$sam}{AZFc_total}+=$val;
  55. $data{$sam}{AZFc_line}++;
  56. }else{
  57. $data{$sam}{Y_total}+=$val;
  58. $data{$sam}{Y_line}++;
  59. }
  60. }
  61. }
  62. close($fh);
  63. }
  64. open my $wfh,'>', $outf or die "Cannot open $outf.\n";
  65. print $wfh join("\t",'Sample', 'X','AZFab','AZFc','Y'),"\n";
  66. for my $sam(@samples){
  67. my $avg=$cnts{$sam}{total}/$cnts{$sam}{line};
  68. my $val_X=$data{$sam}{X_total}/$data{$sam}{X_line};
  69. my $val_AZFab=$data{$sam}{AZFab_total}/$data{$sam}{AZFab_line};
  70. my $val_AZFc=$data{$sam}{AZFc_total}/$data{$sam}{AZFc_line};
  71. my $val_Y=$data{$sam}{Y_total}/$data{$sam}{Y_line};
  72. print $wfh $sam,"\t",sprintf("%.2f",$val_X/$avg),"\t",sprintf("%.2f",$val_AZFab/$avg),
  73. "\t",sprintf("%.2f",$val_AZFc/$avg),"\t",sprintf("%.2f",$val_Y/$avg),"\n";
  74. }