seg_merge.v0.pl 2.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. #!/usr/bin/perl
  2. use strict;
  3. use warnings;
  4. use Getopt::Long;
  5. my $segf=shift;
  6. my $th0=0.2;
  7. my $th1=0.7;
  8. my $th3=1.3;
  9. my $th4=1.7;
  10. my $out=$segf.'.xls';
  11. my $verbose=1;
  12. sub parse_arguments{
  13. my $result = GetOptions (
  14. "Hom_del=s" => \$th0,
  15. "Het_del=s" => \$th1,
  16. "Het_amp=s" => \$th3,
  17. "Hom_amp=s" => \$th4,
  18. "output-basename=s" => \$out,
  19. "verbose=s" => \$verbose,
  20. );
  21. }
  22. my ($pre_cont, $pre_chr,$pre_type,$pre_ratio);
  23. open my $infh, '<', $segf or die "Cannot open file:$segf $!\n";
  24. open my $wfh , '>' , $out or die "Cannot write file:$! \n";
  25. while(<$infh>){
  26. chomp;
  27. if(/^Sample\s+/){
  28. print $wfh $_,"\tNum_merge\n";
  29. next;
  30. }
  31. my ($sample,$chr,$start,$end,$nprob,$seg)=split /\t/;
  32. my $type='';
  33. if($seg<$th0){
  34. $type='Hom_del';
  35. }elsif($seg < $th1){
  36. $type='Het_del';
  37. }elsif($seg < $th3){
  38. $type='Neutral';
  39. }elsif($seg < $th4){
  40. $type='Het_amp';
  41. }else{
  42. $type='Hom_amp';
  43. }
  44. if( ! $pre_cont){
  45. $pre_cont=$_."\n";
  46. }elsif($pre_chr ne $chr or $type ne $pre_type or abs($seg-$pre_ratio)>0.5 ){
  47. print $wfh merge_cont($pre_cont),"\n";
  48. $pre_cont=$_."\n";
  49. }else{
  50. $pre_cont .= $_."\n";
  51. }
  52. $pre_type=$type;
  53. $pre_chr=$chr;
  54. $pre_ratio=merge_cont($pre_cont,1);
  55. }
  56. print $wfh merge_cont($pre_cont),"\n";
  57. sub merge_cont{
  58. my $cont=shift;
  59. my $seg=shift;
  60. my ($tot_prob, $tot_seg, $num, $seg_st, $seg_end)=(0, 0, 0, 0, 0);
  61. my ( $sam,$chrs);
  62. for my $line(split /\n/,$cont){
  63. my ($sample,$chr,$start,$end,$nprob,$seg)=split /\t/,$line;
  64. $tot_prob+= $nprob;
  65. $tot_seg += $nprob*$seg;
  66. $num++;
  67. $seg_st=$start if $num==1;
  68. $seg_st =$seg_st > $start ? $start : $seg_st;
  69. $seg_end= $seg_end > $end ? $seg_end : $end ;
  70. $sam=$sample;
  71. $chrs=$chr;
  72. }
  73. my $avg_seg= sprintf("%.2f",$tot_seg/$tot_prob);
  74. if(defined $seg){
  75. return $avg_seg;
  76. }else{
  77. return join("\t",$sam,$chrs,$seg_st,$seg_end,$tot_prob,$avg_seg,$num);
  78. }
  79. }