seg_merge.pl 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. #!/usr/bin/perl
  2. use strict;
  3. use warnings;
  4. use Getopt::Long;
  5. my $segf=shift;
  6. my $th0=0.2;
  7. my $th1=0.7;
  8. my $th3=1.3;
  9. my $th4=1.7;
  10. my $out=$segf.'.xls';
  11. my $verbose=1;
  12. my $nprobe_th=5;
  13. sub parse_arguments{
  14. my $result = GetOptions (
  15. "Hom_del=s" => \$th0,
  16. "Het_del=s" => \$th1,
  17. "Het_amp=s" => \$th3,
  18. "Hom_amp=s" => \$th4,
  19. "output-basename=s" => \$out,
  20. "FP_CNV_Nprobes_threshold=i" => \$nprobe_th,
  21. "verbose=s" => \$verbose,
  22. );
  23. }
  24. my ($pre_cont, $pre_chr,$pre_type,$pre_ratio);
  25. open my $infh, '<', $segf or die "Cannot open file:$segf $!\n";
  26. open my $wfh , '>' , $out or die "Cannot write file:$! \n";
  27. while(<$infh>){
  28. chomp;
  29. if(/^Sample\s+/){
  30. print $wfh $_,"\tNum_merge\n";
  31. next;
  32. }
  33. my ($sample,$chr,$start,$end,$nprob,$seg)=split /\t/;
  34. my $type='';
  35. if($seg<$th0){
  36. $type='Hom_del';
  37. }elsif($seg < $th1){
  38. $type='Het_del';
  39. }elsif($seg < $th3){
  40. $type='Neutral';
  41. }elsif($seg < $th4){
  42. $type='Het_amp';
  43. }else{
  44. $type='Hom_amp';
  45. }
  46. my $first=0; ## filter FP CNV calls by Number of probes
  47. if($chr=~/Y/ and $start> 22800000){
  48. print "Omit AZFc CNV.\n";
  49. }elsif($nprob <= $nprobe_th and $seg > $th0 ){
  50. if ($chr and $chr eq $pre_chr){ # FP in chr
  51. $type=$pre_type;
  52. $seg=$pre_ratio;
  53. }else{ # FP at chr start
  54. $first=1;
  55. print $_,"\n";
  56. }
  57. }
  58. if( ! $pre_cont){
  59. $pre_cont=$_."\n";
  60. }elsif( ! $pre_type){ # pre is FP at chr start
  61. $pre_cont .= $_."\n";
  62. }elsif($pre_chr ne $chr or $type ne $pre_type or abs($seg-$pre_ratio)>0.5 ){
  63. print $wfh merge_cont($pre_cont),"\n";
  64. $pre_cont=$_."\n";
  65. }else{
  66. $pre_cont .= $_."\n";
  67. }
  68. $pre_type=$first ? '' : $type;
  69. $pre_chr=$chr;
  70. $pre_ratio=merge_cont($pre_cont,1);
  71. }
  72. print $wfh merge_cont($pre_cont),"\n";
  73. sub merge_cont{
  74. my $cont=shift;
  75. my $seg=shift;
  76. my ($tot_prob, $tot_seg, $num, $seg_st, $seg_end)=(0, 0, 0, 0, 0);
  77. my ( $sam,$chrs);
  78. for my $line(split /\n/,$cont){
  79. my ($sample,$chr,$start,$end,$nprob,$seg)=split /\t/,$line;
  80. $tot_prob+= $nprob;
  81. $tot_seg += $nprob*$seg;
  82. $num++;
  83. $seg_st=$start if $num==1;
  84. $seg_st =$seg_st > $start ? $start : $seg_st;
  85. $seg_end= $seg_end > $end ? $seg_end : $end ;
  86. $sam=$sample;
  87. $chrs=$chr;
  88. }
  89. my $avg_seg= sprintf("%.2f",$tot_seg/$tot_prob);
  90. if(defined $seg){
  91. return $avg_seg;
  92. }else{
  93. return join("\t",$sam,$chrs,$seg_st,$seg_end,$tot_prob,$avg_seg,$num);
  94. }
  95. }