trim_cnv_result.pl 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
  1. #!/usr/bin/perl
  2. use strict;
  3. use warnings;
  4. use utf8;
  5. #binmode(STDOUT,":encoding(gbk)");
  6. use open ":encoding(gbk)",":std";
  7. my $nb_gene=3;
  8. my $prefix=shift;
  9. my $inf= -e $prefix.'-M-cnv.input.xls' ? $prefix.'-M-cnv.input.xls' : $prefix.'-F-cnv.input.xls';
  10. my $annof=$prefix.'.cnv.anno.xls';
  11. my (%data);
  12. open my $in_fh, '<', $inf or die "Cannot open file:$!";
  13. while(<$in_fh>){
  14. next if /^Sample/;
  15. my ($chr,$start,$end,$n_probe)=(split /\t/)[2,3,4,5];
  16. my $id=join("\t",$chr,$start,$end);
  17. $data{$id}=$n_probe;
  18. }
  19. close($in_fh);
  20. open my $anno_fh, '<' , $annof or die "Cannot open file:$!";
  21. open my $w_fh, '>',$prefix.'.CNV.xls' or die "Cannot write file:$!" ;
  22. while(<$anno_fh>){
  23. chomp;
  24. if(/^Sample/){
  25. print $w_fh join("\t", "检出变异\tChr\tStart\tEnd\t变异类型\t变异长度\t包含基因\t重复/缺失",
  26. 'Num_Probes',"Sample\tGender\tCopy-ratio",
  27. "Band_region\tGene\tGeneReview\tDECIPHER\tClinVarCNV\tOMIM\tHGMD"),"\n";
  28. next;
  29. }
  30. #my ($chr,$start,$end,$n_probe)=(split /\t/)[2,3,4,5];
  31. my ($sample,$gender,$chr,$start,$end,$len,$ratio,
  32. $band,$gene,$gr,$dp,$cc,$omim,$hgmd)=split /\t/;
  33. my $id=join("\t",$chr,$start,$end);
  34. my $nprobe=$data{$id};
  35. my $cnv_type=$ratio>1? 'dup': 'del';
  36. my $hgvs=cnv_hgvs($chr,$start,$end,$band,$cnv_type);
  37. my $tlen=trim_len($len);
  38. my $re_gene=trim_gene($gene,$omim);
  39. $re_gene=~s/;/, /g;
  40. my $c_type=$ratio>1? '重复': '缺失';
  41. $gene=~s/^gene://;
  42. $gr=~s/^geneReview://;
  43. $dp=~s/^Decipher://;
  44. $cc=~s/^clinvarCnv://;
  45. $omim=~s/^OMIM://;
  46. $hgmd=~s/^HGMD://;
  47. my $sv_type='';
  48. if( $gender eq 'M' and ($chr=~/X/ or $chr=~/Y/) ){
  49. $sv_type= $ratio>1 ? '重复' : '半合子缺失';
  50. }elsif($ratio<0.2){
  51. $sv_type='纯合缺失';
  52. }else{
  53. $sv_type= $ratio>1 ? '重复' : '杂合缺失';
  54. }
  55. $chr= $chr=~/chr/ ? $chr : 'chr'.$chr ;
  56. print $w_fh join("\t",$hgvs,$chr,$start,$end,$sv_type,$tlen,$re_gene,$c_type,
  57. $nprobe,$sample,$gender,$ratio,$band,$gene,$gr,$dp,$cc,$omim,$hgmd),"\n";
  58. }
  59. close($anno_fh);
  60. sub cnv_hgvs{
  61. my ($chr,$start,$end, $band, $type,$ref)=@_;
  62. unless(defined $ref){
  63. $ref='GRCh37'
  64. }
  65. $chr=~s/chr//;
  66. $band=~s/^[^qp]+([pq])/$1/;
  67. return "seq[$ref] $type($chr)($band)chr$chr:g.$start".'_'."$end$type";
  68. }
  69. sub trim_len{
  70. my $len=shift;
  71. $len=int($len);
  72. if($len <1000){
  73. return $len.'bp';
  74. }elsif($len <1000000){
  75. my $len_kb=sprintf("%.1f", $len/1000);
  76. return "约".$len_kb.'kb';
  77. }elsif($len < 1000000000){
  78. my $len_mb=sprintf("%.1f", $len/1000000);
  79. return "约".$len_mb.'M' ;
  80. }else{
  81. return 'NA';
  82. }
  83. }
  84. sub trim_gene{
  85. my ($genes, $omim)=@_;
  86. my ($num,$fgene);
  87. $genes=~s/gene://;
  88. $num=0;
  89. $fgene='';
  90. my $o_genes=omim_gene($omim);
  91. for my $gene(split /;/,$o_genes){
  92. next if $gene eq '-';
  93. $num++;
  94. if($num< $nb_gene+1){
  95. $fgene= $fgene ? $fgene.';'.$gene : $gene;
  96. }else{
  97. return $fgene."等";
  98. }
  99. $genes=~s/$gene;//;
  100. }
  101. my $re_gene=$fgene.';'.$genes;
  102. $num=0;
  103. $fgene='';
  104. for my $gene(split /;/,$re_gene){
  105. next unless $gene;
  106. $num++;
  107. return $fgene."等" if $num==$nb_gene+1;
  108. $fgene= $fgene ? $fgene.';'.$gene : $gene;
  109. }
  110. return $fgene;
  111. }
  112. sub omim_gene{
  113. my $omim=shift;
  114. my ($genes,%gen);
  115. return '-' unless defined $omim;
  116. $omim=~s/OMIM://;
  117. $omim=~s/;\s*$//;
  118. return '' if$omim eq '-';
  119. for my $ann (split /;/, $omim ){
  120. my ($gene)=(split /&/,$ann)[-1];
  121. unless(exists $gen{$gene}){
  122. $genes= $genes ? $genes.';'.$gene: $gene;
  123. $gen{$gene}=undef;
  124. }
  125. }
  126. return $genes;
  127. }