#!/usr/bin/perl use strict; use warnings; use utf8; #binmode(STDOUT,":encoding(gbk)"); use open ":encoding(gbk)",":std"; my $nb_gene=3; my $prefix=shift; my $inf= -e $prefix.'-M-cnv.input.xls' ? $prefix.'-M-cnv.input.xls' : $prefix.'-F-cnv.input.xls'; my $annof=$prefix.'.cnv.anno.xls'; my (%data); open my $in_fh, '<', $inf or die "Cannot open file:$!"; while(<$in_fh>){ next if /^Sample/; my ($chr,$start,$end,$n_probe)=(split /\t/)[2,3,4,5]; my $id=join("\t",$chr,$start,$end); $data{$id}=$n_probe; } close($in_fh); open my $anno_fh, '<' , $annof or die "Cannot open file:$!"; open my $w_fh, '>',$prefix.'.CNV.xls' or die "Cannot write file:$!" ; while(<$anno_fh>){ chomp; if(/^Sample/){ print $w_fh join("\t", "检出变异\tChr\tStart\tEnd\t变异类型\t变异长度\t包含基因\t重复/缺失", 'Num_Probes',"Sample\tGender\tCopy-ratio", "Band_region\tGene\tGeneReview\tDECIPHER\tClinVarCNV\tOMIM\tHGMD"),"\n"; next; } #my ($chr,$start,$end,$n_probe)=(split /\t/)[2,3,4,5]; my ($sample,$gender,$chr,$start,$end,$len,$ratio, $band,$gene,$gr,$dp,$cc,$omim,$hgmd)=split /\t/; my $id=join("\t",$chr,$start,$end); my $nprobe=$data{$id}; my $cnv_type=$ratio>1? 'dup': 'del'; my $hgvs=cnv_hgvs($chr,$start,$end,$band,$cnv_type); my $tlen=trim_len($len); my $re_gene=trim_gene($gene,$omim); $re_gene=~s/;/, /g; my $c_type=$ratio>1? '重复': '缺失'; $gene=~s/^gene://; $gr=~s/^geneReview://; $dp=~s/^Decipher://; $cc=~s/^clinvarCnv://; $omim=~s/^OMIM://; $hgmd=~s/^HGMD://; my $sv_type=''; if( $gender eq 'M' and ($chr=~/X/ or $chr=~/Y/) ){ $sv_type= $ratio>1 ? '重复' : '半合子缺失'; }elsif($ratio<0.2){ $sv_type='纯合缺失'; }else{ $sv_type= $ratio>1 ? '重复' : '杂合缺失'; } $chr= $chr=~/chr/ ? $chr : 'chr'.$chr ; print $w_fh join("\t",$hgvs,$chr,$start,$end,$sv_type,$tlen,$re_gene,$c_type, $nprobe,$sample,$gender,$ratio,$band,$gene,$gr,$dp,$cc,$omim,$hgmd),"\n"; } close($anno_fh); sub cnv_hgvs{ my ($chr,$start,$end, $band, $type,$ref)=@_; unless(defined $ref){ $ref='GRCh37' } $chr=~s/chr//; $band=~s/^[^qp]+([pq])/$1/; return "seq[$ref] $type($chr)($band)chr$chr:g.$start".'_'."$end$type"; } sub trim_len{ my $len=shift; $len=int($len); if($len <1000){ return $len.'bp'; }elsif($len <1000000){ my $len_kb=sprintf("%.1f", $len/1000); return "约".$len_kb.'kb'; }elsif($len < 1000000000){ my $len_mb=sprintf("%.1f", $len/1000000); return "约".$len_mb.'M' ; }else{ return 'NA'; } } sub trim_gene{ my ($genes, $omim)=@_; my ($num,$fgene); $genes=~s/gene://; $num=0; $fgene=''; my $o_genes=omim_gene($omim); for my $gene(split /;/,$o_genes){ next if $gene eq '-'; $num++; if($num< $nb_gene+1){ $fgene= $fgene ? $fgene.';'.$gene : $gene; }else{ return $fgene."等"; } $genes=~s/$gene;//; } my $re_gene=$fgene.';'.$genes; $num=0; $fgene=''; for my $gene(split /;/,$re_gene){ next unless $gene; $num++; return $fgene."等" if $num==$nb_gene+1; $fgene= $fgene ? $fgene.';'.$gene : $gene; } return $fgene; } sub omim_gene{ my $omim=shift; my ($genes,%gen); return '-' unless defined $omim; $omim=~s/OMIM://; $omim=~s/;\s*$//; return '' if$omim eq '-'; for my $ann (split /;/, $omim ){ my ($gene)=(split /&/,$ann)[-1]; unless(exists $gen{$gene}){ $genes= $genes ? $genes.';'.$gene: $gene; $gen{$gene}=undef; } } return $genes; }