123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104 |
- #!/usr/bin/perl
- use strict;
- use warnings;
- use Getopt::Long;
- my $segf=shift;
- my $th0=0.2;
- my $th1=0.7;
- my $th3=1.3;
- my $th4=1.7;
- my $out=$segf.'.xls';
- my $verbose=1;
- my $nprobe_th=5;
- sub parse_arguments{
- my $result = GetOptions (
- "Hom_del=s" => \$th0,
- "Het_del=s" => \$th1,
- "Het_amp=s" => \$th3,
- "Hom_amp=s" => \$th4,
- "output-basename=s" => \$out,
- "FP_CNV_Nprobes_threshold=i" => \$nprobe_th,
- "verbose=s" => \$verbose,
- );
- }
- my ($pre_cont, $pre_chr,$pre_type,$pre_ratio);
- open my $infh, '<', $segf or die "Cannot open file:$segf $!\n";
- open my $wfh , '>' , $out or die "Cannot write file:$! \n";
- while(<$infh>){
- chomp;
- if(/^Sample\s+/){
- print $wfh $_,"\tNum_merge\n";
- next;
- }
-
- my ($sample,$chr,$start,$end,$nprob,$seg)=split /\t/;
- my $type='';
-
- if($seg<$th0){
- $type='Hom_del';
- }elsif($seg < $th1){
- $type='Het_del';
- }elsif($seg < $th3){
- $type='Neutral';
- }elsif($seg < $th4){
- $type='Het_amp';
- }else{
- $type='Hom_amp';
- }
-
- my $first=0; ## filter FP CNV calls by Number of probes
- if($chr=~/Y/ and $start> 22800000){
- print "Omit AZFc CNV.\n";
- }elsif($nprob <= $nprobe_th and $seg > $th0 ){
- if ($chr and $chr eq $pre_chr){ # FP in chr
- $type=$pre_type;
- $seg=$pre_ratio;
- }else{ # FP at chr start
- $first=1;
- print $_,"\n";
- }
- }
-
- if( ! $pre_cont){
- $pre_cont=$_."\n";
- }elsif( ! $pre_type){ # pre is FP at chr start
- $pre_cont .= $_."\n";
- }elsif($pre_chr ne $chr or $type ne $pre_type or abs($seg-$pre_ratio)>0.5 ){
- print $wfh merge_cont($pre_cont),"\n";
- $pre_cont=$_."\n";
- }else{
- $pre_cont .= $_."\n";
- }
- $pre_type=$first ? '' : $type;
- $pre_chr=$chr;
- $pre_ratio=merge_cont($pre_cont,1);
- }
- print $wfh merge_cont($pre_cont),"\n";
- sub merge_cont{
- my $cont=shift;
- my $seg=shift;
- my ($tot_prob, $tot_seg, $num, $seg_st, $seg_end)=(0, 0, 0, 0, 0);
- my ( $sam,$chrs);
- for my $line(split /\n/,$cont){
- my ($sample,$chr,$start,$end,$nprob,$seg)=split /\t/,$line;
- $tot_prob+= $nprob;
- $tot_seg += $nprob*$seg;
- $num++;
- $seg_st=$start if $num==1;
- $seg_st =$seg_st > $start ? $start : $seg_st;
- $seg_end= $seg_end > $end ? $seg_end : $end ;
- $sam=$sample;
- $chrs=$chr;
- }
- my $avg_seg= sprintf("%.2f",$tot_seg/$tot_prob);
- if(defined $seg){
- return $avg_seg;
- }else{
- return join("\t",$sam,$chrs,$seg_st,$seg_end,$tot_prob,$avg_seg,$num);
- }
- }
|