#!/usr/bin/perl use strict; use warnings; use Getopt::Long; my $segf=shift; my $th0=0.2; my $th1=0.7; my $th3=1.3; my $th4=1.7; my $out=$segf.'.xls'; my $verbose=1; my $nprobe_th=5; sub parse_arguments{ my $result = GetOptions ( "Hom_del=s" => \$th0, "Het_del=s" => \$th1, "Het_amp=s" => \$th3, "Hom_amp=s" => \$th4, "output-basename=s" => \$out, "FP_CNV_Nprobes_threshold=i" => \$nprobe_th, "verbose=s" => \$verbose, ); } my ($pre_cont, $pre_chr,$pre_type,$pre_ratio); open my $infh, '<', $segf or die "Cannot open file:$segf $!\n"; open my $wfh , '>' , $out or die "Cannot write file:$! \n"; while(<$infh>){ chomp; if(/^Sample\s+/){ print $wfh $_,"\tNum_merge\n"; next; } my ($sample,$chr,$start,$end,$nprob,$seg)=split /\t/; my $type=''; if($seg<$th0){ $type='Hom_del'; }elsif($seg < $th1){ $type='Het_del'; }elsif($seg < $th3){ $type='Neutral'; }elsif($seg < $th4){ $type='Het_amp'; }else{ $type='Hom_amp'; } my $first=0; ## filter FP CNV calls by Number of probes if($chr=~/Y/ and $start> 22800000){ print "Omit AZFc CNV.\n"; }elsif($nprob <= $nprobe_th and $seg > $th0 ){ if ($chr and $chr eq $pre_chr){ # FP in chr $type=$pre_type; $seg=$pre_ratio; }else{ # FP at chr start $first=1; print $_,"\n"; } } if( ! $pre_cont){ $pre_cont=$_."\n"; }elsif( ! $pre_type){ # pre is FP at chr start $pre_cont .= $_."\n"; }elsif($pre_chr ne $chr or $type ne $pre_type or abs($seg-$pre_ratio)>0.5 ){ print $wfh merge_cont($pre_cont),"\n"; $pre_cont=$_."\n"; }else{ $pre_cont .= $_."\n"; } $pre_type=$first ? '' : $type; $pre_chr=$chr; $pre_ratio=merge_cont($pre_cont,1); } print $wfh merge_cont($pre_cont),"\n"; sub merge_cont{ my $cont=shift; my $seg=shift; my ($tot_prob, $tot_seg, $num, $seg_st, $seg_end)=(0, 0, 0, 0, 0); my ( $sam,$chrs); for my $line(split /\n/,$cont){ my ($sample,$chr,$start,$end,$nprob,$seg)=split /\t/,$line; $tot_prob+= $nprob; $tot_seg += $nprob*$seg; $num++; $seg_st=$start if $num==1; $seg_st =$seg_st > $start ? $start : $seg_st; $seg_end= $seg_end > $end ? $seg_end : $end ; $sam=$sample; $chrs=$chr; } my $avg_seg= sprintf("%.2f",$tot_seg/$tot_prob); if(defined $seg){ return $avg_seg; }else{ return join("\t",$sam,$chrs,$seg_st,$seg_end,$tot_prob,$avg_seg,$num); } }