#!/usr/bin/perl use strict; use warnings; use List::Util qw/max min /; use Data::Dumper; my $BIN=120; my (%ids); # my ($a,$b)= get_pad(105,200,$BIN); # print $b-$a, " $a, $b \n"; # exit; my $bedf=shift; my $out=$bedf; $out=~s/\.bed/.uniq120.bed/; open my $bedfh, '<', $bedf or die "Cannot open file: $bedf $! \n"; open my $wfh , '>' , $out or die "Cannot write file: $! \n"; my ($pos, $str, $strand)=('', '', ''); while(<$bedfh>){ chomp; # last if $. > 50; my ($chr, $start, $end, $id, $qual , $plus)=split /\t/,$_; my $posi=join("\t",$chr,$start, $end ); LABEL: my $over=0; $over=is_overlap($pos, $posi) if $pos; if($pos eq ''){ $strand=$plus; $pos=$posi; $str=$id; }elsif($pos eq $posi){ $str= length($str) > length($id) ? $id : $str; }elsif( $over ){ my ($m_chr, $m_start, $m_end)=split /\t/,$pos; if($end <= $m_end){ my ($last_id)=(split /,/,$str)[-1]; $str=~s/$last_id$/$id/ if length($id) < length($last_id); }else{ my ($f, $r)=split /_/,$over; $pos=join("\t",$chr,$f, $r ); $str.=','.$id; $strand=$plus; } }else{ my ($chr,$merge_s, $merge_e)=split /\t/, $pos; my ($pad_s,$pad_e)=get_pad($merge_s, $merge_e,$BIN); # print $pad_e-$pad_s," $.:$pad_s, $pad_e \n"; if($str=~/,/){ my @strs=split /,/,$str; for my $i(0..$#strs){ my $id=$strs[$i]; my $num=0 ; while(exists $ids{$id}){ if($id=~/\.\d+$/){ $id=~s/\.\d+$/.$num/; }else{ $id=$id.'.'.$num; } $num++; } $ids{$id}=undef; if($i<$#strs){ print $wfh join("\t", $chr, $pad_s, $pad_s+$BIN, $id, 1,$strand),"\n" ; }else{ print $wfh join("\t", $chr, $pad_s, $pad_e, $id, 1,$strand),"\n" ; } $pad_s+=$BIN; last if $pad_s >= $pad_e; } }else{ my $id=$str; my $num=0 ; while(exists $ids{$id}){ if($id=~/\.\d+$/){ $id=~s/\.\d+$/.$num/; }else{ $id=$id.'.'.$num; } $num++; } $ids{$id}=undef; print $wfh join("\t", $chr, $pad_s, $pad_e, $id, 0,$strand),"\n" ; } last if eof($bedfh) and $posi eq "N:1-2"; $strand=$plus; $pos=$posi; $str=$id; } if(eof($bedfh) ){ $posi="N:1-2"; goto LABEL; } last if eof($bedfh); } sub is_overlap{ my ($fstr, $sstr)=@_; my ($f_chr, $f_start, $f_end)=split /\t/, $fstr; my ($s_chr, $s_start, $s_end)=split /\t/, $sstr; return 0 unless $f_chr eq $s_chr; my $max =max($f_start, $f_end, $s_start, $s_end) ; my $min =min($f_start, $f_end, $s_start, $s_end) ; if($max-$min < $f_end - $f_start + $s_end - $s_start +0.2 *$BIN){ return $min.'_'.$max; }else{ return 0; } } sub get_pad{ my ($start,$end,$bin)=@_; my $num=($end-$start)/$bin; $num=int($num); my $length=($num*$bin-($end-$start))/2; return(int($start-$length),int($end+$length)); }