123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118 |
- #!/usr/bin/perl
- use strict;
- use warnings;
- use List::Util qw/max min /;
- use Data::Dumper;
- my $BIN=120;
- my (%ids);
- # my ($a,$b)= get_pad(105,200,$BIN);
- # print $b-$a, " $a, $b \n";
- # exit;
- my $bedf=shift;
- my $out=$bedf;
- $out=~s/\.bed/.uniq120.bed/;
- open my $bedfh, '<', $bedf or die "Cannot open file: $bedf $! \n";
- open my $wfh , '>' , $out or die "Cannot write file: $! \n";
- my ($pos, $str, $strand)=('', '', '');
- while(<$bedfh>){
- chomp;
- # last if $. > 50;
- my ($chr, $start, $end, $id, $qual , $plus)=split /\t/,$_;
- my $posi=join("\t",$chr,$start, $end );
- LABEL: my $over=0;
- $over=is_overlap($pos, $posi) if $pos;
- if($pos eq ''){
- $strand=$plus;
- $pos=$posi;
- $str=$id;
- }elsif($pos eq $posi){
- $str= length($str) > length($id) ? $id : $str;
- }elsif( $over ){
- my ($m_chr, $m_start, $m_end)=split /\t/,$pos;
- if($end <= $m_end){
- my ($last_id)=(split /,/,$str)[-1];
- $str=~s/$last_id$/$id/ if length($id) < length($last_id);
- }else{
- my ($f, $r)=split /_/,$over;
- $pos=join("\t",$chr,$f, $r );
- $str.=','.$id;
- $strand=$plus;
- }
- }else{
- my ($chr,$merge_s, $merge_e)=split /\t/, $pos;
- my ($pad_s,$pad_e)=get_pad($merge_s, $merge_e,$BIN);
- # print $pad_e-$pad_s," $.:$pad_s, $pad_e \n";
- if($str=~/,/){
- my @strs=split /,/,$str;
- for my $i(0..$#strs){
- my $id=$strs[$i];
- my $num=0 ;
- while(exists $ids{$id}){
- if($id=~/\.\d+$/){
- $id=~s/\.\d+$/.$num/;
- }else{
- $id=$id.'.'.$num;
- }
- $num++;
- }
- $ids{$id}=undef;
- if($i<$#strs){
- print $wfh join("\t", $chr, $pad_s, $pad_s+$BIN, $id, 1,$strand),"\n" ;
- }else{
- print $wfh join("\t", $chr, $pad_s, $pad_e, $id, 1,$strand),"\n" ;
- }
- $pad_s+=$BIN;
- last if $pad_s >= $pad_e;
- }
-
- }else{
- my $id=$str;
- my $num=0 ;
- while(exists $ids{$id}){
- if($id=~/\.\d+$/){
- $id=~s/\.\d+$/.$num/;
- }else{
- $id=$id.'.'.$num;
- }
- $num++;
- }
- $ids{$id}=undef;
- print $wfh join("\t", $chr, $pad_s, $pad_e, $id, 0,$strand),"\n" ;
- }
- last if eof($bedfh) and $posi eq "N:1-2";
- $strand=$plus;
- $pos=$posi;
- $str=$id;
- }
- if(eof($bedfh) ){
- $posi="N:1-2";
- goto LABEL;
- }
- last if eof($bedfh);
- }
- sub is_overlap{
- my ($fstr, $sstr)=@_;
- my ($f_chr, $f_start, $f_end)=split /\t/, $fstr;
- my ($s_chr, $s_start, $s_end)=split /\t/, $sstr;
- return 0 unless $f_chr eq $s_chr;
-
- my $max =max($f_start, $f_end, $s_start, $s_end) ;
- my $min =min($f_start, $f_end, $s_start, $s_end) ;
- if($max-$min < $f_end - $f_start + $s_end - $s_start +0.2 *$BIN){
- return $min.'_'.$max;
- }else{
- return 0;
- }
- }
- sub get_pad{
- my ($start,$end,$bin)=@_;
- my $num=($end-$start)/$bin;
- $num=int($num);
- my $length=($num*$bin-($end-$start))/2;
- return(int($start-$length),int($end+$length));
- }
|