trim_MTsite_VAF.pl 1.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. #!/usr/bin/perl
  2. use strict;
  3. use warnings;
  4. use File::Basename;
  5. my $ref=shift;
  6. my $file=shift;
  7. my (%dat);
  8. open my $vcffh, "gzip -dc $ref |" or die "Cannot open file:$! \n";
  9. while(<$vcffh>){
  10. next if /^#/;
  11. my ($chr, $pos, $id,$ref,$alt)=(split /\t/)[0..4];
  12. my $posid=join("\t",$chr, $pos, $ref,$alt);
  13. next if $id eq '.';
  14. $dat{$posid}=$id;
  15. }
  16. close($vcffh);
  17. open my $fh, '<', $file or die "Cannot open file: $! \n";
  18. my $sample=basename($file);
  19. $sample=~s/.SNPsite.vcf//;
  20. print join("\t","#Sample","Chr","Pos","Ref","Alt","ID","Depth","VAF"),"\n";
  21. while(<$fh>){
  22. chomp;
  23. next if /^#/;
  24. my ($chr,$pos,$ref, $alts,$ft, $calls )=(split /\t/)[0,1,3,4,8,9];
  25. my @alt=split /,/,$alts;
  26. for my $i(0..$#alt){
  27. my $call=$alt[$i];
  28. next if $call eq '*';
  29. my $posid=join("\t",$chr, $pos, $ref,$call);
  30. my $id=exists $dat{$posid} ? $dat{$posid} : 'NO_ID';
  31. my ($af,$dp)=get_AF($ft,$calls,$i);
  32. print join("\t",$sample,$chr,$pos,$ref,$call,$id,$dp,$af),"\n";
  33. }
  34. }
  35. close($fh);
  36. sub get_AF{
  37. my ($ft,$call,$i)=@_;
  38. return (' -',0) unless $call=~/:/;
  39. my @formats=split /:/,$ft;
  40. my $ad_index;
  41. for my $j(0..$#formats){
  42. $ad_index=$j if $formats[$j] eq 'AD';
  43. }
  44. my $ads=(split /:/,$call)[$ad_index];
  45. return (' -',0) unless $ads =~/,/;
  46. my $tot;
  47. for my $num(split /,/,$ads){
  48. $tot+=$num;
  49. }
  50. my $alt_num=(split /,/,$ads)[$i+1];
  51. return(sprintf("%.2f",$alt_num/$tot),$tot );
  52. }