contam_sum.pl 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  1. #!/usr/bin/perl
  2. use strict;
  3. use warnings;
  4. use Getopt::Std;
  5. use File::Basename;
  6. use File::Find;
  7. my $usage="Usage: trim verifyBam and gatk4 CalculateContamination result.
  8. result files name like \$sample.verify.selfSM and \$sample.contEST.tab.
  9. \tperl $0 outfile
  10. Options:
  11. -v verifyBam output directory. default '.' .
  12. -g gatk4 CalculateContamination output directory. default '.'.\n";
  13. my ($g_dir,$v_dir,@samples,%data);
  14. my %opts = (v => '.', g => '.', );
  15. my %opts1;
  16. getopts(':v:g:', \%opts1);
  17. unless (@ARGV){
  18. print $usage;
  19. exit;
  20. }
  21. for my $key (keys %opts1){
  22. my $val=$opts1{$key};
  23. $opts{$key}=$val;
  24. }
  25. my $out=shift;
  26. for my $key (keys %opts){
  27. my $val=$opts{$key};
  28. if($key eq 'g'){
  29. $g_dir=$val;
  30. print "gatk4 output directory: $val .\n";
  31. }
  32. if($key eq 'v' ){
  33. $v_dir= $val;
  34. print "verify output directory: $val .\n";
  35. }
  36. }
  37. print "\$g_dir:$g_dir;\$v_dir:$v_dir\n";
  38. my @gfiles;
  39. find(sub {
  40. push @gfiles,$File::Find::name if -f and /\.contEST.tab$/;
  41. }, $g_dir);
  42. die "No *.contEST.tab file under $g_dir,Please check.\n" unless @gfiles;
  43. @gfiles=sort @gfiles;
  44. for my $gfile(@gfiles){
  45. open my $gfh,'<',$gfile or die "Cannot open file:$!";
  46. my $sample=basename($gfile);
  47. $sample=~s/.contEST.tab//;
  48. push @samples,$sample;
  49. my $col;
  50. while(<$gfh>){
  51. chomp;
  52. if($.==1){
  53. my @conts=split /\t/;
  54. for my $i (0..$#conts){
  55. $col=$i if $conts[$i] eq 'contamination';
  56. }
  57. next;
  58. }
  59. my ($cont)=(split /\t/)[$col];
  60. $data{g}{$sample}=sprintf "%.4f",$cont;
  61. }
  62. close($gfh);
  63. }
  64. my @vfiles;
  65. find(sub {
  66. push @vfiles,$File::Find::name if -f and /\.verify.selfSM$/;
  67. }, $v_dir);
  68. die "No *.verify.selfSM file under $v_dir,Please check.\n" unless @vfiles;
  69. for my $vfile(@vfiles){
  70. open my $vfh,'<',$vfile or die "Cannot open file:$!";
  71. my $sample=basename($vfile);
  72. $sample=~s/.verify.selfSM//;
  73. push @samples,$sample unless is_elmt($sample,@samples);
  74. while(<$vfh>){
  75. next if /^#/;
  76. my ($id,$nsnp,$read,$avg_dp,$cont,$val1,$val2)=(split /\t/)[0,3..8] ;
  77. my $conts=sprintf("%.4f",($val2-$val1)/$val2);
  78. $data{v}{$sample}=join("\t",$nsnp,$avg_dp,sprintf("%.4f",$cont),$conts );
  79. }
  80. close($vfh);
  81. }
  82. open my $wfh,'>',$out or die "Cannot write file:$!\n";
  83. print $wfh join("\t","#Sample","SNPS","AVG_DP","Cont1","Cont2","Cont3"),"\n";
  84. for my $sample(@samples){
  85. my $g_result=exists $data{g}{$sample} ? $data{g}{$sample} :" ";
  86. my $v_result=exists $data{v}{$sample} ? $data{v}{$sample} :"\t"x3;
  87. print $wfh join("\t",$sample,$v_result,$g_result),"\n";
  88. }
  89. ;
  90. sub is_elmt{
  91. my ($var,@lists)=@_;
  92. for my $list(@lists){
  93. return 1 if $var eq $list;
  94. }
  95. return 0;
  96. }