calc_isnp_kin.pl 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144
  1. #!/usr/bin/perl
  2. use strict;
  3. use warnings;
  4. use File::Find;
  5. use Getopt::Std;
  6. use File::Basename;
  7. my $usage="Usage:\n\tperl $0 outfile\n
  8. Options:\n
  9. -d input directory. default '.' .
  10. -q GQ,genotype quality. default 30.
  11. -x suffix,iSNP tab file ending with. default .isnp.tab.
  12. -s If Scan is required.True for YES,False for NO. default 1 (YES) . \n\n";
  13. my ($in_dir,$GQ,$scan,$suffix);
  14. my %opts = (d => '.', s=>1,q=>30, x=> '.isnp.tab');
  15. my %opts1;
  16. getopts(':d:q:s:x:', \%opts1);
  17. for my $key (keys %opts1){
  18. my $val=$opts1{$key};
  19. $opts{$key}=$val;
  20. }
  21. for my $key (keys %opts){
  22. my $val=$opts{$key};
  23. if($key eq 'd'){
  24. $in_dir=$val;
  25. print "Input directory: $val .\n";
  26. }
  27. if($key eq 'q'){
  28. $GQ=$val;
  29. print "Minimal genotype quality: $val .\n";
  30. }
  31. if($key eq 'x'){
  32. $suffix=$val;
  33. print "Input files suffix : $val .\n";
  34. }
  35. if($key eq 's'){
  36. $scan=$val;
  37. print "Directory Scan is required.\n" if $scan;
  38. print "Sinple glob $in_dir for VariantsToTable outputs.\n" unless $scan;
  39. }
  40. }
  41. my @files;
  42. if($scan){
  43. find(sub {
  44. push @files,$File::Find::name if -f and /$suffix$/;
  45. }, $in_dir.'/');
  46. # print "sanning...\n";
  47. }else{
  48. @files=<$in_dir/*$suffix>;
  49. # print "No sanning...\n";
  50. }
  51. unless (@ARGV){
  52. print $usage;
  53. exit;
  54. }
  55. my (@samples,%data,%diffs);
  56. my $out=shift;
  57. my @cols;
  58. for my $file (sort @files){
  59. my $sample=basename $file;
  60. $sample=~s/$suffix//;
  61. push @samples,$sample;
  62. open my $fh , '<', $file or die "Cannot open file:$!\n";
  63. while(<$fh>){
  64. chomp;
  65. if($.==1 and /^CHROM/){
  66. my @conts=split /\t/;
  67. for my $i(0..$#conts){
  68. push @cols,$i if $conts[$i] eq 'CHROM' or $conts[$i] eq 'POS' or $conts[$i] eq 'REF'
  69. or $conts[$i] eq 'ALT' or $conts[$i]=~/GT$/ or $conts[$i]=~/GQ$/;
  70. }
  71. next;
  72. }
  73. my ($chr,$pos,$ref,$alt,$call,$gq)=(split /\t/)[@cols];
  74. next if $gq eq 'NA';
  75. next if $gq<$GQ;
  76. my $id=join("\t",$chr,$pos,$ref,$alt);
  77. $data{$id}{$sample}=$call;
  78. }
  79. close($fh);
  80. }
  81. open my $wfh, '>' ,$out or die "Cannot write file:$!\n";
  82. print $wfh join("\t",'ID1','ID2','Z0','Z1','Z2','PI_HAT','Num_probe'),"\n";
  83. for my $i (0 .. $#samples-1 ){
  84. my $sam_i=$samples[$i];
  85. for my $j($i+1..$#samples){
  86. my $sam_j=$samples[$j];
  87. my $sam_id= $sam_i."\t".$sam_j;
  88. my ($diff0,$diff1,$diff2,$total)=(0,0,0,0);
  89. for my $id(keys %data){
  90. my $call_i=exists $data{$id}{$sam_i} ? $data{$id}{$sam_i} : '.' ;
  91. my $call_j=exists $data{$id}{$sam_j} ? $data{$id}{$sam_j} : '.';
  92. my $diff=call_diff($call_i,$call_j);
  93. if(defined $diff){
  94. if($diff ==2){
  95. $diff2++;
  96. }elsif($diff==1){
  97. $diff1++;
  98. }elsif($diff==0){
  99. $diff0++;
  100. }
  101. $total++;
  102. }
  103. }
  104. if($total){
  105. my $z1=sprintf("%.3f",$diff1/$total);
  106. my $z2=sprintf("%.3f",$diff2/$total);
  107. my $pi_hat=sprintf("%.3f",$z2+$z1/2);
  108. print $wfh join("\t", $sam_i, $sam_j, sprintf("%.3f",$diff0/$total), $z1,
  109. $z2,$pi_hat,$total),"\n";
  110. }else{
  111. print $wfh join("\t",$sam_i,$sam_j,0,0,0,0,$total),"\n";
  112. }
  113. }
  114. }
  115. sub call_diff{
  116. my ($call_i,$call_j)=@_;
  117. if ($call_i =~/\./ or $call_j =~/\./){
  118. return undef;
  119. }
  120. my ($pi,$mi)=split /\//, $call_i;
  121. my ($pj,$mj)=split /\//, $call_j;
  122. my $n_hom=1;
  123. $n_hom=2 if $pi eq $mi;
  124. $n_hom=2 if $pj eq $mj;
  125. my $match=0;
  126. for my $f($pi,$mi){
  127. for my $s($pj,$mj){
  128. $match++ if $f eq $s;
  129. }
  130. }
  131. return $match/$n_hom;
  132. }