#!/usr/bin/perl use strict; use warnings; use File::Find; use Getopt::Std; use File::Basename; my $usage="Usage:\n\tperl $0 outfile\n Options:\n -d input directory. default '.' . -q GQ,genotype quality. default 30. -x suffix,iSNP tab file ending with. default .isnp.tab. -s If Scan is required.True for YES,False for NO. default 1 (YES) . \n\n"; my ($in_dir,$GQ,$scan,$suffix); my %opts = (d => '.', s=>1,q=>30, x=> '.isnp.tab'); my %opts1; getopts(':d:q:s:x:', \%opts1); for my $key (keys %opts1){ my $val=$opts1{$key}; $opts{$key}=$val; } for my $key (keys %opts){ my $val=$opts{$key}; if($key eq 'd'){ $in_dir=$val; print "Input directory: $val .\n"; } if($key eq 'q'){ $GQ=$val; print "Minimal genotype quality: $val .\n"; } if($key eq 'x'){ $suffix=$val; print "Input files suffix : $val .\n"; } if($key eq 's'){ $scan=$val; print "Directory Scan is required.\n" if $scan; print "Sinple glob $in_dir for VariantsToTable outputs.\n" unless $scan; } } my @files; if($scan){ find(sub { push @files,$File::Find::name if -f and /$suffix$/; }, $in_dir.'/'); # print "sanning...\n"; }else{ @files=<$in_dir/*$suffix>; # print "No sanning...\n"; } unless (@ARGV){ print $usage; exit; } my (@samples,%data,%diffs); my $out=shift; my @cols; for my $file (sort @files){ my $sample=basename $file; $sample=~s/$suffix//; push @samples,$sample; open my $fh , '<', $file or die "Cannot open file:$!\n"; while(<$fh>){ chomp; if($.==1 and /^CHROM/){ my @conts=split /\t/; for my $i(0..$#conts){ push @cols,$i if $conts[$i] eq 'CHROM' or $conts[$i] eq 'POS' or $conts[$i] eq 'REF' or $conts[$i] eq 'ALT' or $conts[$i]=~/GT$/ or $conts[$i]=~/GQ$/; } next; } my ($chr,$pos,$ref,$alt,$call,$gq)=(split /\t/)[@cols]; next if $gq eq 'NA'; next if $gq<$GQ; my $id=join("\t",$chr,$pos,$ref,$alt); $data{$id}{$sample}=$call; } close($fh); } open my $wfh, '>' ,$out or die "Cannot write file:$!\n"; print $wfh join("\t",'ID1','ID2','Z0','Z1','Z2','PI_HAT','Num_probe'),"\n"; for my $i (0 .. $#samples-1 ){ my $sam_i=$samples[$i]; for my $j($i+1..$#samples){ my $sam_j=$samples[$j]; my $sam_id= $sam_i."\t".$sam_j; my ($diff0,$diff1,$diff2,$total)=(0,0,0,0); for my $id(keys %data){ my $call_i=exists $data{$id}{$sam_i} ? $data{$id}{$sam_i} : '.' ; my $call_j=exists $data{$id}{$sam_j} ? $data{$id}{$sam_j} : '.'; my $diff=call_diff($call_i,$call_j); if(defined $diff){ if($diff ==2){ $diff2++; }elsif($diff==1){ $diff1++; }elsif($diff==0){ $diff0++; } $total++; } } if($total){ my $z1=sprintf("%.3f",$diff1/$total); my $z2=sprintf("%.3f",$diff2/$total); my $pi_hat=sprintf("%.3f",$z2+$z1/2); print $wfh join("\t", $sam_i, $sam_j, sprintf("%.3f",$diff0/$total), $z1, $z2,$pi_hat,$total),"\n"; }else{ print $wfh join("\t",$sam_i,$sam_j,0,0,0,0,$total),"\n"; } } } sub call_diff{ my ($call_i,$call_j)=@_; if ($call_i =~/\./ or $call_j =~/\./){ return undef; } my ($pi,$mi)=split /\//, $call_i; my ($pj,$mj)=split /\//, $call_j; my $n_hom=1; $n_hom=2 if $pi eq $mi; $n_hom=2 if $pj eq $mj; my $match=0; for my $f($pi,$mi){ for my $s($pj,$mj){ $match++ if $f eq $s; } } return $match/$n_hom; }