123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107 |
- #!/usr/bin/perl
- use strict;
- use warnings;
- use Getopt::Std;
- use File::Basename;
- use File::Find;
- my $usage="Usage: trim verifyBam and gatk4 CalculateContamination result.
- result files name like \$sample.verify.selfSM and \$sample.contEST.tab.
- \tperl $0 outfile
- Options:
- -v verifyBam output directory. default '.' .
- -g gatk4 CalculateContamination output directory. default '.'.\n";
-
- my ($g_dir,$v_dir,@samples,%data);
- my %opts = (v => '.', g => '.', );
- my %opts1;
- getopts(':v:g:', \%opts1);
- unless (@ARGV){
- print $usage;
- exit;
- }
- for my $key (keys %opts1){
- my $val=$opts1{$key};
- $opts{$key}=$val;
- }
- my $out=shift;
- for my $key (keys %opts){
- my $val=$opts{$key};
- if($key eq 'g'){
- $g_dir=$val;
- print "gatk4 output directory: $val .\n";
- }
- if($key eq 'v' ){
- $v_dir= $val;
- print "verify output directory: $val .\n";
- }
- }
- print "\$g_dir:$g_dir;\$v_dir:$v_dir\n";
- my @gfiles;
- find(sub {
- push @gfiles,$File::Find::name if -f and /\.contEST.tab$/;
- }, $g_dir);
- die "No *.contEST.tab file under $g_dir,Please check.\n" unless @gfiles;
- @gfiles=sort @gfiles;
- for my $gfile(@gfiles){
- open my $gfh,'<',$gfile or die "Cannot open file:$!";
- my $sample=basename($gfile);
- $sample=~s/.contEST.tab//;
- push @samples,$sample;
- my $col;
- while(<$gfh>){
- chomp;
- if($.==1){
- my @conts=split /\t/;
- for my $i (0..$#conts){
- $col=$i if $conts[$i] eq 'contamination';
- }
- next;
- }
- my ($cont)=(split /\t/)[$col];
- $data{g}{$sample}=sprintf "%.4f",$cont;
- }
- close($gfh);
- }
- my @vfiles;
- find(sub {
- push @vfiles,$File::Find::name if -f and /\.verify.selfSM$/;
- }, $v_dir);
- die "No *.verify.selfSM file under $v_dir,Please check.\n" unless @vfiles;
- for my $vfile(@vfiles){
- open my $vfh,'<',$vfile or die "Cannot open file:$!";
- my $sample=basename($vfile);
- $sample=~s/.verify.selfSM//;
- push @samples,$sample unless is_elmt($sample,@samples);
- while(<$vfh>){
- next if /^#/;
- my ($id,$nsnp,$read,$avg_dp,$cont,$val1,$val2)=(split /\t/)[0,3..8] ;
- my $conts=sprintf("%.4f",($val2-$val1)/$val2);
- $data{v}{$sample}=join("\t",$nsnp,$avg_dp,sprintf("%.4f",$cont),$conts );
- }
- close($vfh);
- }
- open my $wfh,'>',$out or die "Cannot write file:$!\n";
- print $wfh join("\t","#Sample","SNPS","AVG_DP","Cont1","Cont2","Cont3"),"\n";
- for my $sample(@samples){
- my $g_result=exists $data{g}{$sample} ? $data{g}{$sample} :" ";
- my $v_result=exists $data{v}{$sample} ? $data{v}{$sample} :"\t"x3;
- print $wfh join("\t",$sample,$v_result,$g_result),"\n";
- }
- ;
- sub is_elmt{
- my ($var,@lists)=@_;
- for my $list(@lists){
- return 1 if $var eq $list;
- }
- return 0;
- }
|