12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485 |
- #!/usr/bin/python
- # -*- coding:utf-8 -*-
- import argparse,time
- from StatReadsFusion import StatDoubleReadsFusion as SDRF
def GetUniqUMIPrimer(ID_list,readsPrimer_dict):
    """Count the distinct UMIs and primers found among a list of IDs.

    Each ID is split on "_". IDs with fewer than five fields are skipped.
    For the remaining IDs, field 2 is taken as the UMI and field 0 is used
    as the key to look up the primer in ``readsPrimer_dict``.

    Args:
        ID_list: iterable of underscore-delimited ID strings.
        readsPrimer_dict: mapping from field 0 of an ID to its primer
            (the primer value must be hashable).

    Returns:
        A tuple ``(distinct UMI count, distinct primer count)``.

    Raises:
        KeyError: if field 0 of a qualifying ID is missing from
            ``readsPrimer_dict`` (assuming it is a plain dict).
    """
    umis = set()
    primers = set()
    for ID in ID_list:
        fields = ID.split("_")
        if len(fields) < 5:
            # Too few fields to carry both a reads ID and a UMI; ignore.
            continue
        umis.add(fields[2])
        primers.add(readsPrimer_dict[fields[0]])
    return len(umis), len(primers)
def OutResults(double_readsID_dict,double_site_dict,readsPrimer_dict,indir):
    """Write the fusion summary table and the per-read table into ``indir``.

    Two tab-separated files are written:

    * ``<indir>/fusion_stat.txt`` -- one row per fusion sequence: the site
      fields stored in ``double_site_dict``, then the number of IDs for the
      sequence, the single-read count (always 0 in this function), the
      distinct UMI count, the distinct primer count, and finally the fusion
      sequence itself.
    * ``<indir>/fusion_readsID.txt`` -- one row per ID: the first site
      field of its fusion sequence, the ID, and the literal "Double".

    The lists held in ``double_site_dict`` are not modified; each output
    row is built from a copy.

    Args:
        double_readsID_dict: mapping from fusion sequence to a list of IDs.
        double_site_dict: mapping from fusion sequence to a list of site
            fields (must be strings, since they are joined with tabs;
            presumably Point, Point1_End, Point2_End, Overlap to match the
            header -- confirm against the producer of this dict).
        readsPrimer_dict: mapping passed through to ``GetUniqUMIPrimer``.
        indir: directory the two output files are written to.

    Raises:
        KeyError: if a fusion sequence is missing from ``double_site_dict``
            or ``GetUniqUMIPrimer`` fails a primer lookup.
    """
    # Build every summary row before touching the filesystem, so a failed
    # lookup does not leave a truncated output file behind.
    stat_rows = []
    for key_seq in double_readsID_dict.keys():
        ID_list = double_readsID_dict[key_seq]
        doubleID = len(ID_list)
        singleID = 0
        UMIs, primerNum = GetUniqUMIPrimer(ID_list, readsPrimer_dict)
        # Copy the site list: appending in place would grow the caller's
        # data and duplicate columns if this function ran twice.
        row = list(double_site_dict[key_seq])
        row.extend([str(doubleID), str(singleID), str(UMIs), str(primerNum)])
        stat_rows.append((key_seq, row))

    fusion_points_file = indir+"/fusion_stat.txt"
    with open(fusion_points_file, 'w') as fusion_points:
        fusion_points.write("\t".join(['Point','Point1_End','Point2_End','Overlap','DoubleRead','SingleRead','UMIkind','PrimerPair','FusionSeq'])+"\n")
        for key_seq, row in stat_rows:
            fusion_points.write("\t".join(row)+"\t"+key_seq+"\n")

    # Write the fusion point associated with each ID.
    fusion_points_readsID_file = indir+"/fusion_readsID.txt"
    with open(fusion_points_readsID_file, 'w') as fusion_points_readsID:
        fusion_points_readsID.write("\t".join(['Point','ReadsID','DoubleSingle'])+"\n")
        for key_seq in double_readsID_dict.keys():
            site = double_site_dict[key_seq][0]
            for ID in double_readsID_dict[key_seq]:
                fusion_points_readsID.write("\t".join([site,ID,"Double"])+"\n")
def main(indir,readsIDFile):
    """Run the breakpoint statistics for one input directory.

    Reads ``<indir>/double_breakpoint.txt`` through ``SDRF.StatDouble`` and
    ``readsIDFile`` through ``SDRF.StatPrimer``, then hands the collected
    dictionaries to ``OutResults``. The elapsed wall-clock time in whole
    seconds and a completion message are printed at the end.

    Args:
        indir: directory holding ``double_breakpoint.txt``; also passed to
            ``OutResults`` as the output location.
        readsIDFile: path handed to ``SDRF.StatPrimer``.
    """
    started = time.time()

    # Containers passed to the SDRF helpers; presumably filled in place by
    # them (their implementation is not visible here -- confirm).
    reads_by_seq = {}
    sites_by_seq = {}
    primer_by_read = {}

    collector = SDRF()
    # double fusion
    collector.StatDouble(indir + "/double_breakpoint.txt", reads_by_seq, sites_by_seq)
    collector.StatPrimer(readsIDFile, primer_by_read)

    # out results
    OutResults(reads_by_seq, sites_by_seq, primer_by_read, indir)

    elapsed_seconds = int(time.time() - started)
    print("Times: " + str(elapsed_seconds) + "s")
    print("BreakPoints Stat Done!")
if __name__ == '__main__':
    # Command-line entry point: both options are mandatory strings.
    cli = argparse.ArgumentParser(description='get reads mapping location')
    for flag, help_text in (('-i', "indir"), ('-r', "reads primer file")):
        cli.add_argument(flag, required=True, type=str, help=help_text)
    options = cli.parse_args()
    main(options.i, options.r)
|