#!/usr/bin/python
# -*- coding:utf-8 -*-
"""Summarise double-read fusion breakpoints found in an input directory.

Reads ``<indir>/double_breakpoints.txt`` and a reads/primer file through the
``StatDoubleReadsFusion`` helper, then writes ``<indir>/fusion_stat.txt`` and
``<indir>/fusion_readsID.txt``.
"""
import argparse
import time

from StatReadsFusion import StatDoubleReadsFusion as SDRF


def GetUniqUMIPrimer(ID_list,readsPrimer_dict):
    """Count the distinct UMIs and primers among the given read IDs.

    Each ID is split on "_".  IDs with fewer than five fields are ignored.
    For the rest, field 0 is used as the lookup key into ``readsPrimer_dict``
    and field 2 is taken as the UMI.

    Args:
        ID_list: iterable of "_"-separated read ID strings.
        readsPrimer_dict: mapping from field 0 of an ID to its primer.

    Returns:
        Tuple ``(number of distinct UMIs, number of distinct primers)``.

    Raises:
        KeyError: if field 0 of a qualifying ID is not in ``readsPrimer_dict``.
    """
    umis = set()
    primers = set()
    for ID in ID_list:
        fields = ID.split("_")
        if len(fields) < 5:
            continue
        # Look the primer up first so that a missing read ID raises before
        # anything is recorded for this ID.
        primers.add(readsPrimer_dict[fields[0]])
        umis.add(fields[2])
    return len(umis), len(primers)


def OutResults(double_readsID_dict,double_site_dict,readsPrimer_dict,indir):
    """Write the per-fusion statistics and the per-read fusion assignment.

    Two tab-separated files are created in ``indir``:

    * ``fusion_stat.txt`` - one row per fusion sequence: the entries of its
      site list, then the double-read count, the single-read count (always 0
      here), the distinct-UMI count, the distinct-primer count and finally
      the fusion sequence itself.
    * ``fusion_readsID.txt`` - one row per read ID: element 0 of the site
      list, the read ID and the literal tag "Double".

    Args:
        double_readsID_dict: mapping fusion sequence -> list of read IDs.
        double_site_dict: mapping fusion sequence -> list of strings.
            Presumably Points / Point1_End / Point2_End / Overlap, judging by
            the header written below - confirm against StatReadsFusion.
            The lists are not modified.
        readsPrimer_dict: mapping read ID -> primer, see GetUniqUMIPrimer.
        indir: directory that receives both output files.
    """
    # Build every row before opening any file, so a KeyError from the primer
    # lookup does not leave a half-written output behind.
    Final_FusionSeq_dict={}
    for key_seq in double_readsID_dict.keys():
        ID_list = double_readsID_dict[key_seq]
        doubleID = len(ID_list)
        singleID = 0  # only double reads are handled by this script
        UMIs, primerNum = GetUniqUMIPrimer(ID_list,readsPrimer_dict)
        # Copy the site list instead of appending to it: the original object
        # belongs to the caller's double_site_dict and must stay untouched.
        Final_FusionSeq_dict[key_seq] = list(double_site_dict[key_seq]) + [
            str(doubleID),
            str(singleID),
            str(UMIs),
            str(primerNum),
        ]

    fusion_points_file = indir+"/fusion_stat.txt"
    with open(fusion_points_file,'w') as fusion_points:
        fusion_points.write("\t".join(['Points','Point1_End','Point2_End','Overlap','DoubleReads','SingleReads','UMIkinds','PrimerPairs','FusionSeq'])+"\n")
        for key_seq in Final_FusionSeq_dict.keys():
            fusion_points.write("\t".join(Final_FusionSeq_dict[key_seq])+"\t"+key_seq+"\n")

    # Write which fusion each read ID belongs to.
    fusion_points_readsID_file = indir+"/fusion_readsID.txt"
    with open(fusion_points_readsID_file,'w') as fusion_points_readsID:
        fusion_points_readsID.write("\t".join(['Points','ReadsID','DoubleSingle'])+"\n")
        for key_seq in double_readsID_dict.keys():
            site = double_site_dict[key_seq][0]
            for ID in double_readsID_dict[key_seq]:
                fusion_points_readsID.write("\t".join([site,ID,"Double"])+"\n")


def main(indir,readsIDFile):
    """Collect the double-read fusion data and write the result files.

    Args:
        indir: directory containing ``double_breakpoints.txt``; also receives
            the output files written by OutResults.
        readsIDFile: path of the reads/primer file passed to ``StatPrimer``.
    """
    t1 = time.time()
    # NOTE(review): these dicts are handed to the SDRF helpers empty and read
    # afterwards, so the helpers presumably fill them in place - confirm in
    # StatReadsFusion.
    double_readsID_dict = {}
    double_site_dict = {}
    readsPrimer_dict = {}
    sdrf = SDRF()
    # double fusion
    double_points_file = indir+"/double_breakpoints.txt"
    sdrf.StatDouble(double_points_file,double_readsID_dict,double_site_dict)
    sdrf.StatPrimer(readsIDFile,readsPrimer_dict)
    # out results
    OutResults(double_readsID_dict,double_site_dict,readsPrimer_dict,indir)
    t2 = time.time()
    print("Times: "+str(int(t2-t1))+"s")
    print("BreakPoints Stat Done!")


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='get reads mapping location')
    parser.add_argument('-i', required=True, type=str, help="indir")
    parser.add_argument('-r', required=True, type=str, help="reads primer file")
    args = parser.parse_args()
    main(args.i,args.r)