breakpoint_stat_v6.3.py 2.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485
  1. #!/usr/bin/python
  2. # -*- coding:utf-8 -*-
  3. import argparse,time
  4. from StatReadsFusion import StatDoubleReadsFusion as SDRF
  5. def GetUniqUMIPrimer(ID_list,readsPrimer_dict):
  6. UMIs_dict={}
  7. Primer_dict={}
  8. for ID in ID_list:
  9. temp=ID.split("_")
  10. if len(temp)>=5:
  11. UMI_tmp=ID.split("_")
  12. UMI = UMI_tmp[2]
  13. ReadsID = UMI_tmp[0]
  14. primer = readsPrimer_dict[ReadsID]
  15. UMIs_dict[UMI]=0
  16. Primer_dict[primer]=0
  17. return len(UMIs_dict),len(Primer_dict)
  18. def OutResults(double_readsID_dict,double_site_dict,readsPrimer_dict,indir):
  19. #out fusionseq and stat double and single readsID
  20. Final_FusionSeq_dict={}
  21. for key_seq in double_readsID_dict.keys():
  22. site_list = double_site_dict[key_seq]
  23. ID_list=double_readsID_dict[key_seq]
  24. doubleID=len(ID_list)
  25. singleID=0
  26. results=GetUniqUMIPrimer(ID_list,readsPrimer_dict)
  27. UMIs = results[0]
  28. primerNum = results[1]
  29. site_list.append(str(doubleID))
  30. site_list.append(str(singleID))
  31. site_list.append(str(UMIs))
  32. site_list.append(str(primerNum))
  33. Final_FusionSeq_dict[key_seq]=site_list
  34. fusion_points_file=indir+"/fusion_stat.txt"
  35. fusion_points=open(fusion_points_file,'w')
  36. fusion_points.write("\t".join(['Point','Point1_End','Point2_End','Overlap','DoubleRead','SingleRead','UMIkind','PrimerPair','FusionSeq'])+"\n")
  37. for key_seq in Final_FusionSeq_dict.keys():
  38. fusion_points.write("\t".join(Final_FusionSeq_dict[key_seq])+"\t"+key_seq+"\n")
  39. fusion_points.close()
  40. #输出每个ID对应fusion
  41. fusion_points_readsID_file=indir+"/fusion_readsID.txt"
  42. fusion_points_readsID=open(fusion_points_readsID_file,'w')
  43. fusion_points_readsID.write("\t".join(['Point','ReadsID','DoubleSingle'])+"\n")
  44. for key_seq in double_readsID_dict.keys():
  45. ID_list=double_readsID_dict[key_seq]
  46. site=double_site_dict[key_seq][0]
  47. for ID in ID_list:
  48. fusion_points_readsID.write("\t".join([site,ID,"Double"])+"\n")
  49. fusion_points_readsID.close()
  50. def main(indir,readsIDFile):
  51. t1=time.time()
  52. double_readsID_dict={}
  53. double_site_dict={}
  54. readsPrimer_dict={}
  55. sdrf=SDRF()
  56. #double fusion
  57. double_points_file=indir+"/double_breakpoint.txt"
  58. sdrf.StatDouble(double_points_file,double_readsID_dict,double_site_dict)
  59. sdrf.StatPrimer(readsIDFile,readsPrimer_dict)
  60. #out results
  61. OutResults(double_readsID_dict,double_site_dict,readsPrimer_dict,indir)
  62. t2=time.time()
  63. print("Times: "+str(int(t2-t1))+"s")
  64. print("BreakPoints Stat Done!")
  65. if __name__ == '__main__':
  66. parser = argparse.ArgumentParser(description='get reads mapping location')
  67. parser.add_argument('-i', required=True, type=str, help="indir")
  68. parser.add_argument('-r', required=True, type=str, help="reads primer file")
  69. args = parser.parse_args()
  70. main(args.i,args.r)