#!/usr/bin/python # -*- coding:utf-8 -*- import argparse,time,pysam,os from SearchReadsFusion import DoubleReadsFusion as DRF from SearchReadsFusion import SingleReadsFusion as SRF def ReadBam(inbam): ID_dict={} bf=pysam.AlignmentFile(inbam,'rb') for read in bf: ID = read.qname POS = read.pos+1 CHR = read.reference_name FLAG = read.flag MAP_INFO = read.cigarstring MAPQ = read.mapq SEQ = read.seq #过滤掉比对质量低的hardclip reads if FLAG >245 and MAPQ <30: continue read_list=[ID,FLAG,CHR,POS,MAPQ,MAP_INFO,SEQ] if ID not in ID_dict.keys(): ID_dict[ID]=[read_list] else: ID_dict[ID].append(read_list) return ID_dict def CheckClip(reads_info): flag = False #[ID,flag,chr,pos,MAPQ,MAP_info,SEQ] for reads in reads_info: MAP_info = reads[5] if MAP_info == None: continue if 'H' in MAP_info: return "HARD_CLIP" elif 'S' in MAP_info: flag = True if flag: return "SOFT_CLIP" else: return "NO_CLIP" def main(inbam,outdir): t1=time.time() #将sam内容按照ID区分,放到ID_info中 ID_dict=ReadBam(inbam) #存放soft_clip的reads ID soft_clip_ID=[] #输出到本地hard clip fusion hard_clip_file=outdir+"/hard_clip_fusion_breakpoints.txt" hard_clip_out=open(hard_clip_file,'w') header=["Site","Site1","Site2","Seq","Overlap","Site1_Down","Site2_Down","ReadsID"] hard_clip_out.write("\t".join(header)+"\n") #找double reads 类 drf =DRF() for ID in ID_dict.keys(): reads_info = ID_dict[ID] clip_stat=CheckClip(reads_info) if clip_stat == "SOFT_CLIP": soft_clip_ID.append(ID) elif clip_stat == "HARD_CLIP": out_list=drf.HardClipFusion(reads_info) if len(out_list) !=0: out_list.append(ID) hard_clip_out.write("\t".join(out_list)+"\n") hard_clip_out.close() #输出到本地single single_points=outdir+"/soft_clip_single_breakpoints.txt" SingleInput=open(single_points,'w') header=["Site","MismatchSeq","MatchSeq","Site_Down","ReadsID"] SingleInput.write("\t".join(header)+"\n") #输出到本地double double_points=outdir+"/soft_clip_double_breakpoints.txt" DoubleInput=open(double_points,'w') #DoubleInput.write("\t".join(header)+"\n") #找single reads 类 srf=SRF() # for soft_ID in soft_clip_ID: reads_info = ID_dict[soft_ID] soft_clip_results=srf.SoftClipFusion(reads_info) if len(soft_clip_results[0]) !=0 and len(soft_clip_results[1]) !=0: if soft_clip_results[0][0] != soft_clip_results[1][0]: pass else: SingleInput.write("\t".join(soft_clip_results[0])+"\n") else: if len(soft_clip_results[0]) !=0: SingleInput.write("\t".join(soft_clip_results[0])+"\n") elif len(soft_clip_results[1]) !=0: SingleInput.write("\t".join(soft_clip_results[1])+"\n") SingleInput.close() DoubleInput.close() double_points_file=outdir+"/double_breakpoints.txt" os.system("cat "+hard_clip_file+" > "+double_points_file+" && cat "+double_points+" >> "+double_points_file+" && rm "+hard_clip_file+" && rm "+double_points) single_points_file=outdir+"/single_breakpoints.txt" os.system("cp "+single_points+" "+single_points_file+" && rm "+single_points) t2=time.time() print("Times: "+str(int(t2-t1))+"s") print("BreakPoints Search Done!") if __name__ == '__main__': parser = argparse.ArgumentParser(description='get reads mapping location') parser.add_argument('-i', required=True, type=str, help="inbam") parser.add_argument('-o', required=True, type=str, help="outdir") args = parser.parse_args() main(args.i, args.o)