QAseq_batch_pbs_v5.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  1. #!/usr/bin/python
  2. # -*- coding:utf-8 -*-
  3. import argparse,os
  4. SCRIPT_DIR=os.path.split(os.path.realpath(__file__))[0]
  5. primer_info=SCRIPT_DIR+"/DB/primer_info_final_v3.txt"
  6. downsample_fq_script=SCRIPT_DIR+"/shell/00_downsample_fq.sh"
  7. Primer_Check_script=SCRIPT_DIR+"/py_script/Primer_Check_big_v6.1.py"
  8. fastp_script=SCRIPT_DIR+"/shell/01_fastp_v3.sh"
  9. map_QAseq_script=SCRIPT_DIR+"/shell/02_map_QAseq_nodup.sh"
  10. fusion_breakpoint=SCRIPT_DIR+"/shell/04_fusion_breakpoint_v1.3.sh"
  11. combine_script=SCRIPT_DIR+"/shell/05_results_combine.sh"
  12. def get_pbs(sample,R1,R2,outdir):
  13. tmp_command="mkdir -p "+outdir+"/pbs"
  14. os.system(tmp_command)
  15. sample_pbs=open(outdir+"/pbs/"+sample+".pbs",'w')
  16. outdir_sample=outdir+"/"+sample
  17. command0="source /cgdata/IVDRD/py3_18gene_env/bin/activate \n"
  18. sample_pbs.write(command0)
  19. #header="#!/bin/bash\n\n#SBATCH --mail-type=end\n#SBATCH --mail-user=yanfang.tan@nuprobe.com\n#SBATCH --job-name=test\n#SBATCH --partition=64c512g\n#SBATCH -N 1\n#SBATCH -J QAseqFus_v2\n#SBATCH --ntasks-per-node=4\n#SBATCH --output=%j.o\n#SBATCH --error=%j.e\n\n"
  20. #sample_pbs.write(header)
  21. #command0="module load miniconda3 \nsource activate QAseq_fusion \n"
  22. #sample_pbs.write(command0)
  23. #downsample 1M
  24. command1="mkdir -p "+outdir_sample+";\n"
  25. sample_pbs.write(command1)
  26. command2="mkdir -p "+outdir_sample+"/1.QC;\n"
  27. sample_pbs.write(command2)
  28. #command3="sh "+downsample_fq_script+" "+R1+" "+R2+" "+outdir_sample+"/1.QC "+sample+";\n"
  29. #sample_pbs.write(command3)
  30. #check primer
  31. command4="python "+Primer_Check_script+" -p "+primer_info+" -R1 "+R1+" -R2 "+R2+" -o "+outdir_sample+"/1.QC/"+sample+"_reads.txt;\n"
  32. sample_pbs.write(command4)
  33. #trim adapter
  34. command5="sh "+fastp_script+" "+outdir_sample+"/1.QC/"+sample+"_reads_primer_check_R1.fastq "+outdir_sample+"/1.QC/"+sample+"_reads_primer_check_R2.fastq "+sample+" "+outdir_sample+";\n"
  35. sample_pbs.write(command5)
  36. #mapping
  37. command6="mkdir -p "+outdir_sample+"/2.mapping;\n"
  38. sample_pbs.write(command6)
  39. command7="sh "+map_QAseq_script+" "+outdir_sample+"/1.QC/"+sample+"_trim_1.fq.gz "+outdir_sample+"/1.QC/"+sample+"_trim_2.fq.gz "+sample+" "+outdir_sample+" 2> "+outdir_sample+"/2.mapping/"+sample+".map.log;\n"
  40. sample_pbs.write(command7)
  41. #breakpoint
  42. command20="mkdir -p "+outdir_sample+"/4.fusion;\n"
  43. sample_pbs.write(command20)
  44. command22="sh "+fusion_breakpoint+" "+outdir_sample+"/4.fusion "+outdir_sample+"/1.QC/"+sample+"_reads.txt "+outdir_sample+"/2.mapping/"+sample+".sort.bam ;\n"
  45. sample_pbs.write(command22)
  46. #combine final results
  47. command25="sh "+combine_script+" "+sample+" "+outdir+";\n"
  48. sample_pbs.write(command25)
  49. sample_pbs.close()
  50. return outdir+"/pbs/"+sample+".pbs"
  51. def submit_pbs(pbsfile):
  52. command="qsub -l mem=30G "+pbsfile
  53. os.system(command)
  54. def get_sample(sampleFile):
  55. sample_dict={}
  56. with open(sampleFile,'r') as sf:
  57. for line in sf:
  58. sample=line.strip("\n").split("\t")[0]
  59. sample_dict[sample]=["",""]
  60. return sample_dict
  61. def main(indir,outdir,sampleFile):
  62. sample_dict=get_sample(sampleFile)
  63. list_files=os.listdir(indir)
  64. for item in list_files:
  65. sample_file_list=os.listdir(indir+"/"+item) if os.path.isdir(indir+"/"+item) else []
  66. for sample_file in sample_file_list:
  67. if sample_file.endswith(".gz"):
  68. for sample in sample_dict.keys():
  69. if sample+"_" in sample_file:
  70. if "R1" in sample_file:
  71. sample_dict[sample][0]=indir+"/"+item+"/"+sample_file
  72. elif "R2" in sample_file:
  73. sample_dict[sample][1]=indir+"/"+item+"/"+sample_file
  74. #
  75. for sample in sample_dict.keys():
  76. tmp_dict=sample_dict[sample]
  77. R1=tmp_dict[0]
  78. R2=tmp_dict[1]
  79. if os.path.exists(R1) and os.path.exists(R2):
  80. pbsfile=get_pbs(sample,R1,R2,outdir)
  81. submit_pbs(pbsfile)
  82. if __name__ == '__main__':
  83. parser = argparse.ArgumentParser(description='generate qaseq fusion batch pbs')
  84. parser.add_argument('-i','--inputpath', required=True, type=str, help="indir")
  85. parser.add_argument('-o','--outdir' ,required=True, type=str, help="outdir")
  86. parser.add_argument('-s', '--samplelist',required=True, type=str, help="sample")
  87. args = parser.parse_args()
  88. main(args.inputpath,args.outdir,args.samplelist)