123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102 |
- #!/usr/bin/python
- # -*- coding:utf-8 -*-
- import argparse,os
# Directory that holds this script; every bundled resource below is
# addressed relative to it.
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))

# Primer table passed to the primer-check step.
primer_info = "/".join((SCRIPT_DIR, "DB", "primer_info_final_v3.txt"))

# Helper scripts invoked by the generated per-sample job script.
downsample_fq_script = "/".join((SCRIPT_DIR, "shell", "00_downsample_fq.sh"))
Primer_Check_script = "/".join((SCRIPT_DIR, "py_script", "Primer_Check_big_v6.1.py"))
fastp_script = "/".join((SCRIPT_DIR, "shell", "01_fastp_v3.sh"))
map_QAseq_script = "/".join((SCRIPT_DIR, "shell", "02_map_QAseq_nodup.sh"))
fusion_breakpoint = "/".join((SCRIPT_DIR, "shell", "04_fusion_breakpoint_v1.3.sh"))
combine_script = "/".join((SCRIPT_DIR, "shell", "05_results_combine.sh"))
def get_pbs(sample, R1, R2, outdir):
    """Write the per-sample pipeline script and return the path it was written to.

    The script is saved as ``<outdir>/src/<sample>.slurm``. It contains, in
    order: virtualenv activation, creation of the per-sample directories,
    primer check, adapter trimming, mapping, fusion breakpoint detection and
    the final result combination. Per-sample results are placed under
    ``<outdir>/<sample>/``.

    No ``#!``/scheduler header is written; the script starts directly with
    the ``source .../activate`` line. All paths are interpolated into the
    shell commands verbatim, so they must not contain whitespace or shell
    metacharacters.

    Args:
        sample: Sample name; used in file names and as the per-sample
            directory name.
        R1: Path to the read-1 FASTQ file.
        R2: Path to the read-2 FASTQ file.
        outdir: Output root directory.

    Returns:
        The path of the generated script. Earlier versions returned
        ``<outdir>/pbs/<sample>.pbs``, a file this function never creates,
        so the caller ended up submitting a non-existent file.

    Raises:
        OSError / IOError: If ``<outdir>/src`` cannot be created or the
            script cannot be written.
    """
    script_dir = outdir + "/src"
    try:
        os.makedirs(script_dir)
    except OSError:
        # An already existing directory is fine (same as ``mkdir -p``);
        # any other failure must not be hidden.
        if not os.path.isdir(script_dir):
            raise
    script_path = script_dir + "/" + sample + ".slurm"

    outdir_sample = outdir + "/" + sample
    qc_dir = outdir_sample + "/1.QC"
    map_dir = outdir_sample + "/2.mapping"
    fusion_dir = outdir_sample + "/4.fusion"
    reads_txt = qc_dir + "/" + sample + "_reads.txt"

    commands = [
        # environment
        "source /cgdata/IVDRD/py3_18gene_env/bin/activate \n",
        "mkdir -p " + outdir_sample + ";\n",
        "mkdir -p " + qc_dir + ";\n",
        # check primer
        "python " + Primer_Check_script + " -p " + primer_info
        + " -R1 " + R1 + " -R2 " + R2 + " -o " + reads_txt + ";\n",
        # trim adapter
        "sh " + fastp_script
        + " " + qc_dir + "/" + sample + "_reads_primer_check_R1.fastq"
        + " " + qc_dir + "/" + sample + "_reads_primer_check_R2.fastq"
        + " " + sample + " " + outdir_sample + ";\n",
        # mapping (stderr of the mapping script is kept as <sample>.map.log)
        "mkdir -p " + map_dir + ";\n",
        "sh " + map_QAseq_script
        + " " + qc_dir + "/" + sample + "_trim_1.fq.gz"
        + " " + qc_dir + "/" + sample + "_trim_2.fq.gz"
        + " " + sample + " " + outdir_sample
        + " 2> " + map_dir + "/" + sample + ".map.log;\n",
        # breakpoint
        "mkdir -p " + fusion_dir + ";\n",
        "sh " + fusion_breakpoint + " " + fusion_dir + " " + reads_txt
        + " " + map_dir + "/" + sample + ".sort.bam ;\n",
        # combine final results
        "sh " + combine_script + " " + sample + " " + outdir + ";\n",
    ]

    # ``with`` guarantees the handle is closed even if a write fails.
    with open(script_path, 'w') as script:
        script.writelines(commands)

    return script_path
def submit_pbs(pbsfile):
    """Submit *pbsfile* with ``qsub``, requesting 30G of memory.

    The command line is run through the shell via ``os.system``; its exit
    status is not inspected and nothing is returned.

    Args:
        pbsfile: Path of the job script to submit. It is appended to the
            command line verbatim (no quoting).
    """
    os.system(" ".join(("qsub -l mem=30G", pbsfile)))
def get_sample(sampleFile):
    """Read sample names from the first tab-separated column of *sampleFile*.

    Args:
        sampleFile: Path to a text file with one sample per line; only the
            first tab-separated field of each line is used.

    Returns:
        A dict mapping each sample name to a fresh ``["", ""]`` list, i.e.
        two empty placeholder strings per sample.

    Lines whose first field is empty (blank lines, whitespace-only lines,
    lines starting with a tab) are skipped. Without this, an empty sample
    name would be registered, and a caller matching ``name + "_"`` against
    file names would then match every file containing an underscore.
    """
    sample_dict = {}
    with open(sampleFile, 'r') as sf:
        for line in sf:
            # strip() also removes the trailing newline / stray "\r".
            sample = line.split("\t")[0].strip()
            if not sample:
                continue
            sample_dict[sample] = ["", ""]
    return sample_dict
def main(indir, outdir, sampleFile):
    """Locate each sample's R1/R2 files, then generate and submit its job script.

    ``indir`` is scanned exactly one level deep: for every sub-directory,
    each file ending in ``.gz`` is assigned to every sample whose
    ``<sample>_`` string occurs in the file name. A file name containing
    ``R1`` fills the sample's first slot; otherwise one containing ``R2``
    fills the second slot. When several files match the same slot, the one
    listed last wins.

    Args:
        indir: Directory whose sub-directories contain the ``.gz`` files.
        outdir: Output root directory handed to ``get_pbs``.
        sampleFile: Sample list file handed to ``get_sample``.

    Samples for which either file is missing are not submitted; a warning
    is written to stderr so they no longer disappear silently.
    """
    import sys  # local import: only needed for the warning below

    sample_dict = get_sample(sampleFile)

    for item in os.listdir(indir):
        subdir = indir + "/" + item
        if not os.path.isdir(subdir):
            continue
        for sample_file in os.listdir(subdir):
            if not sample_file.endswith(".gz"):
                continue
            # Slot 0 holds R1, slot 1 holds R2; R1 takes precedence when a
            # name contains both markers.
            if "R1" in sample_file:
                slot = 0
            elif "R2" in sample_file:
                slot = 1
            else:
                continue
            for sample in sample_dict:
                if sample + "_" in sample_file:
                    sample_dict[sample][slot] = subdir + "/" + sample_file

    for sample, (R1, R2) in sample_dict.items():
        # Unfilled slots are "" and os.path.exists("") is False.
        if os.path.exists(R1) and os.path.exists(R2):
            pbsfile = get_pbs(sample, R1, R2, outdir)
            submit_pbs(pbsfile)
        else:
            sys.stderr.write(
                "WARNING: R1/R2 .gz files not found for sample '%s' under %s; "
                "sample skipped\n" % (sample, indir)
            )
if __name__ == '__main__':
    # Command-line entry point: all three options are mandatory strings.
    cli = argparse.ArgumentParser(description='generate qaseq fusion batch pbs')
    for short_flag, long_flag, help_text in (
        ('-i', '--inputpath', "indir"),
        ('-o', '--outdir', "outdir"),
        ('-s', '--samplelist', "sample"),
    ):
        cli.add_argument(short_flag, long_flag, required=True, type=str, help=help_text)
    options = cli.parse_args()
    main(options.inputpath, options.outdir, options.samplelist)
|