"""Summarise per-sample QC tables of one sequencing lane into lane-level files.

For a lane directory ``<inputpath>``, the per-sample tables are read from
``<inputpath>/tempfile/QC`` and the merged tables are written to
``<qc_sum_dir>/<lane id>/``.
"""
import pandas as pd
import os
import sys
import argparse

# Default root directory that receives one sub-directory per lane.
QC_SUM_DIR = '/cgdata/bioproject/pancancer602gene/NGS_QC_602pancancer'

# Columns (and their order) kept in the on-target summary table.
ONTARGET_COLUMNS = ['sampleid', 'Total_Read(M)', 'Map(%)', 'T_size',
                    'On_Target(%)', 'T_Dup(%)', 'T_Mean', 'Insert_Size',
                    'SD', 'Fold80', '>1X', '>20X', '>30X', '>50X', '>100X',
                    '>300X', 'Adjust_30X']


def _lane_id(inputpath):
    """Return the last path component of *inputpath*, ignoring trailing slashes."""
    return os.path.basename(os.path.normpath(inputpath))


def _summarise_qc_tables(inputpath, file_suffix, output_suffix,
                         columns=None, qc_sum_dir=QC_SUM_DIR):
    """Merge every ``*<file_suffix>`` table of a lane and write the result.

    Parameters
    ----------
    inputpath : str
        Lane directory; tables are read from ``<inputpath>/tempfile/QC``.
    file_suffix : str
        File-name suffix selecting the per-sample tables to merge.
    output_suffix : str
        Text appended to the lane id to form the output file name.
    columns : list of str, optional
        When given, only these columns are written, in this order.
        A ``KeyError`` is raised if a merged table lacks one of them.
    qc_sum_dir : str
        Root directory under which ``<lane id>/`` is created.
    """
    lane_id = _lane_id(inputpath)
    output_dir = os.path.join(qc_sum_dir, lane_id)
    # makedirs also creates missing parents and tolerates an existing directory.
    os.makedirs(output_dir, exist_ok=True)

    qc_dir = os.path.join(inputpath, 'tempfile', 'QC')
    # os.listdir order is arbitrary; sort so the output row order is reproducible.
    table_names = sorted(name for name in os.listdir(qc_dir)
                         if name.endswith(file_suffix))
    frames = [pd.read_table(os.path.join(qc_dir, name), sep='\t', header=0)
              for name in table_names]

    if frames:
        # DataFrame.append was removed in pandas 2.0; concatenate once instead.
        summary = pd.concat(frames, ignore_index=True, sort=False)
        if columns is not None:
            summary = summary[columns]
    else:
        # No matching files: still emit a (header-only) table instead of failing.
        summary = pd.DataFrame(columns=columns)

    # Write the result.
    output_path = os.path.join(output_dir, lane_id + output_suffix)
    summary.to_csv(output_path, sep='\t', header=True, index=False)


def ontarget_sum(inputpath, qc_sum_dir=QC_SUM_DIR):
    """Merge all ``*ontarget.txt`` tables of a lane into ``<lane>_target_sum.txt``.

    Only the columns listed in ``ONTARGET_COLUMNS`` are kept.
    *inputpath* is the lane directory, e.g.
    ``/cgdata/bioproject/pancancer602gene/CGB0329``.
    """
    _summarise_qc_tables(inputpath, 'ontarget.txt', '_' + 'target_sum.txt',
                         columns=ONTARGET_COLUMNS, qc_sum_dir=qc_sum_dir)


def report_sum(inputpath, qc_sum_dir=QC_SUM_DIR):
    """Merge all ``*_qc_report.txt`` tables of a lane into ``<lane>_report_sum.txt``.

    All columns found in the per-sample reports are kept.
    """
    _summarise_qc_tables(inputpath, '_qc_report.txt', '_report_sum.txt',
                         qc_sum_dir=qc_sum_dir)


def run(inputpath, qc_sum_dir=QC_SUM_DIR):
    """Write both the on-target and the report summary for one lane."""
    ontarget_sum(inputpath, qc_sum_dir=qc_sum_dir)
    report_sum(inputpath, qc_sum_dir=qc_sum_dir)


def main():
    """Parse the command line and summarise the requested lane."""
    parser = argparse.ArgumentParser(description='QC sum')
    # required=True: without a lane path there is nothing to summarise.
    parser.add_argument('-i', '--inputpath', type=str, required=True,
                        help='the path of lane')
    args = parser.parse_args()
    run(args.inputpath)


if __name__ == '__main__':
    main()