"""Collect the per-sample QC reports of one lane into a single summary table.

For every row of ``<inputpath>/<laneid>_sample_infor_label.txt`` the tumor and
normal QC reports are looked up under ``<inputpath>/tempfile/QC`` and merged
into ``<inputpath>/datasummary/<laneid>_table1_QCsummary.txt``.
"""
import argparse
import os
import os.path
import re
import sys

import pandas as pd
from pandas.core.frame import DataFrame

# The QC module lives outside this package; its directory must be on sys.path
# before the import below.
sys.path.append('/cgdata/liuxiangqiong/work62pancancer/Client/v0/script/20220705/')
import datafile_QC_v0_20220906_finish as qcsummary

# Example arguments:
#   inputpath='/cgdata/bioproject/pancancer602gene/CGB0158_1'
#   laneid='CGB0158_1'

# Columns (and their order) of the summary table returned by
# qcsummary_tumor / qcsummary_normal.
_SUMMARY_COLUMNS = ['samplename', 'sampleid', 'Total_base(G)',
                    'Total_average_depth', 'Unique_average_depth',
                    'insert_size', 'coverge_uniform', 'Q30', 'QC_overall',
                    'laneid']


def _read_sample_table(inputpath, laneid):
    """Read ``<inputpath>/<laneid>_sample_infor_label.txt`` (tab separated)."""
    sample_table_path = os.path.join(inputpath, laneid + '_sample_infor_label.txt')
    return pd.read_table(sample_table_path, sep='\t', header=0)


def _load_sample_qc(qc_dir, sample_id, samplename, laneid):
    """Return the QC report of one sample, or a placeholder row if it is missing."""
    report_path = os.path.join(qc_dir, sample_id + '_qc_report.txt')
    if os.path.exists(report_path):
        qc_data = pd.read_table(report_path, sep='\t', header=0)
        # Only row 0 is annotated, as in the original implementation.
        # NOTE(review): reports are presumably single-row - confirm.
        qc_data.loc[0, 'samplename'] = samplename
        qc_data.loc[0, 'laneid'] = laneid
        return qc_data
    # No report on disk: keep the sample in the table with empty QC metrics.
    return pd.DataFrame({'samplename': [samplename],
                         'sampleid': [sample_id],
                         'laneid': [laneid]})


def _summarise_qc(inputpath, laneid, id_column):
    """Build the QC summary for the sample IDs found in column *id_column*."""
    qc_dir = os.path.join(inputpath, 'tempfile/QC')
    sample_table = _read_sample_table(inputpath, laneid)
    frames = [
        _load_sample_qc(qc_dir, sample_id, samplename, laneid)
        for sample_id, samplename in zip(sample_table[id_column],
                                         sample_table['samplename'])
    ]
    if not frames:
        # Empty sample table: return an empty table with the expected columns.
        return pd.DataFrame(columns=_SUMMARY_COLUMNS)
    # Earlier versions prepended each sample to the running table, so the
    # output lists the last sample first; keep that row order.
    summary = pd.concat(frames[::-1], sort=False)
    if 'Total_base' in summary.columns:
        # Express the base count in units of 1e9 bases, rounded to 2 decimals.
        summary['Total_base(G)'] = (summary['Total_base'] / 1000000000).round(2)
    # reindex (rather than plain selection) fills columns that no report
    # provided with NaN, e.g. when every report of the lane is missing.
    return summary.reindex(columns=_SUMMARY_COLUMNS)


def sampleQCcheck(inputpath, laneid):
    """Call ``qcsummary.qcrun(inputpath, tumor)`` for every tumor sample.

    Failures are reported and do not stop the loop, so one bad sample does not
    block the remaining ones.
    NOTE(review): qcrun presumably (re)generates the per-sample QC report -
    confirm against datafile_QC_v0_20220906_finish.

    Args:
        inputpath: Lane directory containing the sample table.
        laneid: Lane identifier; prefix of the sample table file name.
    """
    # Read in the sample table.
    sampletable = _read_sample_table(inputpath, laneid)
    for tumor in sampletable['tumor']:
        try:
            qcsummary.qcrun(inputpath, tumor)
        except Exception as exc:  # best effort, but let Ctrl-C / SystemExit through
            print(f'{tumor} qc data is wrong,please check!')
            # Surface the cause so the failure can actually be diagnosed.
            print(f'{tumor}: {exc!r}', file=sys.stderr)


def qcsummary_tumor(inputpath, laneid):
    """Return the QC summary table of the tumor samples of a lane.

    Args:
        inputpath: Lane directory containing the sample table and
            ``tempfile/QC/<sample>_qc_report.txt`` files.
        laneid: Lane identifier; also written to the ``laneid`` column.

    Returns:
        DataFrame with the columns of ``_SUMMARY_COLUMNS``, one row per sample
        (last sample of the sample table first). Samples without a report get
        a row with only samplename, sampleid and laneid filled in.
    """
    return _summarise_qc(inputpath, laneid, 'tumor')


def qcsummary_normal(inputpath, laneid):
    """Return the QC summary table of the normal samples of a lane.

    Same contract as ``qcsummary_tumor`` but driven by the ``normal`` column
    of the sample table.
    """
    return _summarise_qc(inputpath, laneid, 'normal')


def QCSum_allsum_main(inputpath, laneid):
    """Run the QC check and write the combined tumor + normal summary table.

    The table is written tab separated, without index, to
    ``<inputpath>/datasummary/<laneid>_table1_QCsummary.txt``.

    Args:
        inputpath: Lane directory.
        laneid: Lane identifier.
    """
    # First check the results produced by the run.
    sampleQCcheck(inputpath, laneid)
    # Then build the QC summary: tumor rows followed by normal rows.
    qctumor = qcsummary_tumor(inputpath, laneid)
    qcnormal = qcsummary_normal(inputpath, laneid)
    qcsummarydata = pd.concat([qctumor, qcnormal])
    outputdir = os.path.join(inputpath, 'datasummary')
    os.makedirs(outputdir, exist_ok=True)
    outputname = os.path.join(outputdir, laneid + '_' + 'table1_QCsummary.txt')
    qcsummarydata.to_csv(outputname, sep='\t', index=False, header=True)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='for the QCsum')
    # Both arguments are needed to build every path used above.
    parser.add_argument('-i', '--inputpath', type=str, required=True,
                        help='the path of lane')
    parser.add_argument('-l', '--laneid', type=str, required=True,
                        help='the laneid')
    args = parser.parse_args()
    Inputpath = args.inputpath
    Laneid = args.laneid
    QCSum_allsum_main(Inputpath, Laneid)