123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101 |
- import sys,os,os.path
- import pandas as pd
- from pandas.core.frame import DataFrame
- import argparse
- import re
- sys.path.append('/cgdata/liuxiangqiong/work62pancancer/Client/v0/script/20220705/')
- import datafile_QC_v0_20220906_finish as qcsummary
- #inputpath='/cgdata/bioproject/pancancer602gene/CGB0158_1'
- #laneid='CGB0158_1'
def sampleQCcheck(inputpath, laneid):
    """Run the per-sample QC step for every tumor sample of a lane.

    Reads the tab-separated sample table
    ``<inputpath>/<laneid>_sample_infor_label.txt`` (first row is the header)
    and calls ``qcsummary.qcrun(inputpath, tumor)`` for each value of its
    ``tumor`` column.  A failure for one sample is reported and the remaining
    samples are still processed.

    Args:
        inputpath: Directory of the lane.
        laneid: Lane identifier; used as prefix of the sample table file name.
    """
    # Read the sample table.
    sampledir = os.path.join(inputpath, laneid + '_sample_infor_label.txt')
    sampletable = pd.read_table(sampledir, sep='\t', header=0)
    for tumor in sampletable['tumor']:
        try:
            qcsummary.qcrun(inputpath, tumor)
        except Exception as exc:
            # ``Exception`` (not a bare ``except``) so that Ctrl-C and
            # interpreter shutdown are not swallowed.  The f-string keeps the
            # original message text but cannot itself fail when the sample id
            # is not a string (e.g. a numeric id or NaN).
            print(f'{tumor} qc data is wrong,please check!')
            # Keep the reason for the failure visible without altering stdout.
            print(f'{type(exc).__name__}: {exc}', file=sys.stderr)
def qcsummary_tumor(inputpath, laneid):
    """Collect the QC reports of all tumor samples of a lane into one table.

    For every row of ``<inputpath>/<laneid>_sample_infor_label.txt`` the file
    ``<inputpath>/tempfile/QC/<tumor>_qc_report.txt`` is looked up.  When it
    exists it is read and ``samplename`` / ``laneid`` are written into its
    first row; otherwise a placeholder row holding only ``samplename``,
    ``sampleid`` and ``laneid`` is used.

    Args:
        inputpath: Directory of the lane.
        laneid: Lane identifier; prefix of the sample table file name and
            value of the ``laneid`` output column.

    Returns:
        A DataFrame with the columns ``samplename``, ``sampleid``,
        ``Total_base(G)``, ``Total_average_depth``, ``Unique_average_depth``,
        ``insert_size``, ``coverge_uniform``, ``Q30``, ``QC_overall`` and
        ``laneid``.  ``Total_base(G)`` is ``Total_base`` divided by 1e9 and
        rounded to two decimals.  Columns that no report provided are filled
        with NaN.  Rows are in reverse sample-table order and carry a fresh
        0..n-1 index.
    """
    QCdir = os.path.join(inputpath, 'tempfile/QC')
    # Read the sample table.
    sampledir = os.path.join(inputpath, laneid + '_sample_infor_label.txt')
    sampletable = pd.read_table(sampledir, sep='\t', header=0)

    frames = []
    for tumor, samplename in zip(sampletable['tumor'], sampletable['samplename']):
        tumorQCdir = os.path.join(QCdir, tumor + '_qc_report.txt')
        if os.path.exists(tumorQCdir):
            tumorQCdata = pd.read_table(tumorQCdir, sep='\t', header=0)
            tumorQCdata.loc[0, 'samplename'] = samplename
            tumorQCdata.loc[0, 'laneid'] = laneid
        else:
            # No report for this sample: keep a row so it still shows up.
            tumorQCdata = pd.DataFrame(
                {'samplename': [samplename], 'sampleid': [tumor], 'laneid': [laneid]}
            )
        frames.append(tumorQCdata)

    # ``DataFrame.append`` no longer exists in pandas >= 2.0, so the pieces are
    # joined with ``pd.concat``.  The earlier implementation put each new
    # sample in front of the ones already collected; reversing the list keeps
    # that row order.
    if frames:
        QCsummary_tumor = pd.concat(frames[::-1], ignore_index=True, sort=False)
    else:
        QCsummary_tumor = pd.DataFrame()

    if 'Total_base' in QCsummary_tumor.columns:
        QCsummary_tumor['Total_base(G)'] = (QCsummary_tumor['Total_base'] / 1000000000).round(2)
        del QCsummary_tumor['Total_base']

    titlelist = ['samplename', 'sampleid', 'Total_base(G)', 'Total_average_depth', 'Unique_average_depth',
                 'insert_size', 'coverge_uniform', 'Q30', 'QC_overall', 'laneid']
    # ``reindex`` instead of ``[...]`` so that a lane without any QC report
    # yields NaN columns rather than a KeyError.
    return QCsummary_tumor.reindex(columns=titlelist)
def qcsummary_normal(inputpath, laneid):
    """Collect the QC reports of all normal samples of a lane into one table.

    For every row of ``<inputpath>/<laneid>_sample_infor_label.txt`` the file
    ``<inputpath>/tempfile/QC/<normal>_qc_report.txt`` is looked up.  When it
    exists it is read and ``samplename`` / ``laneid`` are written into its
    first row; otherwise a placeholder row holding only ``samplename``,
    ``sampleid`` (the normal sample id) and ``laneid`` is used.

    Args:
        inputpath: Directory of the lane.
        laneid: Lane identifier; prefix of the sample table file name and
            value of the ``laneid`` output column.

    Returns:
        A DataFrame with the columns ``samplename``, ``sampleid``,
        ``Total_base(G)``, ``Total_average_depth``, ``Unique_average_depth``,
        ``insert_size``, ``coverge_uniform``, ``Q30``, ``QC_overall`` and
        ``laneid``.  ``Total_base(G)`` is ``Total_base`` divided by 1e9 and
        rounded to two decimals.  Columns that no report provided are filled
        with NaN.  Rows are in reverse sample-table order and carry a fresh
        0..n-1 index.
    """
    QCdir = os.path.join(inputpath, 'tempfile/QC')
    # Read the sample table.
    sampledir = os.path.join(inputpath, laneid + '_sample_infor_label.txt')
    sampletable = pd.read_table(sampledir, sep='\t', header=0)

    frames = []
    for normal, samplename in zip(sampletable['normal'], sampletable['samplename']):
        normalQCdir = os.path.join(QCdir, normal + '_qc_report.txt')
        if os.path.exists(normalQCdir):
            normalQCdata = pd.read_table(normalQCdir, sep='\t', header=0)
            normalQCdata.loc[0, 'samplename'] = samplename
            normalQCdata.loc[0, 'laneid'] = laneid
        else:
            # No report for this sample: keep a row so it still shows up.
            # The sample id recorded here is the *normal* id; the previous
            # code referred to a ``tumor`` variable that does not exist in
            # this function and failed with NameError.
            normalQCdata = pd.DataFrame(
                {'samplename': [samplename], 'sampleid': [normal], 'laneid': [laneid]}
            )
        frames.append(normalQCdata)

    # ``DataFrame.append`` no longer exists in pandas >= 2.0, so the pieces are
    # joined with ``pd.concat``.  The earlier implementation put each new
    # sample in front of the ones already collected; reversing the list keeps
    # that row order.
    if frames:
        QCsummary_normal = pd.concat(frames[::-1], ignore_index=True, sort=False)
    else:
        QCsummary_normal = pd.DataFrame()

    if 'Total_base' in QCsummary_normal.columns:
        QCsummary_normal['Total_base(G)'] = (QCsummary_normal['Total_base'] / 1000000000).round(2)
        del QCsummary_normal['Total_base']

    titlelist = ['samplename', 'sampleid', 'Total_base(G)', 'Total_average_depth', 'Unique_average_depth',
                 'insert_size', 'coverge_uniform', 'Q30', 'QC_overall', 'laneid']
    # ``reindex`` instead of ``[...]`` so that a lane without any QC report
    # yields NaN columns rather than a KeyError.
    return QCsummary_normal.reindex(columns=titlelist)
def QCSum_allsum_main(inputpath, laneid):
    """Build the lane-level QC summary table and write it to disk.

    Runs ``sampleQCcheck`` first, then stacks the tumor summary on top of the
    normal summary and writes the result as a tab-separated file (with header,
    without index) to
    ``<inputpath>/datasummary/<laneid>_table1_QCsummary.txt``.

    Args:
        inputpath: Directory of the lane.
        laneid: Lane identifier.
    """
    # Check the results of the run first.
    sampleQCcheck(inputpath, laneid)
    # Build the QC summary.
    qctumor = qcsummary_tumor(inputpath, laneid)
    qcnormal = qcsummary_normal(inputpath, laneid)
    # ``DataFrame.append`` no longer exists in pandas >= 2.0.
    qcsummarydata = pd.concat([qctumor, qcnormal], sort=False)
    outputdir = os.path.join(inputpath, 'datasummary')
    # Creates the directory if needed and tolerates it already existing.
    os.makedirs(outputdir, exist_ok=True)
    outputname = os.path.join(outputdir, laneid + '_' + 'table1_QCsummary.txt')
    qcsummarydata.to_csv(outputname, sep='\t', index=False, header=True)
if __name__ == '__main__':
    # Command-line entry point: parse the lane directory and lane id, then
    # build the QC summary for that lane.
    parser = argparse.ArgumentParser(description='for the QCsum')
    # Both options are mandatory: without them ``None`` would reach
    # ``os.path.join`` and fail with an unhelpful TypeError.
    parser.add_argument('-i', '--inputpath', type=str, required=True, help='the path of lane')
    parser.add_argument('-l', '--laneid', type=str, required=True, help='the laneid')
    args = parser.parse_args()
    QCSum_allsum_main(args.inputpath, args.laneid)
|