12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182 |
- from typing import Any
- import pandas as pd
- import os,sys
- import argparse
- ####补充拷贝样本表
def sampleresult_cpfile(inputpath, laneid, sampleid,
                        report_root='/cgdata/pancancer_report'):
    """Copy one sample's result directory into the pan-cancer report tree.

    The sample directory ``<inputpath>/resultfile/<sampleid>`` is copied to
    ``<report_root>/<laneid>/resultfile/`` only when all ten expected result
    files (named ``<laneid>-<sampleid><suffix>``) are present. Otherwise
    nothing is copied and a tab-separated log listing every expected file
    with a ``PASS``/``Fail`` label is written to
    ``<inputpath>/tempfile/cpfile_log/<laneid>_<sampleid>_fail.log``.

    Args:
        inputpath: Lane directory that contains the ``resultfile`` folder.
        laneid: Lane identifier; used in file names and the destination path.
        sampleid: Sample identifier; name of the sample's result directory.
        report_root: Root of the destination tree. Defaults to the path the
            script has always used.

    Raises:
        OSError: If the sample directory cannot be listed or a destination
            or log directory cannot be created.
    """
    # Local import so this function does not depend on the file's import block.
    import subprocess

    # The destination directories are created even when the sample turns out
    # to be incomplete. makedirs(exist_ok=True) avoids the race between
    # "exists?" and "mkdir" when several samples of one lane run in parallel.
    pancancer_result_dir = os.path.join(report_root, laneid, 'resultfile')
    os.makedirs(pancancer_result_dir, exist_ok=True)

    sample_dir = os.path.join(inputpath, 'resultfile', sampleid)
    present = set(os.listdir(sample_dir))

    suffixes = (
        '.qc.xlsx', '.snv.xlsx', '.tmb.xlsx', '.germline.xlsx',
        '.cnv.xlsx', '.chemical.xlsx', '.fusion.xlsx', '.fusion.html',
        '.msi.xlsx', '.hla.xlsx',
    )
    prefix = laneid + '-' + sampleid
    expected = [prefix + suffix for suffix in suffixes]

    if all(name in present for name in expected):
        # Argument list instead of a shell string: paths containing spaces or
        # shell metacharacters are passed to cp verbatim.
        completed = subprocess.run(['cp', '-r', sample_dir, pancancer_result_dir])
        if completed.returncode != 0:
            # Stay best-effort (no exception), but do not fail silently.
            print('cp -r ' + sample_dir + ' ' + pancancer_result_dir
                  + ' exited with status ' + str(completed.returncode),
                  file=sys.stderr)
        return

    # Incomplete sample: record which expected files are present or missing.
    # Building the table directly also covers an empty sample directory,
    # where a merge against an empty frame could not be set up.
    log_table = pd.DataFrame({
        'file': expected,
        'label': ['PASS' if name in present else 'Fail' for name in expected],
    })
    cpfilelog_dir = os.path.join(inputpath, 'tempfile', 'cpfile_log')
    os.makedirs(cpfilelog_dir, exist_ok=True)
    outputname = os.path.join(cpfilelog_dir, laneid + '_' + sampleid + '_fail.log')
    log_table.to_csv(outputname, index=False, header=True, encoding='gbk', sep='\t')
def cpmain(inputpath, laneid):
    """Run ``sampleresult_cpfile`` for every sample listed for a lane.

    Sample names are read from the ``samplename`` column of the tab-separated
    file ``<inputpath>/<laneid>_sample_infor_label.txt``. Each name is printed
    before the sample is processed.

    Args:
        inputpath: Lane directory containing the sample table.
        laneid: Lane identifier; prefix of the sample table's file name.

    Raises:
        KeyError: If the sample table has no ``samplename`` column.
    """
    sampledir = os.path.join(inputpath, laneid + '_sample_infor_label.txt')
    # Read sample names as text: with dtype inference, purely numeric names
    # become integers, which loses leading zeros and breaks the string
    # concatenation and path joins done per sample.
    samplelist = pd.read_table(sampledir, sep='\t', header=0,
                               dtype={'samplename': str})
    for sampleid in samplelist['samplename']:
        print(sampleid)
        sampleresult_cpfile(inputpath, laneid, sampleid)
if __name__ == '__main__':
    # Command-line entry point: copy the results of a single sample.
    # All three options are required so that a forgotten one is reported by
    # argparse up front rather than surfacing later as a TypeError when None
    # is used to build a path.
    parser = argparse.ArgumentParser(description='cp the data')
    parser.add_argument('-i', '--inputpath', type=str, required=True,
                        help='the path of lane')
    parser.add_argument('-l', '--laneid', type=str, required=True,
                        help='laneid')
    parser.add_argument('-s', '--sampleid', type=str, required=True,
                        help='sampleid')
    args = parser.parse_args()
    sampleresult_cpfile(args.inputpath, args.laneid, args.sampleid)
|