# chaowen.xi / 602panel_test
import argparse
import os
import subprocess
import sys
from typing import Any

import pandas as pd
#### Supplementary step: copy the sample table / per-sample results

def sampleresult_cpfile(inputpath, laneid, sampleid, report_root='/cgdata/pancancer_report'):
	"""Copy one sample's result directory into the report area if it is complete.

	The sample directory ``<inputpath>/resultfile/<sampleid>`` is considered
	complete when it contains all ten expected ``<laneid>-<sampleid>.*`` files.
	A complete directory is copied (``cp -r``) into
	``<report_root>/<laneid>/resultfile``. Otherwise nothing is copied and a
	tab-separated PASS/Fail listing of the expected files is written to
	``<inputpath>/tempfile/cpfile_log/<laneid>_<sampleid>_fail.log``.

	Args:
		inputpath: Lane directory containing the ``resultfile`` folder.
		laneid: Lane identifier; used as file-name prefix and destination folder.
		sampleid: Sample identifier; name of the per-sample result folder.
		report_root: Root of the report area. Defaults to the location the
			script has always used.

	Returns:
		None.

	Raises:
		FileNotFoundError: If the sample result directory does not exist.
	"""
	expected_suffixes = (
		'.qc.xlsx', '.snv.xlsx', '.tmb.xlsx', '.germline.xlsx', '.cnv.xlsx',
		'.chemical.xlsx', '.fusion.xlsx', '.fusion.html', '.msi.xlsx', '.hla.xlsx',
	)

	# makedirs creates missing parents and tolerates an existing directory,
	# unlike the exists()/mkdir() pair which fails when a parent is absent.
	pancancer_result_dir = os.path.join(report_root, laneid, 'resultfile')
	os.makedirs(pancancer_result_dir, exist_ok=True)

	sample_dir = os.path.join(inputpath, 'resultfile', sampleid)
	present = set(os.listdir(sample_dir))
	prefix = laneid + '-' + sampleid
	expected = [prefix + suffix for suffix in expected_suffixes]

	if all(name in present for name in expected):
		# Argument list (no shell): paths containing spaces or shell
		# metacharacters are passed to cp verbatim. '--' stops option parsing.
		completed = subprocess.run(['cp', '-r', '--', sample_dir, pancancer_result_dir])
		if completed.returncode != 0:
			print('cp failed (exit %d): %s -> %s'
				% (completed.returncode, sample_dir, pancancer_result_dir),
				file=sys.stderr)
		return

	# Incomplete sample: record which expected files are present/missing.
	# Built directly from the expected list so an empty sample directory
	# still yields a valid table.
	logmerge = pd.DataFrame({
		'file': expected,
		'label': ['PASS' if name in present else 'Fail' for name in expected],
	})
	cpfilelog_dir = os.path.join(inputpath, 'tempfile', 'cpfile_log')
	os.makedirs(cpfilelog_dir, exist_ok=True)
	outputname = os.path.join(cpfilelog_dir, laneid + '_' + sampleid + '_fail.log')
	logmerge.to_csv(outputname, index=False, header=True, encoding='gbk', sep='\t')


def cpmain(inputpath,laneid):
	"""Run sampleresult_cpfile for every sample listed in the lane's sample table.

	The table is the tab-separated file
	``<inputpath>/<laneid>_sample_infor_label.txt`` with a header row; sample
	names are taken from its ``samplename`` column. Each name is printed
	before it is processed.

	Args:
		inputpath: Lane directory containing the sample table.
		laneid: Lane identifier; prefix of the sample table file name.
	"""
	sampledir = os.path.join(inputpath, laneid + '_sample_infor_label.txt')
	# Read sample names as text: with dtype inference an all-digit name such as
	# "0012" becomes the integer 12, which loses the leading zeros and cannot
	# be concatenated into file names downstream.
	samplelist = pd.read_table(sampledir, sep='\t', header=0, dtype={'samplename': str})
	for sampleid in samplelist['samplename']:
		print(sampleid)
		sampleresult_cpfile(inputpath, laneid, sampleid)


if __name__=='__main__':
	# Command-line entry point. The lane path and lane id are mandatory; the
	# sample id is optional. Without -s every sample in the lane's sample table
	# is processed via cpmain, instead of failing on a None sample id.
	parser = argparse.ArgumentParser(description='cp the data')
	parser.add_argument('-i', '--inputpath', type=str, required=True, help='the path of lane')
	parser.add_argument('-l', '--laneid', type=str, required=True, help='laneid')
	parser.add_argument('-s', '--sampleid', type=str,
		help='sampleid; if omitted, all samples in the lane sample table are processed')
	args = parser.parse_args()
	Inputpath = args.inputpath
	Laneid = args.laneid
	Sampleid=args.sampleid
	if Sampleid is None:
		cpmain(Inputpath, Laneid)
	else:
		sampleresult_cpfile(Inputpath,Laneid,Sampleid)