123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106 |
#####
# Distribute sample information into separate per-sample directories.
####
- import pandas as pd
- import xlrd
- import os,sys
- import argparse
#### Create the analysis directories
def analis_dir(inputpath, laneid, report_root='/cgdata/pancancer_report'):
    """Create the per-lane report directory and the analysis output directories.

    One directory named ``laneid`` is created under ``report_root``, and a
    fixed set of analysis sub-directories is created under ``inputpath``.
    Directories that already exist are left untouched, so the function can be
    called repeatedly. Missing parent directories are created as needed.

    Args:
        inputpath: Lane working directory that receives the analysis
            sub-directories.
        laneid: Lane identifier, used as the name of the report directory.
        report_root: Parent directory of the per-lane report directory.
            Defaults to the location that was previously hard-coded.

    Raises:
        OSError: If a directory cannot be created, or if one of the target
            paths already exists but is not a directory.
    """
    analysis_subdirs = (
        '1SV_varscan_pair',
        '1SV_vardict_pair',
        '2CNV_cnvkit_pair',            # CNV
        '3MSI_msisensor2_pair',        # MSI
        '4Germline_unpair',            # germline result
        '5HL_gatk_unpair',             # HL result
        '6Fusion_manta_pair',          # fusion
        '6Fusion_genefusion_unpair',   # fusion
        '7HLA-HD_unpair',              # HLA
        '8Fastqc',                     # qc
        '9Ontarget',                   # on-target
        '10Coverage',                  # coverage
        'datasummary',                 # data summary
    )

    # Directory that will hold the generated report for this lane.
    os.makedirs(os.path.join(report_root, laneid), exist_ok=True)

    # exist_ok=True replaces the racy "check exists, then mkdir" sequence.
    for subdir in analysis_subdirs:
        os.makedirs(os.path.join(inputpath, subdir), exist_ok=True)
def tumortyple(inputpath, laneid):
    """Write one single-row tumor-type workbook per sample of a lane.

    Reads the tab-separated file ``<inputpath>/<laneid>_sample_infor_label.txt``
    (first row is the header; the ``samplename`` and ``tumortype`` columns are
    used). For every row it creates ``<inputpath>/resultfile/<samplename>/``
    if needed and writes ``<laneid>-<samplename>.tumortype.xlsx`` into it.
    The workbook has a single sheet named ``tumortype`` with the columns
    ``sampleid`` and ``tumortype``.

    Args:
        inputpath: Lane working directory containing the sample information
            file; results are written below it.
        laneid: Lane identifier, used in the input and output file names.

    Raises:
        FileNotFoundError: If the sample information file does not exist.
        KeyError: If the ``samplename`` or ``tumortype`` column is missing.
    """
    info_path = os.path.join(inputpath, laneid + '_sample_infor_label.txt')
    sample_table = pd.read_table(info_path, sep='\t', header=0)
    result_dir = os.path.join(inputpath, 'resultfile')

    for sample, tumortype in zip(sample_table['samplename'],
                                 sample_table['tumortype']):
        # Sample names that parse as numbers must still be usable in paths.
        sample_name = str(sample)

        # Also creates result_dir on first use; no-op when already present.
        sample_dir = os.path.join(result_dir, sample_name)
        os.makedirs(sample_dir, exist_ok=True)

        sampledata = pd.DataFrame({'sampleid': [sample],
                                   'tumortype': [tumortype]})
        outputfile = os.path.join(
            sample_dir, laneid + '-' + sample_name + '.tumortype.xlsx')

        # ExcelWriter.save() was removed in pandas 2.0; the context manager
        # saves and closes the workbook, even if to_excel raises.
        with pd.ExcelWriter(outputfile) as writer:
            sampledata.to_excel(writer, sheet_name='tumortype', index=False)
def tumortyperunmain(inputpath, laneid):
    """Run the directory setup and then the per-sample tumor-type export.

    Calls ``analis_dir`` followed by ``tumortyple``, passing both the same
    ``inputpath`` and ``laneid``.
    """
    pipeline_steps = (analis_dir, tumortyple)
    for run_step in pipeline_steps:
        run_step(inputpath, laneid)
- #inputpath='/cgdata/liuxiangqiong/work62pancancer/pipelinetest-CGB0158'
- #laneid='CGB0158'
- #tumortyperunmain(inputpath,laneid)
if __name__ == '__main__':
    # Command-line entry point: parse the lane directory and lane id, then
    # run the directory setup and the per-sample tumor-type export.
    parser = argparse.ArgumentParser(description='sample infor for tumortype')
    # Both options are mandatory: without them None would be passed on and
    # fail later with an opaque TypeError inside os.path.join.
    parser.add_argument('-i', '--inputpath', type=str, required=True,
                        help='the path of lane')
    parser.add_argument('-l', '--laneid', type=str, required=True,
                        help='laneid')
    args = parser.parse_args()
    tumortyperunmain(args.inputpath, args.laneid)
|