123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657 |
- import pandas as pd
- import os
- import argparse
def _export_igv_table(xlsx_path, columns, tsv_path):
    """Read one workbook, keep ``columns``, prefix ``Chr`` with 'chr', write a TSV."""
    raw = pd.read_excel(xlsx_path)
    # Take an explicit copy so the column assignment below works on an
    # independent frame instead of triggering SettingWithCopyWarning.
    selected = raw[columns].copy()
    selected['Chr'] = 'chr' + selected['Chr'].astype('str')
    selected.to_csv(tsv_path, sep='\t', index=False, header=True)


def igv_format(inputpath, sample):
    """Export the SNV and germline tables of one sample as tab-separated text.

    For the lane directory ``inputpath`` (its last path component is used as
    the lane id) and the given ``sample``, this reads

        <inputpath>/resultfile/<sample>/<laneid>-<sample>.snv.xlsx
        <inputpath>/resultfile/<sample>/<laneid>-<sample>.germline.xlsx

    keeps a fixed list of columns from each, prepends 'chr' to every value of
    the ``Chr`` column, and writes the results to

        <inputpath>/tempfile/igv/<laneid>-<sample>.snv.txt
        <inputpath>/tempfile/igv/<laneid>-<sample>.germline.txt

    Args:
        inputpath: Path of the lane directory; a trailing path separator is
            tolerated.
        sample: Sample name (string) used in the directory and file names.

    Returns:
        None. The function is called for its file output.

    Raises:
        KeyError: If a workbook lacks one of the selected columns.
        OSError: If a workbook cannot be read or an output cannot be written.
    """
    # 0. Create the output directory <inputpath>/tempfile/igv.
    # makedirs with exist_ok avoids the check-then-create race of
    # os.path.exists() followed by os.mkdir().
    igv = os.path.join(inputpath, 'tempfile', 'igv')
    os.makedirs(igv, exist_ok=True)

    # 1. Derive the lane id and the per-sample result directory.
    # normpath strips a trailing separator, so '/data/lane1/' still yields
    # 'lane1' rather than an empty lane id.
    laneid = os.path.basename(os.path.normpath(inputpath))
    sampledir = os.path.join(inputpath, 'resultfile', sample)
    prefix = laneid + '-' + sample

    # 2. SNV table (columns carry tumor and normal depth/VAF values).
    snv_columns = [
        'Chr', 'Start', 'End', 'Ref', 'Alt', 'Gene.refGene', 'Transid_uniq',
        'ExonicFunc.refGene', 'VAF_tumor', 'DP_tumor', 'VD_tumor',
        'DP_normal', 'VD_normal', 'VAF_normal', 'sampleid',
    ]
    _export_igv_table(
        os.path.join(sampledir, prefix + '.snv.xlsx'),
        snv_columns,
        os.path.join(igv, prefix + '.snv.txt'),
    )

    # 3. Germline table.
    germline_columns = [
        'Chr', 'Start', 'End', 'Ref', 'Alt', 'Gene.refGene',
        'AAChange_select', 'ExonicFunc.refGene', 'VAF', 'N_ref', 'N_alt',
        'sampleid',
    ]
    _export_igv_table(
        os.path.join(sampledir, prefix + '.germline.xlsx'),
        germline_columns,
        os.path.join(igv, prefix + '.germline.txt'),
    )
def allsample(inputpath):
    """Run ``igv_format`` for every sample listed for a lane.

    The sample list is read from the tab-separated file
    ``<inputpath>/<laneid>_sample_infor_label.txt``, where ``laneid`` is the
    last path component of ``inputpath``. Each value of its ``samplename``
    column is passed to ``igv_format`` together with ``inputpath``.

    Args:
        inputpath: Path of the lane directory; a trailing path separator is
            tolerated.

    Returns:
        None.

    Raises:
        KeyError: If the sample list has no ``samplename`` column.
        OSError: If the sample list cannot be read.
    """
    # normpath strips a trailing separator so '/data/lane1/' yields 'lane1'
    # instead of an empty lane id.
    laneid = os.path.basename(os.path.normpath(inputpath))
    sampledir = os.path.join(inputpath, laneid + '_sample_infor_label.txt')
    # Read everything as text: sample names are concatenated into file names
    # downstream, so numeric-looking names must not be parsed as numbers.
    samplelist = pd.read_table(sampledir, sep='\t', header=0, dtype=str)
    for sample in samplelist['samplename']:
        igv_format(inputpath, sample)
if __name__ == '__main__':
    # Command-line entry point.
    #   -i/--inputpath  lane directory (required; without it every path
    #                   computation would fail on None)
    #   -s/--sample     one sample name; when omitted, every sample listed
    #                   for the lane is processed through allsample()
    parser = argparse.ArgumentParser(description='the igv for sv')
    parser.add_argument('-i', '--inputpath', type=str, required=True,
                        help='the path of lane')
    parser.add_argument('-s', '--sample', type=str, default=None,
                        help='the samplename (omit to process every sample of the lane)')
    args = parser.parse_args()
    if args.sample is None:
        allsample(args.inputpath)
    else:
        igv_format(args.inputpath, args.sample)
|