igv_input.py 2.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657
  1. import pandas as pd
  2. import os
  3. import argparse
  4. def igv_format(inputpath,sample):
  5. ##0.creat the analysis path
  6. tempfile = os.path.join(inputpath, 'tempfile')
  7. if not os.path.exists(tempfile):
  8. os.mkdir(tempfile)
  9. igv = os.path.join(tempfile, 'igv')
  10. if not os.path.exists(igv):
  11. os.mkdir(igv)
  12. #1.acqure the dir
  13. laneid = inputpath.split('/')[-1]
  14. resultdir = os.path.join(inputpath, 'resultfile')
  15. sampledir = os.path.join(resultdir, sample)
  16. #2for somatic sv
  17. snvdir = os.path.join(sampledir, laneid + '-' + sample + '.snv.xlsx')
  18. snv_raw = pd.read_excel(snvdir)
  19. select_columns = ['Chr', 'Start', 'End', 'Ref', 'Alt', 'Gene.refGene', 'Transid_uniq','ExonicFunc.refGene', 'VAF_tumor', 'DP_tumor',
  20. 'VD_tumor', 'DP_normal', 'VD_normal', 'VAF_normal', 'sampleid']
  21. snv_select = snv_raw[select_columns]
  22. snv_select['Chr'] = 'chr' + snv_select['Chr'].astype('str')
  23. snvoutputname = os.path.join(igv, laneid + '-' + sample + '.snv.txt')
  24. snv_select.to_csv(snvoutputname, sep='\t', index=False, header=True)
  25. #3.for germline sv
  26. germlinedir = os.path.join(sampledir, laneid + '-' + sample + '.germline.xlsx')
  27. germline_raw = pd.read_excel(germlinedir)
  28. germline_columns = ['Chr', 'Start', 'End', 'Ref', 'Alt', 'Gene.refGene', 'AAChange_select', 'ExonicFunc.refGene','VAF','N_ref','N_alt','sampleid']
  29. germline_select = germline_raw[germline_columns]
  30. germline_select['Chr'] = 'chr' + germline_select['Chr'].astype('str')
  31. germlineoutputname = os.path.join(igv, laneid + '-' + sample + '.germline.txt')
  32. germline_select.to_csv(germlineoutputname, sep='\t', index=False, header=True)
  33. def allsample(inputpath):
  34. laneid = inputpath.split('/')[-1]
  35. sampledir = os.path.join(inputpath, laneid + '_sample_infor_label.txt')
  36. samplelist = pd.read_table(sampledir, sep='\t', header=0)
  37. for i in range(len(samplelist)):
  38. sample = samplelist.loc[i, 'samplename']
  39. igv_format(inputpath, sample)
  40. if __name__=='__main__':
  41. parser = argparse.ArgumentParser(description='the igv for sv')
  42. parser.add_argument('-i', '--inputpath', type=str, help='the path of lane')
  43. parser.add_argument('-s', '--sample', type=str, help='the samplename')
  44. args = parser.parse_args()
  45. Inputpath = args.inputpath
  46. Sample=args.sample
  47. igv_format(Inputpath,Sample)