s02_samplefile_v0_20220915_finish.py 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. #####
  2. # Sort the sample information into separate directories
  3. ####
  4. import pandas as pd
  5. import xlrd
  6. import os,sys
  7. import argparse
  8. #### create the analysis directories
  9. def analis_dir(inputpath,laneid):
  10. #新建出报告的目录
  11. pancancer_dir = os.path.join('/cgdata/pancancer_report', laneid)
  12. if not os.path.exists(pancancer_dir):
  13. os.mkdir(pancancer_dir)
  14. svdir1 = os.path.join(inputpath, '1SV_varscan_pair')
  15. if not os.path.exists(svdir1):
  16. os.mkdir(svdir1)
  17. svdir2= os.path.join(inputpath, '1SV_vardict_pair')
  18. if not os.path.exists(svdir2):
  19. os.mkdir(svdir2)
  20. # for CNV
  21. CNVdir = os.path.join(inputpath, '2CNV_cnvkit_pair')
  22. if not os.path.exists(CNVdir):
  23. os.mkdir(CNVdir)
  24. # for MSI
  25. MSI_dir = os.path.join(inputpath, '3MSI_msisensor2_pair')
  26. if not os.path.exists(MSI_dir):
  27. os.mkdir(MSI_dir)
  28. # for germline result
  29. gemerlinedir = os.path.join(inputpath, '4Germline_unpair')
  30. if not os.path.exists(gemerlinedir):
  31. os.mkdir(gemerlinedir)
  32. # for HL result
  33. HLdir = os.path.join(inputpath, '5HL_gatk_unpair')
  34. if not os.path.exists(HLdir):
  35. os.mkdir(HLdir)
  36. # for fusion
  37. fusion_method1_dir = os.path.join(inputpath, '6Fusion_manta_pair')
  38. if not os.path.exists(fusion_method1_dir):
  39. os.mkdir(fusion_method1_dir)
  40. fusion_method2_dir = os.path.join(inputpath, '6Fusion_genefusion_unpair')
  41. if not os.path.exists(fusion_method2_dir):
  42. os.mkdir(fusion_method2_dir)
  43. #for HLA
  44. HLA_dir = os.path.join(inputpath, '7HLA-HD_unpair')
  45. if not os.path.exists(HLA_dir):
  46. os.mkdir(HLA_dir)
  47. # for qc
  48. qc_dir = os.path.join(inputpath, '8Fastqc')
  49. if not os.path.exists(qc_dir):
  50. os.mkdir(qc_dir)
  51. # for ontarget
  52. ontarget_dir = os.path.join(inputpath, '9Ontarget')
  53. if not os.path.exists(ontarget_dir):
  54. os.mkdir(ontarget_dir)
  55. # for coverage
  56. coverage_dir = os.path.join(inputpath, '10Coverage')
  57. if not os.path.exists(coverage_dir):
  58. os.mkdir(coverage_dir)
  59. # datasummary
  60. datasummary_dir = os.path.join(inputpath, 'datasummary')
  61. if not os.path.exists(datasummary_dir):
  62. os.mkdir(datasummary_dir)
  63. def tumortyple(inputpath,laneid):
  64. sampledir = os.path.join(inputpath, laneid + '_sample_infor_label.txt')
  65. sampleinfor = pd.read_table(sampledir, sep='\t', header=0)
  66. for i in range(len(sampleinfor)):
  67. sample = sampleinfor.loc[i, 'samplename']
  68. # make the result dir
  69. result_dir = os.path.join(inputpath, 'resultfile')
  70. if not os.path.exists(result_dir):
  71. os.mkdir(result_dir)
  72. sample_dir = os.path.join(result_dir, sample)
  73. if not os.path.exists(sample_dir):
  74. os.mkdir(sample_dir)
  75. sampledata = pd.DataFrame()
  76. sampledata.loc[0, 'sampleid'] = sample
  77. sampledata.loc[0, 'tumortype'] = sampleinfor.loc[i, 'tumortype']
  78. outputfile = os.path.join(sample_dir, laneid + '-' + sample + '.tumortype.xlsx')
  79. writer = pd.ExcelWriter(outputfile)
  80. sampledata.to_excel(writer, sheet_name='tumortype', index=False)
  81. writer.save()
  82. writer.close()
  83. def tumortyperunmain(inputpath,laneid):
  84. analis_dir(inputpath,laneid)
  85. tumortyple(inputpath,laneid)
  86. #inputpath='/cgdata/liuxiangqiong/work62pancancer/pipelinetest-CGB0158'
  87. #laneid='CGB0158'
  88. #tumortyperunmain(inputpath,laneid)
  89. if __name__=='__main__':
  90. parser = argparse.ArgumentParser(description='sample infor for tumortype')
  91. parser.add_argument('-i', '--inputpath', type=str, help='the path of lane')
  92. parser.add_argument('-l', '--laneid', type=str, help='laneid')
  93. args = parser.parse_args()
  94. Inputpath = args.inputpath
  95. Laneid = args.laneid
  96. tumortyperunmain(Inputpath,Laneid)