# datafile_cpsamplefile_sample_v0_20220929_finish.py (3.2 KB)
import argparse
import os
import subprocess
import sys
from typing import Any

import pandas as pd
# Supplementary step: copy the sample table and per-sample results
  6. def sampleresult_cpfile(inputpath,laneid,sampleid):
  7. pancancer_dir = os.path.join('/cgdata/pancancer_report', laneid)
  8. if not os.path.exists(pancancer_dir):
  9. os.mkdir(pancancer_dir)
  10. pancancer_result_dir = os.path.join(pancancer_dir, 'resultfile')
  11. if not os.path.exists(pancancer_result_dir):
  12. os.mkdir(pancancer_result_dir)
  13. result_dir = os.path.join(inputpath, 'resultfile')
  14. #拷贝样本表
  15. sampletable=os.path.join(inputpath,laneid+'_sample_infor_label.txt')
  16. #samplecp=os.path.join(pancancer_result_dir,laneid+'_sample_infor_labeltest.txt')
  17. #cp_samplefile = 'cp -r ' + sampletable + ' ' + samplecp
  18. #os.system(cp_samplefile)
  19. #拷贝所有的结果
  20. sample_dir = os.path.join(result_dir, sampleid)
  21. files = os.listdir(sample_dir)
  22. filefile = [laneid + '-' + sampleid + '.qc.xlsx', laneid + '-' + sampleid + '.snv.xlsx',
  23. laneid + '-' + sampleid + '.tmb.xlsx', laneid + '-' + sampleid + '.germline.xlsx',
  24. laneid + '-' + sampleid + '.cnv.xlsx', laneid + '-' + sampleid + '.chemical.xlsx',
  25. laneid + '-' + sampleid + '.fusion.xlsx', laneid + '-' + sampleid + '.fusion.html',
  26. laneid + '-' + sampleid + '.msi.xlsx', laneid + '-' + sampleid + '.hla.xlsx']
  27. k = 0
  28. for i in range(len(files)):
  29. samplefile = files[i]
  30. for j in range(len(filefile)):
  31. aimfile = filefile[j]
  32. if samplefile == aimfile:
  33. k = k + 1
  34. if k == 10:
  35. #pancancer_result_sample_dir = os.path.join(pancancer_result_dir, sampleid)
  36. #if not os.path.exists(pancancer_result_sample_dir):
  37. # os.mkdir(pancancer_result_sample_dir)
  38. cp_allfile = 'cp -r ' + sample_dir + ' ' + pancancer_result_dir
  39. os.system(cp_allfile)
  40. else:
  41. ###record all the data
  42. rawfilefile1 = pd.DataFrame(filefile)
  43. rawfilefile1.columns = ['file']
  44. actualfiles = pd.DataFrame(files)
  45. actualfiles.columns = ['file']
  46. actualfiles['label'] = 'PASS'
  47. logmerge = pd.merge(rawfilefile1, actualfiles, on='file', how='left')
  48. logmerge['label'] = logmerge['label'].fillna('Fail')
  49. temp_dir = os.path.join(inputpath, 'tempfile')
  50. cpfilelog_dir = os.path.join(temp_dir, 'cpfile_log')
  51. if not os.path.exists(cpfilelog_dir):
  52. os.mkdir(cpfilelog_dir)
  53. outputname = os.path.join(cpfilelog_dir, laneid + '_' + sampleid + '_fail.log')
  54. logmerge.to_csv(outputname, index=False, header=True, encoding='gbk', sep='\t')
  55. def cpmain(inputpath,laneid):
  56. sampledir = os.path.join(inputpath, laneid + '_sample_infor_label.txt')
  57. samplelist = pd.read_table(sampledir, sep='\t', header=0)
  58. for i in range(len(samplelist)):
  59. sampleid = samplelist.loc[i, 'samplename']
  60. print(sampleid)
  61. sampleresult_cpfile(inputpath, laneid, sampleid)
  62. if __name__=='__main__':
  63. parser = argparse.ArgumentParser(description='cp the data')
  64. parser.add_argument('-i', '--inputpath', type=str, help='the path of lane')
  65. parser.add_argument('-l', '--laneid', type=str, help='laneid')
  66. parser.add_argument('-s', '--sampleid', type=str, help='sampleid')
  67. args = parser.parse_args()
  68. Inputpath = args.inputpath
  69. Laneid = args.laneid
  70. Sampleid=args.sampleid
  71. sampleresult_cpfile(Inputpath,Laneid,Sampleid)