# s03_datasoftlink_20220915_finish.py (2.4 KB)
  1. import pandas as pd
  2. import os,sys
  3. import re
  4. import argparse
  def subdir_list(dirname):
      """Return the full paths of the immediate subdirectories of a directory.

      @param dirname: str  full path of the directory to scan
      @return: list(str)  full path of every subdirectory, sorted by path
      """
      candidates = (os.path.join(dirname, entry) for entry in os.listdir(dirname))
      # os.listdir returns entries in arbitrary order; sorting makes the
      # result (and anything derived from it) reproducible between runs.
      return sorted(path for path in candidates if os.path.isdir(path))
  def file_list(dirname, ext='.gz'):
      """Return the names of the directory entries that carry a given extension.

      Only the final suffix, as reported by os.path.splitext, is compared:
      'a.fastq.gz' matches ext='.gz', while a compound value such as
      '.fastq.gz' never matches anything. Entries are not checked for being
      regular files, so a subdirectory named 'x.gz' is returned as well.

      @param dirname: str  full path of the directory to scan
      @param ext: str  extension to keep, including the leading dot
      @return: list(str)  matching entry names (without the path), sorted
      """
      # Sorted because os.listdir makes no ordering guarantee.
      return sorted(name for name in os.listdir(dirname)
                    if os.path.splitext(name)[1] == ext)
  def linkfile(inputpath,file0,tumor_fastq,tumor_project):
      """Symlink one sample's '.gz' files into inputpath under a project prefix.

      A directory from file0 is selected when the text after the last '-' in
      its path equals tumor_fastq. Every entry in that directory whose final
      extension is '.gz' is linked as
      <inputpath>/<tumor_project>_<text after the last '_' in the entry name>.

      @param inputpath: str  directory that receives the symbolic links
      @param file0: list(str)  candidate sample directory paths
      @param tumor_fastq: str  sample label that selects the directory
      @param tumor_project: str  prefix used for the link names
      @return: None
      """
      # A non-string label (e.g. the NaN pandas yields for an empty cell) can
      # neither match a directory nor build a link name, so there is nothing
      # to link.
      if not isinstance(tumor_fastq, str) or not isinstance(tumor_project, str):
          return

      for sample_dir in file0:
          # Exact match on the trailing '-' field, so 'S1' does not pick 'S11'.
          if sample_dir.split('-')[-1] != tumor_fastq:
              continue
          for fname in sorted(os.listdir(sample_dir)):
              if os.path.splitext(fname)[1] != '.gz':
                  continue
              read_label = fname.split('_')[-1]
              link_path = os.path.join(inputpath, tumor_project + '_' + read_label)
              target = os.path.join(sample_dir, fname)
              # os.symlink takes the paths verbatim, so names with spaces or
              # shell metacharacters work; a shell command string would need
              # quoting.
              try:
                  os.symlink(target, link_path)
              except OSError as err:
                  # Best effort: report the failure (e.g. link already exists)
                  # and carry on with the remaining files.
                  print('cannot link %s -> %s: %s' % (link_path, target, err),
                        file=sys.stderr)
  def linkmain(inputpath,laneid,bclpath):
      """Create the symlinks for every tumor/normal pair listed for one lane.

      Reads the tab-separated table <inputpath>/<laneid>_sample_infor_label.txt
      (first line is the header) and, for each row, calls linkfile once with
      the 'tumor' / 'fastq_tumor' columns and once with the 'normal' /
      'fastq_normal' columns.

      @param inputpath: str  directory holding the table; also receives the links
      @param laneid: str  prefix of the table's file name
      @param bclpath: str  directory whose subdirectories are searched for samples
      @return: None
      """
      file0 = subdir_list(bclpath)
      sample_sheet = os.path.join(inputpath, laneid + '_sample_infor_label.txt')
      # dtype=str keeps labels that look numeric (e.g. '007') as text, which is
      # what the string matching in linkfile needs.
      sampleinfor = pd.read_table(sample_sheet, sep='\t', header=0, dtype=str)

      column_pairs = (('tumor', 'fastq_tumor'), ('normal', 'fastq_normal'))
      for _, row in sampleinfor.iterrows():
          for project_col, fastq_col in column_pairs:
              project = row[project_col]
              fastq = row[fastq_col]
              # An empty cell is read as NaN; a pair with a missing half
              # (e.g. a tumor-only row) has nothing to link.
              if pd.isna(project) or pd.isna(fastq):
                  continue
              linkfile(inputpath, file0, fastq, project)
  if __name__=='__main__':
      # Command-line entry point: parse the three paths/ids and run linkmain.
      parser = argparse.ArgumentParser(description='sample pair')
      # All three options are required: each one is fed to os.path.join or
      # string concatenation, where an omitted (None) value raises TypeError.
      parser.add_argument('-i', '--inputpath', type=str, required=True,
                          help='the path of lane')
      parser.add_argument('-l', '--laneid', type=str, required=True,
                          help='laneid')
      parser.add_argument('-f', '--bclpath', type=str, required=True,
                          help='the fastq file')
      args = parser.parse_args()
      linkmain(args.inputpath, args.laneid, args.bclpath)