1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768 |
- import pandas as pd
- import os,sys
- import re
- import argparse
def subdir_list(dirname):
    """List every immediate subdirectory of a directory.

    @param dirname: str path of the directory to scan
    @return: list(str) one path per subdirectory, each built by joining
             ``dirname`` with the entry name; order follows ``os.listdir``
    """
    candidates = [os.path.join(dirname, entry) for entry in os.listdir(dirname)]
    # Keep only the entries that are directories (plain files are dropped).
    return [path for path in candidates if os.path.isdir(path)]
def file_list(dirname, ext='.gz'):
    """List the entries of a directory whose name ends with a given suffix.

    The suffix is the last extension as computed by ``os.path.splitext``,
    so ``'x.fastq.gz'`` matches ``'.gz'``. Entries are not checked to be
    regular files: a subdirectory with a matching suffix is returned too.

    @param dirname: str path of the directory to scan
    @param ext: str suffix to match, starting with a dot
    @return: list(str) matching entry names (without the directory part),
             in ``os.listdir`` order
    """
    matches = []
    for entry in os.listdir(dirname):
        _, suffix = os.path.splitext(entry)
        if suffix == ext:
            matches.append(entry)
    return matches
def linkfile(inputpath, file0, tumor_fastq, tumor_project):
    """Symlink one sample's ``.gz`` files into ``inputpath`` under a new name.

    A directory in ``file0`` belongs to the sample when the text after the
    last ``'-'`` of its path equals ``tumor_fastq``. For every ``.gz`` entry
    in such a directory a symlink named
    ``<tumor_project>_<text after the last '_' of the entry name>`` is
    created in ``inputpath``, pointing at the original entry.

    Links are created with ``os.symlink`` rather than a shell ``ln -s``
    command, so paths containing spaces or shell metacharacters are handled
    correctly. A link that cannot be created (for example because the
    destination already exists) is reported on stderr and skipped.

    @param inputpath: str directory in which the symlinks are created
    @param file0: list(str) candidate sample directory paths
    @param tumor_fastq: str sample id expected after the last '-' of a
                        directory path; a value that equals no suffix
                        (including a non-string) matches nothing
    @param tumor_project: str prefix used for the link names
    @return: None
    """
    for sample_dir in file0:
        # The exact comparison also covers the substring test the sample id
        # would otherwise need, so no separate pre-filter is required.
        if sample_dir.split('-')[-1] != tumor_fastq:
            continue
        for fastq_name in file_list(sample_dir, ext='.gz'):
            # Keep only the trailing '_'-separated part of the original name.
            read_label = fastq_name.split('_')[-1]
            link_path = os.path.join(inputpath, tumor_project + '_' + read_label)
            target_path = os.path.join(sample_dir, fastq_name)
            try:
                os.symlink(target_path, link_path)
            except OSError as err:
                # Best effort: report the failure and carry on with the rest.
                sys.stderr.write(
                    'linkfile: cannot link {} -> {}: {}\n'.format(
                        link_path, target_path, err))
def linkmain(inputpath, laneid, bclpath):
    """Create the symlinks for every tumor/normal pair listed for a lane.

    Reads the tab-separated table ``<inputpath>/<laneid>_sample_infor_label.txt``
    (first line is the header) and, for each row, calls ``linkfile`` first
    for the ``tumor`` / ``fastq_tumor`` columns and then for the
    ``normal`` / ``fastq_normal`` columns.

    @param inputpath: str directory holding the sample table; also passed to
                      ``linkfile`` as the link destination directory
    @param laneid: str lane id used as the prefix of the table file name
    @param bclpath: str directory whose subdirectories are searched for samples
    @return: None
    """
    sample_dirs = subdir_list(bclpath)
    table_path = os.path.join(inputpath, laneid + '_sample_infor_label.txt')
    samples = pd.read_table(table_path, sep='\t', header=0)
    # (project column, fastq column) pairs, tumor first and then normal.
    column_pairs = (('tumor', 'fastq_tumor'), ('normal', 'fastq_normal'))
    for row in samples.index:
        for project_col, fastq_col in column_pairs:
            project_name = samples.loc[row, project_col]
            fastq_id = samples.loc[row, fastq_col]
            linkfile(inputpath, sample_dirs, fastq_id, project_name)
if __name__ == '__main__':
    # Command-line entry point: parse the three options and run linkmain.
    # All three are marked required so that a missing option is reported by
    # argparse instead of reaching the path handling as None.
    parser = argparse.ArgumentParser(description='sample pair')
    parser.add_argument('-i', '--inputpath', type=str, required=True,
                        help='the path of lane')
    parser.add_argument('-l', '--laneid', type=str, required=True,
                        help='laneid')
    parser.add_argument('-f', '--bclpath', type=str, required=True,
                        help='the fastq file')
    args = parser.parse_args()
    linkmain(args.inputpath, args.laneid, args.bclpath)
|