import argparse
import os
import re
import sys

import pandas as pd


def subdir_list(dirname):
    """Return the full paths of all immediate subdirectories of a directory.

    @param dirname: str  full path of the directory to scan
    @return: list(str)  full path of every entry of ``dirname`` that is a
        directory (order follows ``os.listdir``)
    """
    candidates = (os.path.join(dirname, entry) for entry in os.listdir(dirname))
    return [path for path in candidates if os.path.isdir(path)]


def file_list(dirname, ext='.gz'):
    """Return the names of the entries in a directory that have a given suffix.

    @param dirname: str  full path of the directory to scan
    @param ext: str  file extension to keep, starting with a dot; it is
        compared against the LAST extension only (``a.fastq.gz`` -> ``.gz``)
    @return: list(str)  matching entry names, without the directory part
    """
    return [name for name in os.listdir(dirname)
            if os.path.splitext(name)[1] == ext]


def linkfile(inputpath, file0, tumor_fastq, tumor_project):
    """Symlink the ``.gz`` files of one sequenced sample under a project name.

    A directory from ``file0`` is selected when the text after the last ``-``
    in its path equals ``tumor_fastq``.  Every ``.gz`` file inside a selected
    directory is linked into ``inputpath`` as
    ``<tumor_project>_<text after the last '_' of the file name>``.

    Links are created with ``os.symlink`` instead of a shell ``ln -s`` command
    string, so paths containing spaces or shell metacharacters are handled
    correctly and cannot be interpreted as shell syntax.  A link that cannot
    be created (for example because it already exists) is reported on stderr
    and skipped; it does not abort the remaining links.

    @param inputpath: str  directory in which the symlinks are created
    @param file0: list(str)  candidate sample directory paths
    @param tumor_fastq: str  sample identifier used in the directory names
    @param tumor_project: str  prefix used for the created link names
    @return: None
    """
    for sample_dir in file0:
        # Substring pre-filter; the exact comparison below makes the decision.
        if tumor_fastq not in sample_dir:
            continue
        # NOTE: the split is applied to the whole path, not just its last
        # component, so only the text after the final '-' is compared.
        if sample_dir.split('-')[-1] != tumor_fastq:
            continue
        for fastq_name in file_list(sample_dir, ext='.gz'):
            read_label = fastq_name.split('_')[-1]
            link_path = os.path.join(inputpath, tumor_project + '_' + read_label)
            target_path = os.path.join(sample_dir, fastq_name)
            try:
                os.symlink(target_path, link_path)
            except OSError as err:
                # Best effort: report the failure and keep linking the rest.
                print('cannot link %s -> %s: %s' % (link_path, target_path, err),
                      file=sys.stderr)


def linkmain(inputpath, laneid, bclpath):
    """Create the tumor and normal symlinks for every row of a sample sheet.

    Reads ``<inputpath>/<laneid>_sample_infor_label.txt`` (tab separated, with
    a header row) and, for each row, calls ``linkfile`` first for the
    ``tumor`` / ``fastq_tumor`` pair and then for the ``normal`` /
    ``fastq_normal`` pair.

    @param inputpath: str  directory holding the sample sheet; links are
        created here as well
    @param laneid: str  prefix of the sample sheet file name
    @param bclpath: str  directory whose subdirectories are the candidate
        sample directories
    @return: None
    @raise KeyError: if one of the four required columns is missing
    """
    sample_dirs = subdir_list(bclpath)
    sheet_path = os.path.join(inputpath, laneid + '_sample_infor_label.txt')
    sample_info = pd.read_table(sheet_path, sep='\t', header=0)

    # Select the columns up front so a malformed sheet fails before any link
    # is created.
    rows = zip(sample_info['tumor'], sample_info['fastq_tumor'],
               sample_info['normal'], sample_info['fastq_normal'])
    for tumor_project, tumor_fastq, normal_project, normal_fastq in rows:
        linkfile(inputpath, sample_dirs, tumor_fastq, tumor_project)
        linkfile(inputpath, sample_dirs, normal_fastq, normal_project)


def main():
    """Parse the command line and run ``linkmain``."""
    parser = argparse.ArgumentParser(description='sample pair')
    # All three options are needed to build paths, so argparse rejects a
    # missing one instead of letting ``None`` reach ``os.path.join``.
    parser.add_argument('-i', '--inputpath', type=str, required=True,
                        help='the path of lane')
    parser.add_argument('-l', '--laneid', type=str, required=True,
                        help='laneid')
    parser.add_argument('-f', '--bclpath', type=str, required=True,
                        help='the fastq file')
    args = parser.parse_args()
    linkmain(args.inputpath, args.laneid, args.bclpath)


if __name__ == '__main__':
    main()