123456789101112131415161718192021222324252627282930313233343536373839404142434445464748 |
- import sys,collections,math,os,os.path,re
- import pandas as pd
- from pandas.core.frame import DataFrame
- import argparse
- #####计算靶区间的平均测序深度
def coverage_sample(inputpath, sampleid, targetdir=None):
    """Write the mean per-base depth of every target region for one sample.

    Reads ``<inputpath>/10Coverage/<sampleid>.cov.samtools.txt`` as a
    header-less, tab-separated table with the columns ``chr``, ``pos`` and
    ``reads`` (presumably ``samtools depth`` output -- confirm with the
    upstream pipeline step), averages ``reads`` over the positions that fall
    inside each target region, and writes the result to
    ``<inputpath>/10Coverage/<sampleid>.cov.samtools_coverage.txt``.

    Args:
        inputpath: Directory that contains the ``10Coverage`` sub-directory.
        sampleid: Sample name; used to build the input and output file names
            and as the name of the mean-depth column in the output.
        targetdir: Path to the header-less, tab-separated target file with
            the columns chr, start, end, gene, infor1, strand. Defaults to
            the panel BED file this pipeline was written for.

    The output is tab-separated with the columns chr, start, end, gene,
    strand and ``<sampleid>``. Regions without any matching depth row are
    omitted. The mean is truncated to an integer.
    """
    if targetdir is None:
        targetdir = '/cgdata/liuxiangqiong/work62pancancer/pipeline/v0/refdata/NanOnco_Plus_Panel_v2.0_Covered_b37_cg.parY2X.sort.bed'

    coveragedir = os.path.join(inputpath, '10Coverage')
    depthfile = os.path.join(coveragedir, sampleid + '.cov.samtools.txt')

    # Chromosome names are forced to str in both tables; otherwise a file
    # holding only numeric chromosomes is parsed as int and never matches
    # (or cannot be compared with) names such as 'X' in the other table.
    depth = pd.read_table(depthfile, header=None, sep='\t', low_memory=False,
                          names=['chr', 'pos', 'reads'], dtype={'chr': str})
    targets = pd.read_table(targetdir, sep='\t', header=None,
                            names=['chr', 'start', 'end', 'gene', 'infor1', 'strand'],
                            dtype={'chr': str})

    # Split the depth table by chromosome once so that each region is only
    # compared against rows of its own chromosome.
    depth_by_chr = {chrom: rows for chrom, rows in depth.groupby('chr', sort=False)}

    records = []
    for region in targets.itertuples(index=False):
        chrom_depth = depth_by_chr.get(region.chr)
        if chrom_depth is None:
            continue
        # The chromosome must match exactly; the previous '>=' comparison also
        # pulled in positions from every chromosome that sorted after it.
        # NOTE(review): both bounds are inclusive, as before. If the target
        # file is a standard 0-based BED and 'pos' is 1-based, the lower bound
        # should probably be 'pos > start' -- confirm before changing.
        inside = (chrom_depth['pos'] >= region.start) & (chrom_depth['pos'] <= region.end)
        n_positions = int(inside.sum())
        if n_positions == 0:
            continue
        total_reads = chrom_depth.loc[inside, 'reads'].sum()
        records.append({
            'chr': region.chr,
            'start': region.start,
            'end': region.end,
            'gene': region.gene,
            'strand': region.strand,
            sampleid: int(total_reads / n_positions),
        })

    # Explicit columns keep the header stable even when no region is covered.
    samplecov = pd.DataFrame(
        records, columns=['chr', 'start', 'end', 'gene', 'strand', sampleid])
    outputdir = os.path.join(coveragedir, sampleid + '.cov.samtools_coverage.txt')
    samplecov.to_csv(outputdir, sep='\t', index=False, header=True)
if __name__ == '__main__':
    # Command-line entry point: compute per-region mean coverage for one sample.
    parser = argparse.ArgumentParser(description='coverage for probe')
    # Both options are required: without them None would reach os.path.join
    # inside coverage_sample and fail with an unhelpful TypeError.
    parser.add_argument('-i', '--inputpath', type=str, required=True,
                        help='the path of lane')
    parser.add_argument('-s', '--sampleid', type=str, required=True,
                        help='samplename')
    args = parser.parse_args()
    coverage_sample(args.inputpath, args.sampleid)
|