sample_coverage_v0_20220906.py 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
  1. import sys,collections,math,os,os.path,re
  2. import pandas as pd
  3. from pandas.core.frame import DataFrame
  4. import argparse
  5. #####计算靶区间的平均测序深度
  6. def coverage_sample(inputpath,sampleid):
  7. coveragedir = os.path.join(inputpath, '10Coverage')
  8. unfilterreadsdir = coveragedir + '/' + sampleid + '.cov.samtools.txt'
  9. inputdata1 = pd.read_table(unfilterreadsdir, header=None, sep='\t', low_memory=False, names=['chr', 'pos', 'reads'])
  10. targetdir = '/cgdata/liuxiangqiong/work62pancancer/pipeline/v0/refdata/NanOnco_Plus_Panel_v2.0_Covered_b37_cg.parY2X.sort.bed'
  11. targetdata = pd.read_table(targetdir, sep='\t', header=None,names=['chr', 'start', 'end', 'gene', 'infor1', 'strand'])
  12. samplecov = pd.DataFrame()
  13. for j in range(len(targetdata)):
  14. regionchr = targetdata.loc[j, 'chr']
  15. regionstart = targetdata.loc[j, 'start']
  16. regionend = targetdata.loc[j, 'end']
  17. regioninfor = inputdata1[
  18. (inputdata1['chr'] >= regionchr) & (inputdata1['pos'] >= regionstart) & (
  19. inputdata1['pos'] <= regionend)]
  20. if len(regioninfor)!=0:
  21. samplecov.loc[j, 'chr'] = targetdata.loc[j, 'chr']
  22. samplecov.loc[j, 'start'] = targetdata.loc[j, 'start']
  23. samplecov.loc[j, 'end'] = targetdata.loc[j, 'end']
  24. samplecov.loc[j, 'gene'] = targetdata.loc[j, 'gene']
  25. samplecov.loc[j, 'strand'] = targetdata.loc[j, 'strand']
  26. samplecov.loc[j, sampleid] = int(regioninfor['reads'].sum() / len(regioninfor))
  27. else:
  28. continue
  29. #print(samplecov)
  30. outputdir=os.path.join(coveragedir,sampleid+'.cov.samtools_coverage.txt')
  31. samplecov.to_csv(outputdir,sep='\t',index=False,header=True)
  32. if __name__=='__main__':
  33. parser = argparse.ArgumentParser(description='coverage for probe')
  34. parser.add_argument('-i', '--inputpath', type=str, help='the path of lane')
  35. parser.add_argument('-s', '--sampleid', type=str, help='samplename')
  36. args = parser.parse_args()
  37. Inputpath = args.inputpath
  38. Sampelid=args.sampleid
  39. coverage_sample(Inputpath, Sampelid)