# Split a headerless, tab-separated VCF file into one output file per chromosome.
import argparse
import os
import sys

import pandas as pd
def vcfsplit(inputdata: str) -> None:
    """Split a headerless, tab-separated VCF file into one file per chromosome.

    Rows are grouped by the value in the first column. Each group is written,
    in its original row order, to ``<prefix>_<chrom>.vcf`` where ``<prefix>``
    is ``inputdata`` with its last file extension removed. Output files are
    tab-separated and carry no header or index column.

    Args:
        inputdata: Path to the input file. Every line is treated as a data
            row (``header=None``), so header lines must already be stripped.

    Raises:
        pandas.errors.EmptyDataError: If the input file contains no data.
    """
    # splitext only inspects the final path component, so a dot in a directory
    # name (or a file without any extension) cannot truncate the output path.
    prefix = os.path.splitext(inputdata)[0]

    # Read every field as text and disable NA detection so values such as
    # "0.50", "7" or an allele spelled "NA" are written back unchanged.
    vcfdata = pd.read_table(
        inputdata,
        sep='\t',
        header=None,
        dtype=str,
        keep_default_na=False,
        low_memory=False,
    )

    # sort=False keeps chromosomes in order of first appearance; rows inside
    # each group keep their original relative order.
    for chrid, chr_vcf in vcfdata.groupby(0, sort=False):
        outputname = '{}_{}.vcf'.format(prefix, chrid)
        chr_vcf.to_csv(outputname, sep='\t', index=False, header=False)
def main(argv=None):
    """Parse command-line arguments and split the given VCF file by chromosome.

    Args:
        argv: Argument list to parse. ``None`` makes argparse read the
            process arguments (``sys.argv[1:]``).
    """
    parser = argparse.ArgumentParser(
        description='split a headerless VCF file into one file per chromosome')
    # Required: without a path, vcfsplit would fail on None with an
    # AttributeError instead of a usage message.
    parser.add_argument('-i', '--inputdata', type=str, required=True,
                        help='the inputdata of vcf')
    # Still accepted so existing command lines keep working; the value is not used.
    parser.add_argument('-s', '--sample', type=str, help='the sample name')
    args = parser.parse_args(argv)
    vcfsplit(args.inputdata)


if __name__ == '__main__':
    main()