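# Author's note: I am no expert, just a beginner who wrote this quickly.
# I do not know the exact content you need to match, so adjust the regular
# expression (pattern_) yourself.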
import os
import sys
import string
import re
class jassonjack(object):
    """Extract the lines of a text file that match ``pattern_``.

    Every line of the source file is tested against ``pattern_``; the
    matching lines are collected in ``wordlist`` and written, each followed
    by a single space, as one line of text to a sibling file whose name
    ends in ``_word_number.txt``.

    Attributes:
        pattern_: regular expression a line must match to be kept. May be
            overridden in a subclass with a pattern string or a compiled
            pattern.
        srcfile: path of the file to read, as given to the constructor.
        dstfile: path of the file written by ``split``.
        wordlist: matches found by the most recent successful ``split``.
    """

    # A line qualifies when it starts with an ASCII letter, continues with
    # at least one non-digit character and ends with one or more digits.
    # ``$`` also matches just before a trailing newline, so the newline is
    # never part of the match.
    pattern_ = re.compile(r'^[a-zA-Z]\D+\d+$')

    def __init__(self, srcfile):
        """Remember the source path and derive the destination path.

        Args:
            srcfile: path of the text file to scan.
        """
        self.srcfile = srcfile
        parentname, filename = os.path.split(srcfile)
        # Drop the extension and always append the suffix, so the
        # destination can never be the same path as the source (which
        # would truncate the input when it is opened for writing).
        stem = os.path.splitext(filename)[0]
        self.dstfile = os.path.join(parentname, stem + '_word_number.txt')
        self.wordlist = []

    def split(self):
        """Scan ``srcfile`` and write the matching lines to ``dstfile``.

        Returns:
            True when the source was read and the destination written,
            False when either file could not be processed (the reason is
            printed). ``wordlist`` is only replaced on success.
        """
        # re.compile accepts both a pattern string and an already compiled
        # pattern, so a subclass may override pattern_ with either.
        pattern = re.compile(self.pattern_)
        # Collect into a local list so repeated calls do not accumulate
        # duplicates and a failed call leaves wordlist untouched.
        words = []
        try:
            # Read the source completely before creating the destination,
            # so a missing source does not leave an empty output file.
            with open(self.srcfile, 'r') as srcfd:
                for line in srcfd:
                    # The pattern is anchored at both ends, so a line
                    # yields at most one match.
                    m = pattern.match(line)
                    if m:
                        words.append(m.group(0))
            with open(self.dstfile, 'w') as dstfd:
                # Output format: every word followed by one space, then a
                # final newline.
                dstfd.write(''.join(word + ' ' for word in words))
                dstfd.write('\n')
        except (IOError, OSError, UnicodeError) as err:
            # Report the real cause instead of claiming the file is missing.
            print("cannot split %s: %s" % (self.srcfile, err))
            return False
        self.wordlist = words
        return True
if __name__ == "__main__":
    # Script entry point: ask for a file path, run the extraction and print
    # whether it succeeded.
    # raw_input only exists on Python 2; on Python 3 the equivalent is input.
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    srcfile = read_line("input file:")
    instance = jassonjack(srcfile)
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(instance.split())