# Full code listing.
import linecache
import re,urllib
import threading
import time
import Queue
from xml.sax import make_parser
from xml.sax import ContentHandler
class FriendHandler(ContentHandler):
    """SAX content handler that collects fields from a FOAF (RDF/XML) profile.

    After a parse the following attributes hold the collected values:

    * ``dateCreated`` -- the ``lj:dateCreated`` attribute of a
      ``foaf:weblog`` element seen while in "person" mode.
    * ``birth``       -- text of the ``foaf:dateOfBirth`` element.
    * ``yaposted``    -- text of the ``ya:posted`` element.
    * ``Friend``      -- text of every ``foaf:nick`` found inside a
      ``foaf:knows`` element, each followed by a comma.

    Element names are compared as qualified names ("foaf:nick"), so the
    handler expects a parser with namespace processing switched off.
    """

    # Class-level defaults, kept so the attributes exist even on an instance
    # whose __init__ was bypassed.
    isFriend = ""     # truthy while inside a friend's foaf:nick
    Friend = ""       # comma-terminated friend nicknames
    mode = ""         # "person", "knows" or ""
    dateCreated = ""
    isBirth = ""      # truthy while inside foaf:dateOfBirth
    birth = ""
    interests = ""    # never written by this handler
    isposted = ""     # truthy while inside ya:posted
    yaposted = ""

    def __init__(self):
        # Explicit base call instead of super(): ContentHandler is an
        # old-style class on Python 2.
        ContentHandler.__init__(self)
        self._reset()

    def _reset(self):
        """Clear everything collected so far."""
        self.isFriend = ""
        self.Friend = ""
        self.mode = ""
        self.dateCreated = ""
        self.isBirth = ""
        self.birth = ""
        self.interests = ""
        self.isposted = ""
        self.yaposted = ""
        # Text chunks of the element currently being captured.
        self._chunks = []

    def startDocument(self):
        """Reset state so a reused handler does not leak the previous document."""
        self._reset()

    def startElement(self, name, attrs):
        """Track the current section and raise the matching capture flag."""
        if name == "rdf:RDF":
            self.mode = "person"
        elif name == "foaf:knows":
            self.mode = "knows"

        if name == "foaf:dateOfBirth":
            self.isBirth = 1
            self._chunks = []

        if name == "foaf:weblog" and self.mode == "person":
            self.dateCreated = attrs.get("lj:dateCreated")
        elif self.mode == "knows" and name == "foaf:nick":
            self.isFriend = 1
            self._chunks = []
        elif name == "ya:posted":
            self.isposted = 1
            self._chunks = []

    def endElement(self, name):
        """Store the captured text of the closing element and drop its flag."""
        if name == "foaf:nick" and self.mode == "knows":
            # An empty element contributes nothing, as before.
            if self.isFriend and self._chunks:
                self.Friend += "".join(self._chunks) + ","
            self.isFriend = ""
            self.mode = ""
            self._chunks = []
        if name == "foaf:dateOfBirth":
            if self.isBirth and self._chunks:
                self.birth = "".join(self._chunks)
            self.isBirth = ""
            self._chunks = []
        if name == "ya:posted":
            if self.isposted and self._chunks:
                self.yaposted = "".join(self._chunks)
            self.isposted = ""
            self._chunks = []

    def characters(self, content):
        """Buffer text for the element being captured.

        A parser may deliver one text node in several calls, so the pieces
        are only joined when the element ends.
        """
        if self.isFriend or self.isBirth or self.isposted:
            self._chunks.append(content)
################
# Matches one line of the nickname file and captures the text before the
# newline. Compiled once; the original held this pattern as a plain string
# and called .match() on it, which raised AttributeError.
_NICK_LINE = re.compile('(.*?)\n')


def _format_record(nick, handler):
    """Build the text line stored for one parsed profile.

    NOTE(review): the original wrote ``ch.data``, an attribute FriendHandler
    never defines, so the intended layout is not visible here. This
    tab-separated layout (nick, dateCreated, birth, yaposted, Friend) is a
    stand-in -- confirm the desired format.
    """
    fields = [nick, handler.dateCreated, handler.birth,
              handler.yaposted, handler.Friend]
    # dateCreated may be None when the attribute is absent.
    return u'\t'.join([field or u'' for field in fields]) + u'\n'


def thread():
    """Worker loop: take line numbers from ``jobs`` and fetch each profile.

    For every job the matching line of ``nick50000.txt`` is read, the
    nickname on it is turned into a URL, the document is parsed with a fresh
    parser/handler pair, and one record is appended to ``foaf.txt``.

    Failures are reported on stderr and the worker moves on to the next job.
    ``jobs.task_done()`` runs for every job, so ``jobs.join()`` cannot hang
    on a failed one. The function never returns; run it in a daemon thread.
    """
    import sys  # local import: only needed for error reporting

    while True:
        i = jobs.get()
        try:
            line = linecache.getline('nick50000.txt', i)
            mat = _NICK_LINE.match(line)
            # Skip missing lines (getline returns '') and blank nicknames.
            if mat and mat.group(1):
                nick = mat.group(1)
                # One parser and handler per job: a single parser cannot be
                # shared by several threads parsing at the same time.
                handler = FriendHandler()
                parser = make_parser()
                parser.setContentHandler(handler)
                # NOTE(review): the URL suffix '.' looks truncated in the
                # source; kept byte-for-byte -- confirm the intended host.
                parser.parse('http://' + nick + '.')
                print(i)
                record = _format_record(nick, handler)
                # Binary append with explicit UTF-8 so non-ASCII text in the
                # profile cannot raise an encoding error on write.
                with open('foaf.txt', 'ab') as out:
                    out.write(record.encode('utf-8'))
        except Exception as exc:
            # Worker boundary: report and continue instead of silently
            # swallowing the error or killing the thread.
            sys.stderr.write('job %s failed: %r\n' % (i, exc))
        finally:
            jobs.task_done()
# Work queue of 1-based line numbers; worker threads pull from it.
jobs = Queue.Queue()

# Number of worker threads to start.
limit = 10

# Module-level SAX handler and the parser it is attached to.
ch = FriendHandler()
saxparser = make_parser()
saxparser.setContentHandler(ch)

# Start the workers as daemon threads so they do not keep the process alive
# once the main thread is done.
for n in xrange(limit):
    t = threading.Thread(target=thread)
    t.daemon = True
    t.start()

# Queue line numbers 1..1000 of the nickname file.
for i in xrange(1, 1001):
    jobs.put(i)

# Block until task_done() has been called for every queued job.
jobs.join()