python爬虫出了个问题
程序代码:
import re import urllib def get(url): page = urllib.urlopen(url) html = page.read() return html def getima(html): res = r'"(.*\.png)"' ress = (res) list_ima = ress.findall(html) print list_ima i = 0 for x in list_ima: urllib.urlretrieve(x,'%s.jpg' % i) i+=1 print '%s has been downloaded' % i html = get("http://mm.") getima(html)
错误信息说:
['890" height="440" src="//img.', '890" height="440" src="//img.', '890" height="440" src="//img.', '890" height="440" src="//img.', '290" height="280" src="//img.', '290" height="280" src="//img.', '290" height="280" src="//img.', 'http://gtms03.', '270" height="280" src="http://gtms04.', '270" height="280" src="http://gtms03.', '270" height="280" src="http://gtms02.', '270" height="280" src="http://gtms01.', '270" height="280" src="http://gtms03.', '270" height="280" src="http://gtms04.', '270" height="280" src="http://gtms01.', '270" height="280" src="http://gtms04.', '270" height="280" src="http://gtms01.', '115" height="126" src="http://gtms04.', '115" height="126" src="http://gtms03.', '115" height="126" src="http://gtms03.', '115" height="126" src="http://gtms02.', '115" height="126" src="http://gtms01.', '115" height="126" src="http://gtms01.', '115" height="126" src="http://gtms02.', '115" height="126" src="http://gtms04.', '115" height="126" src="http://gtms04.', '220" height="295" src="http://img03.', '220" height="295" src="http://img02.', '220" height="295" src="http://img04.', '220" height="295" src="http://img03.', '220" height="295" src="http://img02.', '220" height="295" src="http://img04.', '220" height="295" src="http://img01.', '220" height="295" src="http://img03.', '220" height="295" src="http://img02.', '220" height="295" src="http://img01.', 'http://gtms01.', '140" height="390" src="//img.', '140" height="390" src="//img.', '475" height="444" src="//img.', '140" height="390" src="//img.', '140" height="390" src="//img.', '140" height="390" src="//img.', '225" height="290" src="http://gtms02.', '100" height="100" src="http://gtms03.', '100" height="100" src="http://gtms02.', '100" height="100" src="http://gtms04.', '225" height="290" src="http://gtms01.', '100" height="100" src="http://gtms03.', '100" height="100" src="http://gtms04.', '100" height="100" src="http://gtms03.', '180" height="180" class="mm_debut_item_avatar" src="http://gtms01.', '180" height="180" class="mm_debut_item_avatar" src="http://gtms01.', '180" height="180" class="mm_debut_item_avatar" src="http://gtms02.', '180" height="180" class="mm_debut_item_avatar" src="http://gtms04.', '180" height="180" class="mm_debut_item_avatar" src="http://gtms03.', '180" height="180" class="mm_debut_item_avatar" src="http://gtms02.', '180" height="180" class="mm_debut_item_avatar" src="http://gtms04.', '180" height="180" class="mm_debut_item_avatar" src="http://gtms01.', '180" height="180" class="mm_debut_item_avatar" src="http://gtms04.', '180" height="180" class="mm_debut_item_avatar" src="http://gtms04.', '180" height="180" class="mm_debut_item_avatar" src="http://img03.', '180" height="180" class="mm_debut_item_avatar" src="http://img01.', '180" height="180" class="mm_debut_item_avatar" src="http://img01.', '180" height="180" class="mm_debut_item_avatar" src="http://img04.', '180" height="180" class="mm_debut_item_avatar" src="http://img02.', '238" height="238" src="http://gtms01.', '238" height="238" src="http://gtms02.', '238" height="238" src="http://gtms03.', '238" height="238" src="http://gtms04.', '238" height="238" src="http://gtms01.', '238" height="238" src="http://gtms02.', '238" height="238" src="http://gtms03.', '238" height="238" src="http://gtms04.', '238" height="238" src="http://gtms01.', '238" height="238" src="http://gtms02.', '238" height="238" src="http://gtms01.', '238" height="238" src="http://gtms04.', '238" height="238" src="http://gtms03.', '238" height="238" src="http://gtms02.', '238" height="238" src="http://gtms01.', '238" height="238" src="http://gtms04.', '238" height="238" src="http://gtms03.', '238" height="238" src="http://gtms02.', '238" height="238" src="http://gtms01.', '238" height="238" src="http://gtms02.', '238" height="238" sTraceback (most recent call last):rc="http://gtms03.', '238" height="238" src="http://gtms01.', '238" height="238" src="http://gtms04.', '238" height="238" src="http://gtms01.', '238" height="238" src="http://gtms02.', '238" height="238" src="http://gtms03.', '238" height="238" src="http://gtms04.', '383" height="140" src="http://gtms04.', '383" height="140" src="http://gtms04.', '383" height="140" src="http://gtms02.']
File "C:\Workspace\taobao.py", line 25, in <module>
getima(html)
File "C:\Workspace\taobao.py", line 19, in getima
urllib.urlretrieve(x,'%s.jpg' % i)
File "C:\Python27\lib\urllib.py", line 98, in urlretrieve
return opener.retrieve(url, filename, reporthook, data)
File "C:\Python27\lib\urllib.py", line 245, in retrieve
fp = self.open(url, data)
File "C:\Python27\lib\urllib.py", line 213, in open
return getattr(self, name)(url)
File "C:\Python27\lib\urllib.py", line 469, in open_file
return self.open_local_file(url)
File "C:\Python27\lib\urllib.py", line 483, in open_local_file
raise IOError(e.errno, e.strerror, e.filename)
IOError: [Errno 22] : '890" height="440" src="\\\\img.\\tps\\i2\\TB1glSeHFXXXXcQXFXXblszJFXX-890-440.png'
[Finished in 0.9s with exit code 1]
是哪里出问题了呢?