#2
皮影092015-04-22 15:54
|
程序代码:
import re
import urllib
def get(url):
page = urllib.urlopen(url)
html = page.read()
return html
def getima(html):
res = r'"(.*\.png)"'
ress = (res)
list_ima = ress.findall(html)
print list_ima
i = 0
for x in list_ima:
urllib.urlretrieve(x,'%s.jpg' % i)
i+=1
print '%s has been downloaded' % i
html = get("http://mm.")
getima(html)
import urllib
def get(url):
page = urllib.urlopen(url)
html = page.read()
return html
def getima(html):
res = r'"(.*\.png)"'
ress = (res)
list_ima = ress.findall(html)
print list_ima
i = 0
for x in list_ima:
urllib.urlretrieve(x,'%s.jpg' % i)
i+=1
print '%s has been downloaded' % i
html = get("http://mm.")
getima(html)
错误信息说:
['890" height="440" src="//img.', '890" height="440" src="//img.', '890" height="440" src="//img.', '890" height="440" src="//img.', '290" height="280" src="//img.', '290" height="280" src="//img.', '290" height="280" src="//img.', 'http://gtms03.', '270" height="280" src="http://gtms04.', '270" height="280" src="http://gtms03.', '270" height="280" src="http://gtms02.', '270" height="280" src="http://gtms01.', '270" height="280" src="http://gtms03.', '270" height="280" src="http://gtms04.', '270" height="280" src="http://gtms01.', '270" height="280" src="http://gtms04.', '270" height="280" src="http://gtms01.', '115" height="126" src="http://gtms04.', '115" height="126" src="http://gtms03.', '115" height="126" src="http://gtms03.', '115" height="126" src="http://gtms02.', '115" height="126" src="http://gtms01.', '115" height="126" src="http://gtms01.', '115" height="126" src="http://gtms02.', '115" height="126" src="http://gtms04.', '115" height="126" src="http://gtms04.', '220" height="295" src="http://img03.', '220" height="295" src="http://img02.', '220" height="295" src="http://img04.', '220" height="295" src="http://img03.', '220" height="295" src="http://img02.', '220" height="295" src="http://img04.', '220" height="295" src="http://img01.', '220" height="295" src="http://img03.', '220" height="295" src="http://img02.', '220" height="295" src="http://img01.', 'http://gtms01.', '140" height="390" src="//img.', '140" height="390" src="//img.', '475" height="444" src="//img.', '140" height="390" src="//img.', '140" height="390" src="//img.', '140" height="390" src="//img.', '225" height="290" src="http://gtms02.', '100" height="100" src="http://gtms03.', '100" height="100" src="http://gtms02.', '100" height="100" src="http://gtms04.', '225" height="290" src="http://gtms01.', '100" height="100" src="http://gtms03.', '100" height="100" src="http://gtms04.', '100" height="100" src="http://gtms03.', '180" height="180" class="mm_debut_item_avatar" src="http://gtms01.', '180" height="180" class="mm_debut_item_avatar" src="http://gtms01.', '180" height="180" class="mm_debut_item_avatar" src="http://gtms02.', '180" height="180" class="mm_debut_item_avatar" src="http://gtms04.', '180" height="180" class="mm_debut_item_avatar" src="http://gtms03.', '180" height="180" class="mm_debut_item_avatar" src="http://gtms02.', '180" height="180" class="mm_debut_item_avatar" src="http://gtms04.', '180" height="180" class="mm_debut_item_avatar" src="http://gtms01.', '180" height="180" class="mm_debut_item_avatar" src="http://gtms04.', '180" height="180" class="mm_debut_item_avatar" src="http://gtms04.', '180" height="180" class="mm_debut_item_avatar" src="http://img03.', '180" height="180" class="mm_debut_item_avatar" src="http://img01.', '180" height="180" class="mm_debut_item_avatar" src="http://img01.', '180" height="180" class="mm_debut_item_avatar" src="http://img04.', '180" height="180" class="mm_debut_item_avatar" src="http://img02.', '238" height="238" src="http://gtms01.', '238" height="238" src="http://gtms02.', '238" height="238" src="http://gtms03.', '238" height="238" src="http://gtms04.', '238" height="238" src="http://gtms01.', '238" height="238" src="http://gtms02.', '238" height="238" src="http://gtms03.', '238" height="238" src="http://gtms04.', '238" height="238" src="http://gtms01.', '238" height="238" src="http://gtms02.', '238" height="238" src="http://gtms01.', '238" height="238" src="http://gtms04.', '238" height="238" src="http://gtms03.', '238" height="238" src="http://gtms02.', '238" height="238" src="http://gtms01.', '238" height="238" src="http://gtms04.', '238" height="238" src="http://gtms03.', '238" height="238" src="http://gtms02.', '238" height="238" src="http://gtms01.', '238" height="238" src="http://gtms02.', '238" height="238" sTraceback (most recent call last):rc="http://gtms03.', '238" height="238" src="http://gtms01.', '238" height="238" src="http://gtms04.', '238" height="238" src="http://gtms01.', '238" height="238" src="http://gtms02.', '238" height="238" src="http://gtms03.', '238" height="238" src="http://gtms04.', '383" height="140" src="http://gtms04.', '383" height="140" src="http://gtms04.', '383" height="140" src="http://gtms02.']
File "C:\Workspace\taobao.py", line 25, in <module>
getima(html)
File "C:\Workspace\taobao.py", line 19, in getima
urllib.urlretrieve(x,'%s.jpg' % i)
File "C:\Python27\lib\urllib.py", line 98, in urlretrieve
return opener.retrieve(url, filename, reporthook, data)
File "C:\Python27\lib\urllib.py", line 245, in retrieve
fp = self.open(url, data)
File "C:\Python27\lib\urllib.py", line 213, in open
return getattr(self, name)(url)
File "C:\Python27\lib\urllib.py", line 469, in open_file
return self.open_local_file(url)
File "C:\Python27\lib\urllib.py", line 483, in open_local_file
raise IOError(e.errno, e.strerror, e.filename)
IOError: [Errno 22] : '890" height="440" src="\\\\img.\\tps\\i2\\TB1glSeHFXXXXcQXFXXblszJFXX-890-440.png'
[Finished in 0.9s with exit code 1]
是哪里出问题了呢?