>>> i=0
>>> for url in listurl:
f=open('E:/python/mi/'+str(i)+'.jpg','wb')
req=urllib2.urlopen(url)
buf=req.read()
f.write(buf)
i+=1
Traceback (most recent call last):
File "<pyshell#14>", line 3, in <module>
req=urllib2.urlopen(url)
File "C:\Program Files\IBM\SPSS\Statistics\22\Python\lib\urllib2.py", line 126, in urlopen
return _opener.open(url, data, timeout)
File "C:\Program Files\IBM\SPSS\Statistics\22\Python\lib\urllib2.py", line 384, in open
protocol = req.get_type()
File "C:\Program Files\IBM\SPSS\Statistics\22\Python\lib\urllib2.py", line 245, in get_type
raise ValueError, "unknown url type: %s" % self.__original
ValueError: unknown url type: src="//c1.mifile.cn/f/i/2014/cn/placeholder-220!110x110.png" data-src="//c1.mifile.cn/f/i/g/2015/xiaomiNOTE2-320-220!160x110.jpg" srcset="//c1.mifile.cn/f/i/g/2015/xiaomiNOTE2-320-220!320x220.jpg
我用的是 PyCharm,今天才手写的代码,改了几个小地方,都写在注释里了,楼主看看有没有注意到,谢谢。
import re
import urllib
# Download every course thumbnail (.jpg) referenced on the imooc course-list
# page into the current directory as 0.jpg, 1.jpg, ...

# urllib.request is a submodule; a bare "import urllib" is not guaranteed to
# load it, so import it explicitly before use.
import urllib.request

# Decode the response so the regex below operates on text rather than bytes.
with urllib.request.urlopen('http://www.imooc.com/course/list') as req:
    buf = req.read().decode("utf-8")

# The page now references its images with protocol-relative URLs
# ("//img....jpg"). The match is non-greedy and may not cross quotes,
# whitespace or angle brackets, so one match can never span several HTML
# attributes.
listurl = re.findall(r'//img[^\s"\'<>]+?\.jpg', buf)

# The scheme is missing, so prepend it. Prefixing (instead of replacing every
# '//') leaves any later '//' inside the URL path untouched.
listurl = ['http:' + url for url in listurl]
for index, url in enumerate(listurl):
    print(index, url)
print(listurl)

for i, url in enumerate(listurl):
    # Fetch first, so a failed request does not leave an empty file behind.
    with urllib.request.urlopen(url) as req:
        buf = req.read()
    # Binary mode is required because buf is bytes; "with" closes every file,
    # not just the last one.
    with open(str(i) + ".jpg", "wb") as f:
        f.write(buf)
我也是这个情况。。。
# Download every .jpg linked from the imooc course-list page into the current
# directory as 0.jpg, 1.jpg, ... (Python 2: uses urllib2).
import re
import urllib2

req = urllib2.urlopen('http://www.imooc.com/course/list')
try:
    buf = req.read()
finally:
    # Python 2 responses are not context managers, so close explicitly.
    req.close()

# Non-greedy, and not allowed to cross quotes, whitespace or angle brackets:
# the greedy 'http:.+\.jpg' ran from the first "http:" to the last ".jpg" on
# a line and produced strings that are not URLs.
listurl = re.findall(r'http:[^\s"\'<>]+?\.jpg', buf)

# Show one sample match; guarded so a page with fewer than two images does
# not raise IndexError.
if len(listurl) > 1:
    print(listurl[1])

for i, url in enumerate(listurl):
    # Fetch first, so a failed request does not leave an empty file behind.
    req = urllib2.urlopen(url)
    try:
        buf = req.read()
    finally:
        req.close()
    # "with" closes each output file as soon as it has been written.
    with open(str(i) + '.jpg', 'wb') as f:
        f.write(buf)
# Download every .jpg linked from the imooc course-list page into the current
# directory as 0.jpg, 1.jpg, ... (Python 2: uses urllib2).
import re
import urllib2

req = urllib2.urlopen('http://www.imooc.com/course/list')
try:
    buf = req.read()
finally:
    # Python 2 responses are not context managers, so close explicitly.
    req.close()

# Non-greedy, and not allowed to cross quotes, whitespace or angle brackets:
# the greedy 'http:.+\.jpg' ran from the first "http:" to the last ".jpg" on
# a line and produced strings that are not URLs.
listurl = re.findall(r'http:[^\s"\'<>]+?\.jpg', buf)

# Show one sample match; guarded so a page with fewer than two images does
# not raise IndexError.
if len(listurl) > 1:
    print(listurl[1])

for i, url in enumerate(listurl):
    # Fetch first, so a failed request does not leave an empty file behind.
    req = urllib2.urlopen(url)
    try:
        buf = req.read()
    finally:
        req.close()
    # "with" closes each output file as soon as it has been written.
    with open(str(i) + '.jpg', 'wb') as f:
        f.write(buf)