from urllib import request
import urllib
from bs4 import BeautifulSoup
import xlwt
import re
# Scrape the UK-series category page of meijuworld.com, extract the episode
# links from the 'an-widget-title' divs, and print them. The xlwt workbook is
# prepared for a later Excel export (no rows are written yet).
book = xlwt.Workbook(encoding='utf-8', style_compression=0)
sheet1 = book.add_sheet('mymovie', cell_overwrite_ok=True)

url = 'http://www.meijuworld.com/category/uk'
req = request.Request(url)
# FIX: header name was misspelled 'user-agentkk', so the User-Agent header
# was never actually sent and the site could reject the request as a bot.
req.add_header('User-Agent', 'Mozilla/5.0')
response = request.urlopen(req)
html_doc = response.read()
response.close()  # release the HTTP connection once the body is read

wholepage = BeautifulSoup(html_doc, 'html.parser', from_encoding='UTF-8')
meiju = wholepage.find_all('div', class_='an-widget-title')
# FIX: raw string with escaped dots (a bare '.' matched any character), and
# non-greedy '.*?' so two adjacent links on one line are not merged into one match.
ds = re.findall(r'http://www\.meijuworld\.com/.*?\.html', str(meiju))
for link in ds:
    print(link)  # FIX: this line was unindented in the original -> SyntaxError
print('ok')
# NOTE(review): the output also captures five extra navigation links from the
# page before the 12 episode links we actually want — they need to be filtered
# out (e.g. skip the first five matches, or anchor the regex to the post list).
# (stray forum-paste residue kept as a comment so the script stays runnable:
#  poster name "格瑞克爱鲜荔枝"; section heading "相关分类" = "related categories")