"""Print posts scraped from the qiushibaike.com "8hr" listing pages.

Running this file downloads listing pages 1 through 35 and prints one dict
per matched post to standard output.
"""
import requests
from bs4 import BeautifulSoup
import lxml  # not referenced by name; BeautifulSoup is asked for the 'lxml' parser below

# Site root: http://www.qiushibaike.com/
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'}

# Seconds to wait on the server before giving up. requests applies no timeout
# unless one is passed, so without this a stalled connection blocks forever.
REQUEST_TIMEOUT = 10

# Listing pages 1..35 (range's upper bound is exclusive).
urls = [
    'http://www.qiushibaike.com/8hr/page/' + str(page) + '/?s=4940923'
    for page in range(1, 36)
]


def get_substance(url, data=None):
    """Fetch one listing page and print a dict for every post found on it.

    Each printed dict has the keys 'name', 'numberd' and 'content'; every
    value is the list of whitespace-stripped strings inside the matching
    element.

    Args:
        url: Address of the page to download.
        data: Unused. Accepted only so existing callers keep working.

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            server does not respond within REQUEST_TIMEOUT seconds.
    """
    web_data = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Pass the already-decoded text straight to the parser. Re-encoding it to
    # UTF-8 bytes first would make BeautifulSoup guess the encoding again, and
    # that guess can follow a charset declared in the markup that no longer
    # matches the bytes.
    soup = BeautifulSoup(web_data.text, 'lxml')

    names = soup.select('.author > a > h2')
    numberds = soup.select('.stats-vote > .number')
    contents = soup.select('.content > span')

    # NOTE: the three result lists are paired purely by position and zip()
    # stops at the shortest one, so a post missing any of the three elements
    # shifts the pairing for every post after it.
    for name, numberd, content in zip(names, numberds, contents):
        record = {
            'name': list(name.stripped_strings),
            'numberd': list(numberd.stripped_strings),
            'content': list(content.stripped_strings),
        }
        print(record)


for single_url in urls:
    get_substance(single_url)
# 相关分类  ("Related categories" -- appears to be leftover web-page text, not part of the script)