#coding:utf8
"""Scrape message titles and bodies from liuyan.people.com.cn into a CSV file.

The script targets Python 2: it depends on ``urllib2`` and on the Python 2
``csv`` module, which works on byte strings.
"""
from bs4 import BeautifulSoup
import urllib2
import re
import csv


def _clean_text(text):
    """Return *text* with newlines, carriage returns and spaces removed."""
    return text.replace('\n', '').replace('\r', '').replace(" ", '')


def _encode_cell(cell):
    """Return *cell* as UTF-8 bytes when it is a ``unicode`` object.

    The Python 2 ``csv`` writer does not accept non-ASCII ``unicode`` values,
    so text must be encoded before it is handed to the writer.  Any other
    value is returned unchanged.
    """
    if isinstance(cell, unicode):
        return cell.encode('utf-8')
    return cell


def get_attractions(url, data=None):
    """Download one listing page and return its messages as CSV-ready rows.

    Args:
        url: Address of the listing page to fetch.
        data: Unused; retained so callers that pass it keep working.

    Returns:
        A list of ``[title, content]`` lists, one for each pair of
        ``<div class="title">`` / ``<div class="message">`` elements found
        on the page, with whitespace stripped by ``_clean_text``.  The list
        is empty when the page contains no such pair.
    """
    web_request = urllib2.urlopen(url)
    try:
        web_data = web_request.read().decode('gb2312')
    finally:
        # Release the connection even when reading or decoding fails.
        web_request.close()

    soup = BeautifulSoup(web_data, 'html.parser')
    div_title = soup.find_all('div', class_="title")
    div_content = soup.find_all('div', class_="message")

    rows = []
    for d_title, d_content in zip(div_title, div_content):
        # Build the row in a fixed order (title first) instead of relying on
        # dict ordering, which is arbitrary in Python 2.
        row = [_clean_text(d_title.get_text()),
               _clean_text(d_content.get_text())]
        for cell in row:
            print(cell)
        rows.append(row)
    return rows


def write_data(data, name):
    """Write *data* to the CSV file *name*, replacing any existing file.

    Args:
        data: Iterable of rows, each row an iterable of cells.
        name: Path of the CSV file to create.
    """
    # 'wb' is the mode the Python 2 csv module expects for output files.
    with open(name, 'wb') as f:
        f_csv = csv.writer(f)
        f_csv.writerows([_encode_cell(cell) for cell in row] for row in data)


url = 'http://liuyan.people.com.cn/list.php?fid=733'
urls = ['http://liuyan.people.com.cn/list.php?fid=733&display=&total=112&page={}'.format(str(i)) for i in range(1, 13, 1)]


def main():
    """Scrape every page in ``urls`` and save all rows to ``liuyan.csv``."""
    all_rows = []
    for single_url in urls:
        all_rows.extend(get_attractions(single_url))
    # Write once after collecting everything: write_data truncates the file,
    # so writing per page would keep only the last page.
    write_data(all_rows, 'liuyan.csv')


if __name__ == '__main__':
    main()
fengshunsgit
fengshunsgit
qq_迟来的秋天_04009602
weibo_漫河乡_03621568
相关分类