from bs4 import BeautifulSoup
import requests
import time

# Collected results (kept for backward compatibility; not currently appended to).
info = []

# Sample search page and sample detail page for the Chongqing xiaozhu.com site.
url = 'http://cq.xiaozhu.com/search-duanzufang-p1-0/'
url_save = 'http://cq.xiaozhu.com/fangzi/4783366714.html'

# Search-result pages 1..13 for short-term rentals.
urls = ['http://cq.xiaozhu.com/search-duanzufang-p{}-0/'.format(i)
        for i in range(1, 14)]


def get_fav(url, data=None):
    """Print the detail-page URL of every listing on one search-result page.

    Parameters
    ----------
    url : str
        A search-result page URL (one of ``urls``).
    data : optional
        If ``None`` (the default), the hrefs are printed; any other value
        suppresses output. Kept for compatibility with the original signature.
    """
    web_data = requests.get(url)
    time.sleep(2)  # throttle so we don't hammer the site
    soup = BeautifulSoup(web_data.text, 'lxml')
    links = soup.select('#page_list > ul > li > a[target="_blank"]')
    if data is None:
        for link in links:
            print(link.get('href'))


def get_info(url_1, data=None):
    """Scrape one listing detail page and print a dict of its fields.

    Extracts title, location, nightly price, main photo, and the host's
    avatar, name and gender marker, then prints them as a dict.

    Parameters
    ----------
    url_1 : str
        A listing detail-page URL (e.g. ``url_save``).
    data : optional
        If ``None`` (the default), the scraped dict is printed; any other
        value suppresses output. Kept for compatibility.
    """
    web_data = requests.get(url_1)
    soup = BeautifulSoup(web_data.text, 'lxml')
    titles = soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > h4 > em')
    sites = soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > p')
    prices = soup.select('#pricePart > div.day_l > span')
    images = soup.select('#curBigImage')
    avatars = soup.select('#floatRightBox > div.js_box.clearfix > div.member_pic > a > img')
    names = soup.select('#floatRightBox > div.js_box.clearfix > div.w_240 > h6 > a')
    sexs = soup.select('#floatRightBox > div.js_box.clearfix > div.member_pic > div')
    if data is None:
        for title, site, price, image, avatar, name, sex in zip(
                titles, sites, prices, images, avatars, names, sexs):
            data = {
                'title': title.get_text(),
                'site': site.get('title'),
                'price': price.get_text(),
                'image': image.get('src'),
                'avatar': avatar.get('src'),
                'name': name.get_text(),
                # Bug fix: ``sexs`` was selected and zipped but never emitted.
                # The gender is encoded in the div's CSS class on this site
                # (presumably member_ico vs member_ico1 — TODO confirm).
                'sex': sex.get('class'),
            }
            print(data)


if __name__ == '__main__':
    # Guarded so importing this module no longer triggers network traffic.
    for single in urls:
        get_fav(single)
运行效果应该是:对每个租房链接,输出租房名称、租房价格、租房地点、户主照片、户主ID等信息。
qq_幻梦_7
相关分类