python
# -*- coding: utf-8 -*-
import scrapy


class BokeItem(scrapy.Item):
    """Container for the data scraped from a single blog page.

    Declares three fields: ``url``, ``title`` and ``content``.
    """

    # Address of the page the item was built from.
    url = scrapy.Field()

    # Title text of the page.
    title = scrapy.Field()

    # Body text of the page.
    content = scrapy.Field()
python
# -*- coding: utf-8 -*-
try:
    # Module layout of current Scrapy releases.
    from scrapy.spiders import CrawlSpider, Rule
    from scrapy.linkextractors import LinkExtractor
except ImportError:
    # Legacy ``scrapy.contrib`` layout, kept as a fallback for old installs.
    from scrapy.contrib.spiders import CrawlSpider, Rule
    from scrapy.contrib.linkextractors import LinkExtractor

# Imported under an alias on purpose: the spider class below is itself named
# ``BokeItem``, so a plain ``from boke.items import BokeItem`` would be
# shadowed by the class definition and ``BokeItem()`` would build a spider
# instead of an item.
from boke.items import BokeItem as BlogPostItem


class BokeItem(CrawlSpider):
    """Spider ``blog``: scrape the URL, title and body text of a blog page.

    The class name is kept unchanged for backward compatibility with code
    that imports it; Scrapy itself selects the spider through ``name``.

    No crawl ``rules`` are defined, so only the pages in ``start_urls`` are
    scraped. To follow further links, add ``Rule(LinkExtractor(...),
    callback='parse_torrent')`` entries to a ``rules`` tuple.
    """

    name = 'blog'
    start_urls = ['http://blog.sina.com.cn/s/blog_4701280b0102eo83.html']

    def parse_start_url(self, response, **kwargs):
        """Route start-URL responses to :meth:`parse_torrent`.

        ``CrawlSpider`` handles start-URL responses through this hook, and
        its default implementation produces no items. Without this override
        ``parse_torrent`` would never be called.
        """
        return self.parse_torrent(response)

    @staticmethod
    def _first_match(response, query):
        """Return the first string selected by XPath *query*, or ``None``.

        Indexing ``extract()[0]`` directly raises ``IndexError`` when the
        page does not contain the expected node; returning ``None`` lets the
        item still be produced with the field marked as missing.
        """
        matches = response.xpath(query).extract()
        return matches[0] if matches else None

    def parse_torrent(self, response):
        """Build one item from a blog page response.

        Args:
            response: the downloaded page.

        Returns:
            An item with ``url`` set to ``response.url`` and ``title`` /
            ``content`` set to the first text node matched by their XPath
            expressions (``None`` when nothing matches).
        """
        post = BlogPostItem()
        post['url'] = response.url
        post['title'] = self._first_match(
            response, "//h2[@class='titName SG_txta']/text()")
        post['content'] = self._first_match(
            response, "//div[@style='min-height:22px']/text()")
        return post
摇曳的蔷薇
慕后森
忽然笑
相关分类