用scrapy爬取网易新闻时出错[新手]

items.py
import scrapy

class News163Item(scrapy.Item):
    title=scrapy.Field()
    url=scrapy.Field()
    source=scrapy.Field()
    content=scrapy.Field()
news_spider.py
#coding:utf-8
from scrapy.contrib.linkextractors import LinkExtractor
from scrapy.contrib.spiders import CrawlSpider,Rule

class ExampleSpider(CrawlSpider):
    name="news"
    allowed_Domains=["news.163.com"]
    start_urls=['http://news.163.com/']
    rules=[
        Rule(LinkExtractor(allow=r"/14/12\d+/\d+/*"),
        'parse_news')
    ]
    def parse_news(self,response):
        news=News163Item()
        news['title']=response.xpath("//*[@id="h1title"]/text()").extract()
        news['source']=response.xpath("//*[@id="ne_article_source"]/text()").extract()
        news['content']=response.xpath("//*[@id="endText"]/text()").extract()
        news['url']=response.url
        return news
cd进入所在目录后,命令行执行:
scrapy crawl news -o news163.json
会跳出如下错误:
Traceback (most recent call last):
  File "/usr/bin/scrapy", line 9, in <module>
    load_entry_point('Scrapy==0.24.4', 'console_scripts', 'scrapy')()
  File "/usr/lib/pymodules/python2.7/scrapy/cmdline.py", line 143, in execute
    _run_print_help(parser, _run_command, cmd, args, opts)
  File "/usr/lib/pymodules/python2.7/scrapy/cmdline.py", line 89, in _run_print_help
    func(*a, **kw)
  File "/usr/lib/pymodules/python2.7/scrapy/cmdline.py", line 150, in _run_command
    cmd.run(args, opts)
  File "/usr/lib/pymodules/python2.7/scrapy/commands/crawl.py", line 57, in run
    crawler = self.crawler_process.create_crawler()
  File "/usr/lib/pymodules/python2.7/scrapy/crawler.py", line 87, in create_crawler
    self.crawlers[name] = Crawler(self.settings)
  File "/usr/lib/pymodules/python2.7/scrapy/crawler.py", line 25, in __init__
    self.spiders = spman_cls.from_crawler(self)
  File "/usr/lib/pymodules/python2.7/scrapy/spidermanager.py", line 35, in from_crawler
    sm = cls.from_settings(crawler.settings)
  File "/usr/lib/pymodules/python2.7/scrapy/spidermanager.py", line 31, in from_settings
    return cls(settings.getlist('SPIDER_MODULES'))
  File "/usr/lib/pymodules/python2.7/scrapy/spidermanager.py", line 22, in __init__
    for module in walk_modules(name):
  File "/usr/lib/pymodules/python2.7/scrapy/utils/misc.py", line 68, in walk_modules
    submod = import_module(fullpath)
  File "/usr/lib/python2.7/importlib/__init__.py", line 37, in import_module
    __import__(name)
  File "/home/gao/news/news/spiders/news_spider.py", line 15
    news['title']=response.xpath("//*[@id="h1title"]/text()").extract()
                                                 ^
SyntaxError: invalid syntax
请问是哪里出错了?python新手,scrapy也是最近才用的,很生疏,求指点。
谢谢 @捏造的信仰 的回答,但是更改过之后,还是有错误:
2014-12-02 20:13:02+0800 [news] ERROR: Spider error processing
Traceback (most recent call last):
  File "/usr/lib/python2.7/dist-packages/twisted/internet/base.py", line 824, in runUntilCurrent
    call.func(*call.args, **call.kw)
  File "/usr/lib/python2.7/dist-packages/twisted/internet/task.py", line 638, in _tick
    taskObj._oneWorkUnit()
  File "/usr/lib/python2.7/dist-packages/twisted/internet/task.py", line 484, in _oneWorkUnit
    result = next(self._iterator)
  File "/usr/lib/pymodules/python2.7/scrapy/utils/defer.py", line 57, in <genexpr>
    work = (callable(elem, *args, **named) for elem in iterable)
--- <exception caught here> ---
  File "/usr/lib/pymodules/python2.7/scrapy/utils/defer.py", line 96, in iter_errback
    yield next(it)
  File "/usr/lib/pymodules/python2.7/scrapy/contrib/spidermiddleware/offsite.py", line 26, in process_spider_output
    for x in result:
  File "/usr/lib/pymodules/python2.7/scrapy/contrib/spidermiddleware/referer.py", line 22, in <genexpr>
    return (_set_referer(r) for r in result or ())
  File "/usr/lib/pymodules/python2.7/scrapy/contrib/spidermiddleware/urllength.py", line 33, in <genexpr>
    return (r for r in result or () if _filter(r))
  File "/usr/lib/pymodules/python2.7/scrapy/contrib/spidermiddleware/depth.py", line 50, in <genexpr>
    return (r for r in result or () if _filter(r))
  File "/usr/lib/pymodules/python2.7/scrapy/contrib/spiders/crawl.py", line 67, in _parse_response
    cb_res = callback(response, **cb_kwargs) or ()
  File "/home/gao/news/news/spiders/news_spider.py", line 14, in parse_news
    news=News163Item()
exceptions.NameError: global name 'News163Item' is not defined
请问这又是什么原因呢?
至尊宝的传说
浏览 301回答 2
2回答

慕田峪7331174

字符串外部使用的是双引号,在双引号内部还需要使用引号的话可以使用单引号。例如: news['title']=response.xpath("//*[@id='h1title']/text()").extract()

慕桂英546537

字符串中的引号没有转义,导致了语法错误。应该改为: news['title']=response.xpath("//*[@id=\"h1title\"]/text()").extract() 。下面几行也是一样的问题。
打开App,查看更多内容
随时随地看视频慕课网APP

相关分类

JavaScript