当我在 cmd 中编写此命令时
scrapy crawl quotes -o item.csv -a u=test_user_name -a p=test_passporw_name -a urls=http://books.toscrape.com/
它显示
raise ValueError('Missing scheme in request url: %s' % self._url)
ValueError: Missing scheme in request url: h
# -*- coding: utf-8 -*-
from scrapy.contrib.spiders.init import InitSpider
from scrapy.http import Request, FormRequest
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from scrapy.contrib.spiders import Rule
from scrapy.utils.response import open_in_browser
from scrapy.spider import BaseSpider
from scrapy.selector import HtmlXPathSelector
class QuotesSpider(InitSpider):
    """Spider that logs in to quotes.toscrape.com before crawling.

    The credentials and the URLs to crawl are supplied as spider
    arguments on the command line, e.g.::

        scrapy crawl quotes -a u=USER -a p=PASS -a urls=http://example.com/

    Several URLs may be given to ``urls`` separated by commas.
    """

    name = 'quotes'
    allowed_domains = ['quotes.toscrape.com']
    login_page = 'http://quotes.toscrape.com/login'
    # Always replaced in __init__; an empty list avoids a scheme-less ''
    # URL ever being scheduled.
    start_urls = []
    username = ''
    password = ''

    def __init__(self, u, p, urls, **kwargs):
        """Store the credentials and normalise the start URLs.

        Args:
            u: Username submitted in the login form.
            p: Password submitted in the login form.
            urls: URL(s) to crawl after logging in. Either a list/tuple
                of URLs or a single string holding one or more
                comma-separated URLs (the form spider arguments take on
                the command line).
            **kwargs: Any further spider arguments, forwarded to the
                base class.
        """
        super(QuotesSpider, self).__init__(**kwargs)
        self.username = u
        self.password = p
        if isinstance(urls, (list, tuple)):
            self.start_urls = list(urls)
        else:
            # Spider arguments arrive as plain strings. Assigning the
            # string directly makes start_urls iterate character by
            # character ('h', 't', 't', 'p', ...), which is what raised
            # "Missing scheme in request url: h".
            self.start_urls = [
                part.strip() for part in urls.split(',') if part.strip()
            ]

    def init_request(self):
        """Return the request for the login page.

        This function is called before crawling starts.
        """
        return Request(url=self.login_page, callback=self.login)

    def login(self, response):
        """Submit the login form found in ``response``.

        The CSRF token is read from the page and sent back together with
        the stored username and password.
        """
        csrf_token = response.xpath(
            '//*[@name="csrf_token"]//@value').extract_first()
        return FormRequest.from_response(
            response,
            formdata={
                'csrf_token': csrf_token,
                'username': self.username,
                'password': self.password,
            },
            callback=self.check_login_response,
        )

    def check_login_response(self, response):
        """Check the login response to see whether we are logged in.

        On success the result of ``self.initialized()`` is returned so
        that crawling can begin; otherwise the failure is logged and
        nothing is returned.
        """
        # open_in_browser(response)
        # response.body is bytes, so compare against a bytes literal;
        # a text literal raises TypeError on Python 3.
        if b"Logout" in response.body:
            self.log("\n\n\nSuccessfully logged in. Let's start crawling!\n\n\n")
            # Now the crawling can begin..
            return self.initialized()
        self.log("Login failed: 'Logout' not found in the login response.")
莫回无
相关分类