我从命令 python 收集 url,然后将它的插入到 start_urls
from flask import Flask, jsonify, request
import scrapy
import subprocess
class ClassSpider(scrapy.Spider):
name = 'mySpider'
#start_urls = []
#pages = 0
news = []
def __init__(self, url, nbrPage):
self.pages = nbrPage
self.start_urls = []
self.start_urlsappend(url)
def parse(self):
...
def run(self):
subprocess.check_output(['scrapy', 'crawl', 'mySpider', '-a', f'url={self.start_urls}', '-a', f'nbrPage={self.pages}'])
return self.news
app = Flask(__name__)
data = []
@app.route('/', methods=['POST'])
def getNews():
mySpiderClass = ClassSpider(request.json['url'], 2)
return jsonify({'data': mySpider.run()})
if __name__ == "__main__":
app.run(debug=True)
我得到这个错误: raise not supported("unsupported url scheme %s: %s" % scrapy.exceptions.NotSupported: Unsupported URL scheme '': no handler available for that scheme
当我放置 a print('my urls List: ' + str(self.start_urls))时,它会打印一个 url 列表,例如 --> my urls List: ['www.googole.com']
任何帮助请
临摹微笑
相关分类