在json文件中插入数据

代码将错误的结构 json 插入到文件中


import requests

from bs4 import BeautifulSoup

from selenium import webdriver

from selenium.webdriver.chrome.options import Options

import json


urls = {}

urls['Av'] = {'Áa', 'Bb'}


data = {}

for key, value in urls.items(): 

    for x in value: 


        url = 'https://www.google.pt/search?q=' + key + '%20' + x

        driver = webdriver.Chrome()

        driver.get(url)

        html = driver.page_source


        soup = BeautifulSoup(html, 'html.parser')

        a = soup.find("body")


        for child in a.find_all("div", {'class': 'g'}):

            h2 = child.find("span", {'class': 'Q8LRLc'})

            div = child.find("a", {'class': 'Fx4vi'})


        data[key] = []

        data[key].append({'h2': h2, 'div': div})

        print(data)


        with open("data_file.json", "a") as write_file: 

            json.dump(data, write_file, indent=4)


        driver.quit()


慕沐林林
浏览 150回答 1
1回答

qq_花开花谢_0

我看到了很多问题,大多数是当它们应该在循环之外时却在循环内,或者当它们应该在循环中时却在循环之外。您在循环内设置变量h2和,但将它们添加到循环外,因此只会添加最后一个值。divfor child in a.find_all("div", {'class': 'g'}):data此外,您在循环内为每个键初始化数据,并且应该在循环外完成,否则每次都会重新初始化。您还每次都打开文件附加到它,我只做一次。而且,你在每个循环中初始化你的驱动程序。requests并且selenium.webdriver.chrome.options.Options都是未使用的进口所以,我会这样改变它:urls = {}urls['Av'] = {'Áa', 'Bb'}data = {}driver = webdriver.Chrome()with open("data_file.json", "a") as write_file:     for key, value in urls.items():        data[key] = []. # initialize only once per key        for x in value:             url = 'https://www.google.pt/search?q=' + key + '%20' + x            driver.get(url)            html = driver.page_source            soup = BeautifulSoup(html, 'html.parser')            a = soup.find("body")            for child in a.find_all("div", {'class': 'g'}):                h2 = child.find("span", {'class': 'Q8LRLc'})                div = child.find("a", {'class': 'Fx4vi'})                data[key].append({'h2': h2, 'div': div})  # update data for every h2/div found    json.dump(data, write_file, indent=4) # This write can be done once, outside all loops!driver.quit()对我来说有点难测试,但希望能有所帮助!快乐编码!
打开App,查看更多内容
随时随地看视频慕课网APP

相关分类

Python