是否可以从 JSON 文件中删除重复项?

我有一个 JSON 文件,里面保存的是从网站抓取的数据。其中重复数据出现的次数比正常情况要多,作为示例,我提供了该 JSON 文件内容的一个片段。是否可以删除重复项,只保留第一次出现的记录?我已在下面补充了完整代码,以防这对解答有帮助。


# grabs all the trending quotes for that day

def getTrendingQuotes(browser):

    # wait until trending links appear, not really needed only for example

    all_trendingQuotes = WebDriverWait(browser, 10).until(

        lambda d: d.find_elements_by_css_selector('#trendingQuotes a')

    )

    return [link.get_attribute('href') for link in all_trendingQuotes]



def getStockDetails(url, browser):


    print(url)

    browser.get(url)


    quote_wrapper = browser.find_element_by_css_selector('div.quote-wrapper')

    quote_name = quote_wrapper.find_element_by_class_name(

        "quote-name").find_element_by_tag_name('h2').text

    quote_price = quote_wrapper.find_element_by_class_name("quote-price").text

    quote_volume = quote_wrapper.find_element_by_class_name(

        "quote-volume").text


    print("\n")

    print("Quote Name: " + quote_name)

    print("Quote Price: " + quote_price)

    print("Quote Volume: " + quote_volume)

    print("\n")


    convertToJson(quote_name, quote_price, quote_volume, url)



quotesArr = []


# Convert to a JSON  file



def convertToJson(quote_name, quote_price, quote_volume, url):

    quoteObject = {

        "url": url,

        "Name": quote_name,

        "Price": quote_price,

        "Volume": quote_volume

    }

    quotesArr.append(quoteObject)



def trendingBot(url, browser):

    browser.get(url)

    trending = getTrendingQuotes(browser)

    for trend in trending:

        getStockDetails(trend, browser)

    # requests finished, write json to file

    with open('trendingQuoteData.json', 'w') as outfile:

        json.dump(quotesArr, outfile)



def Main():

    scheduler = BlockingScheduler()

    chrome_options = Options()

    chrome_options.add_argument("--headless")

    # applicable to windows os only

    chrome_options.add_argument('--disable-gpu')


莫回无
浏览 232回答 2
2回答
打开App,查看更多内容
随时随地看视频慕课网APP

相关分类

Python