使用 BeautifulSoup 进行网页抓取索引错误

使用 BeautifulSoup 获取数据时,我在某个地方遇到索引错误。我可以提取大量数据,但它在某个地方损坏了。我该如何解决?


import requests

from bs4 import BeautifulSoup

totalCar = 0

for pageNumber in range(3, 7):

    r = requests.get("https://www.autoscout24.com/lst/bmw?sort=standard&desc=0&offer=U&ustate=N%2CU&size=20&page="+

        str(pageNumber)+"&cy=D&mmm=47%7C%7C&mmm=9%7C%7C&atype=C&")

    r.status_code

    r.content

    soup = BeautifulSoup(r.content,"lxml")

    #soup.prettify

    car_details = soup.find_all("div",attrs={"class":"cl-list-element cl-list-element-gap"})

    for detail in car_details:

        car_link = "https://www.autoscout24.com"+detail.a.get("href")

        #print(car_link)

        car_r = requests.get(car_link)

        car_soup = BeautifulSoup(car_r.content,"lxml")

        car_make = car_soup.find("div",attrs={"class":"cldt-categorized-data cldt-data-section sc-pull-right"}).select("dl > dd:nth-of-type(1)")[0].text

        #car_model = car_soup.find("div",attrs={"class":"cldt-categorized-data cldt-data-section sc-pull-right"}).select("dl > dd:nth-of-type(2)")[0].text

        car_model = car_soup.find("div",attrs={"class":"cldt-categorized-data cldt-data-section sc-pull-right"}).select("dl > dd > a")[0].text

        car_year = car_soup.find("div",attrs={"class":"cldt-categorized-data cldt-data-section sc-pull-right"}).select("dl > dd > a")[1].text

        car_color = car_soup.find("div",attrs={"class":"cldt-categorized-data cldt-data-section sc-pull-right"}).select("dl > dd > a")[2].text

        car_body = car_soup.find("div",attrs={"class":"cldt-categorized-data cldt-data-section sc-pull-right"}).select("dl > dd > a")[3].text


        print("Make:{} Model:{} Year:{} Color:{} Body:{}".format(car_make,car_model,car_year,car_color,car_body))

        print("-"*20)

        totalCar+=1

    print(totalCar)


四季花海
浏览 107回答 1
1回答

DIEA

有时,车身信息不存在。您需要检查:import requestsfrom bs4 import BeautifulSouptotalCar = 0for pageNumber in range(3, 7):&nbsp; &nbsp; r = requests.get("https://www.autoscout24.com/lst/bmw?sort=standard&desc=0&offer=U&ustate=N%2CU&size=20&page="+&nbsp; &nbsp; &nbsp; &nbsp; str(pageNumber)+"&cy=D&mmm=47%7C%7C&mmm=9%7C%7C&atype=C&")&nbsp; &nbsp; r.status_code&nbsp; &nbsp; r.content&nbsp; &nbsp; soup = BeautifulSoup(r.content,"lxml")&nbsp; &nbsp; #soup.prettify&nbsp; &nbsp; car_details = soup.find_all("div",attrs={"class":"cl-list-element cl-list-element-gap"})&nbsp; &nbsp; for detail in car_details:&nbsp; &nbsp; &nbsp; &nbsp; car_link = "https://www.autoscout24.com"+detail.a.get("href")&nbsp; &nbsp; &nbsp; &nbsp; #print(car_link)&nbsp; &nbsp; &nbsp; &nbsp; car_r = requests.get(car_link)&nbsp; &nbsp; &nbsp; &nbsp; print(car_link)&nbsp; &nbsp; &nbsp; &nbsp; car_soup = BeautifulSoup(car_r.content,"lxml")&nbsp; &nbsp; &nbsp; &nbsp; car_make = car_soup.find("div",attrs={"class":"cldt-categorized-data cldt-data-section sc-pull-right"}).select("dl > dd:nth-of-type(1)")[0].text&nbsp; &nbsp; &nbsp; &nbsp; a = car_soup.find("div",attrs={"class":"cldt-categorized-data cldt-data-section sc-pull-right"}).select("dl > dd > a")&nbsp; &nbsp; &nbsp; &nbsp; car_model = a[0].text&nbsp; &nbsp; &nbsp; &nbsp; car_year = a[1].text&nbsp; &nbsp; &nbsp; &nbsp; car_color = a[2].text&nbsp; &nbsp; &nbsp; &nbsp; car_body = car_body = a[3].text if len(a) > 3 else '-'&nbsp; # <-- check, if car body information is present&nbsp; &nbsp; &nbsp; &nbsp; print("Make:{} Model:{} Year:{} Color:{} Body:{}".format(car_make,car_model,car_year,car_color,car_body))&nbsp; &nbsp; &nbsp; &nbsp; print("-"*20)&nbsp; &nbsp; &nbsp; &nbsp; totalCar+=1&nbsp; &nbsp; print(totalCar)印刷:...--------------------https://www.autoscout24.com/offers/mercedes-benz-a-180-blueefficiency-limousine-5tuerig-gasoline-grey-73cbbad4-ab1c-4163-a7cf-76037408fcb8Make:Mercedes-Benz&nbsp;Model:A 180 Year:2009 Color:Grey Body:Sedans--------------------https://www.autoscout24.com/offers/audi-a4-ambiente-1-8-ahk-xenon-sitzh-pdc-tempom-8fach-gasoline-black-f6517012-9dfb-4d93-a7dd-d0b9b9bdbbc6Make:Audi&nbsp;Model:A4 Year:2008 Color:Black Body:Sedans--------------------80
打开App,查看更多内容
随时随地看视频慕课网APP

相关分类

Python