def parse():
html = get_html(URL)
if html.status_code == 200:
phones = []
pages_count = pages(html.text)
for page in range(1, pages_count + 1):
print(f'Parsing a page {page} from {pages_count}...')
html = get_html(URL, params={'p': page})
phones.extend(get_content(html.text))
print(phones)
else:
print('Error')
嗨,我想列出项目,但出现错误
File "C:/Users/User/PycharmProjects/Parser/parser.py", line 52, in <module>
parse()
File "C:/Users/User/PycharmProjects/Parser/parser.py", line 46, in parse
phones.extend(get_content(html.text))
TypeError: 'NoneType' object is not iterab
这是所有代码:
import requests
from bs4 import BeautifulSoup
URL = 'https://comfy.ua/smartfon/'
HEADERS = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:71.0) Gecko/20100101 Firefox/71.0',
'accept': '*/*'}
def get_html(url, params=None):
r = requests.get(url, headers=HEADERS, params=params)
return r
def pages(html):
soup = BeautifulSoup(html, 'html.parser')
pagination = soup.find_all('li', class_='pager__number')
if pagination:
return int(pagination[-2].get_text())
else:
return 1
def get_content(html):
soup = BeautifulSoup(html, 'html.parser')
items = soup.find_all('div', class_="product-item__i")
phone = []
for item in items:
phone.append({
'title': item.find('p', class_="product-item__name").get_text(strip=True),
'link': item.find('a', class_="product-item__name-link js-gtm-product-title").get('href'),
'price': item.find('div', class_="price-box__content-i").get_text(strip=True).replace(u'\xa0', u' ')
})
print(phone)
我得到一个空列表,但应该拿到电话。我也收到一个错误。
潇潇雨雨
相关分类