我正在尝试从维基百科获取并解析一些数据,以便分析表格各列中的因素与幸福分数本身之间的相关性。
但脚本未按预期工作:事实上,最终生成的文件是空的。我尝试过调试,但没有成功:
import requests
import bs4
from bs4 import BeautifulSoup
import csv
from csv import DictWriter
def get_page(url, timeout=30):
    """Fetch *url* over HTTP and return the ``requests`` response object.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        The ``requests.Response`` returned by ``requests.get``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: For connection failures and timeouts.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here with a clear HTTP error instead of handing an error page
    # to the HTML parser.
    response.raise_for_status()
    return response
def _to_number(text):
    """Return *text* converted to ``float``, or unchanged if not numeric."""
    try:
        return float(text)
    except ValueError:
        return text


def parse_html(html_file, table_index=1, skip_rows=2):
    """Extract one HTML table from a fetched page as a list of row dicts.

    Args:
        html_file: Object exposing the page markup as ``.text``.
        table_index: Zero-based position of the ``<table>`` to read among
            all tables found in the document. Defaults to 1 (second table).
        skip_rows: Number of leading ``<tr>`` rows (header row included)
            that are not treated as data. Defaults to 2.
            NOTE(review): with the default, the row directly after the
            header row is never emitted - confirm that this row is not
            real data in the target table.

    Returns:
        A ``(headers, countries)`` tuple. ``headers`` is the list of column
        names taken from the first row of the table; ``countries`` is a list
        of dicts mapping each header to the cell value. The
        ``'Country or region'`` column is kept as text; every other cell is
        converted to ``float`` when possible and left as text otherwise.

    Raises:
        IndexError: If the document has no table at ``table_index``.
    """
    text_columns = ('Country or region',)

    parsed_html = bs4.BeautifulSoup(html_file.text, 'html.parser')
    tables = parsed_html.find_all('table')
    try:
        table = tables[table_index]
    except IndexError:
        raise IndexError(
            f'page has {len(tables)} table(s); no table at index {table_index}'
        ) from None

    rows = table.find_all('tr')
    if not rows:
        return [], []

    # One name per header cell: get_text() also collects text nested inside
    # links/spans and joins fragments split by <br>, so every cell yields
    # exactly one header and the columns stay aligned with the data cells.
    headers = [
        cell.get_text(' ', strip=True)
        for cell in rows[0].find_all(['th', 'td'])
    ]

    countries = []
    for row in rows[skip_rows:]:
        # Row-header cells (<th>) count as columns too.
        cells = row.find_all(['th', 'td'])
        if len(cells) < len(headers):
            # Rows with missing cells cannot be mapped onto the header
            # list; they are skipped.
            continue
        country = {}
        for name, cell in zip(headers, cells):
            value = cell.text.strip()
            country[name] = value if name in text_columns else _to_number(value)
        countries.append(country)
    return headers, countries
def write_csv(filename, data, fieldnames):
    """Write *data* to *filename* as a CSV file with a header row.

    Args:
        filename: Path of the file to create or overwrite.
        data: Iterable of dicts, one per output row, keyed by field name.
        fieldnames: Column names, in output order; written as the header.

    Raises:
        ValueError: If a row contains a key that is not in *fieldnames*
            (default ``csv.DictWriter`` behaviour).
    """
    # newline='' lets the csv module control line endings (otherwise blank
    # lines appear between rows on Windows); an explicit UTF-8 encoding keeps
    # non-ASCII text independent of the platform's default locale.
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)
def main():
    """Fetch the World Happiness Report article and export a table to CSV.

    Downloads the page, parses it with ``parse_html``, writes the parsed
    rows to ``worldhappiness.csv`` and then parses the page a second time.
    Progress is reported on standard output.
    """
    url = "https://en.wikipedia.org/wiki/World_Happiness_Report"

    print('Fetching URL {}...'.format(url))
    response = get_page(url)

    print('Parsing {}...'.format(url))
    columns, rows = parse_html(response)

    print('Writing to a CSV file 1...')
    write_csv('worldhappiness.csv', rows, columns)

    print('Parsing table 3 (simpler table) {} ...'.format(url))
    # NOTE(review): this repeats the parse_html call above with the same
    # argument and the result is not used within this function - presumably
    # a second table export was intended; confirm.
    columns, rows = parse_html(response)
慕丝7291255
相关分类