将网络抓取的表格放入excel(selenium,python)

我想把网页上的表格连同表头一起写入 excel。我尝试了很多方法,但始终没弄清楚怎样才能让它在 excel 中正确显示。下面还附了一张图片,展示了我理想中的显示效果。先谢谢大家。


from selenium import webdriver

from selenium.webdriver.support.ui import Select

from selenium.webdriver.common.keys import Keys

from selenium.webdriver.support.ui import WebDriverWait

from selenium.webdriver.common.by import By

from selenium.webdriver.support import expected_conditions as EC


# Asker's script: drive Chrome through the NCAA high-school portal search form,
# open the matching school, then try to dump the approved-course table to CSV.
# Start Chrome using the chromedriver binary at the given relative path.
driver = webdriver.Chrome("drivers/chromedriver")

driver.get("https://web3.ncaa.org/hsportal/exec/hsAction")

# Wait (timeout 20) until the element with id "state" is clickable, then pick the state by its visible label.
Select(WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.ID, "state")))).select_by_visible_text("New Hampshire")

# Fill in the city and school-name inputs, then submit the search.
driver.find_element_by_xpath("//input[@id='city']").send_keys("Moultonborough")

driver.find_element_by_xpath("//input[@id='name']").send_keys("Moultonborough Academy")

driver.find_element_by_xpath("//input[@value='Search']").click()

# Click the input named "hsCode" once it becomes clickable.
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//input[@name='hsCode']"))).click()

# Print the text of every <th class="header"> cell of table approvedCourseTable_1.
print([my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.XPATH, "//table[@id='approvedCourseTable_1']//th[@class='header']")))])


# The selector matches the <table> element itself, so `table` holds one string
# per matching table element rather than one entry per cell.
table = ([my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "table#approvedCourseTable_1.tablesorter")))])


# NOTE(review): `csv` is not among the imports shown with this snippet;
# `import csv` is presumably required for this part to run - confirm.
with open('out.csv', 'w', newline='') as csvfile:

    writer = csv.writer(csvfile)

    # writerow emits a single CSV row whose cells are the items of `table`.
    writer.writerow(table)




不知为什么,用选择器 table#approvedCourseTable_1.tablesorter 抓取表格并写入 excel 时,只显示了“课程”(Course)这一项,其余内容都没有。如果把表头和表格内容分开抓取,我可以分别把它们写入 excel,但无法一起写入。此外,即使成功写入了 excel,表格内容也没有正确对齐。


# x: text of each <th class="header"> cell inside table approvedCourseTable_1.
x = ([my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "table#approvedCourseTable_1 th.header")))])

# y: text of every <td> inside the table, collected into one flat list
# (cells are not grouped by row here).
y = ([my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "table#approvedCourseTable_1 td")))])

如果可能的话,我希望它像这样显示:

http://img4.mukewang.com/64479f7c0001516c06550370.jpg

慕码人8056858
浏览 106回答 1
1回答

米脂

我已经用 Selenium/Python 实现了这个功能。请试试下面的代码示例:

from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import csv

csvFile = open('out.csv', 'w')
writer = csv.writer(csvFile)

driver = webdriver.Chrome("drivers/chromedriver")
driver.get("https://web3.ncaa.org/hsportal/exec/hsAction")
Select(WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.ID, "state")))).select_by_visible_text("New Hampshire")
driver.find_element_by_xpath("//input[@id='city']").send_keys("Moultonborough")
driver.find_element_by_xpath("//input[@id='name']").send_keys("Moultonborough Academy")
driver.find_element_by_xpath("//input[@value='Search']").click()
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//input[@name='hsCode']"))).click()
print([my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.XPATH, "//table[@id='approvedCourseTable_1']//th[@class='header']")))])
#table = ([my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "table#approvedCourseTable_1.tablesorter")))])
table_header = driver.find_element_by_xpath("(//table[@id='NcaaCrs_ApprovedCategory_All']//td[@class='hs_tableHeader'])[1]")
print(table_header.text)
writer.writerow(table_header.text)
#Find All Approved Categories
approved_Categories = driver.find_elements_by_xpath("//div[contains(@id,'NcaaCrs_ApprovedCategory_')]")
for i in range(len(approved_Categories)):
    cateogry_header = driver.find_element_by_xpath("//div[contains(@id,'NcaaCrs_ApprovedCategory_"+str(i+1)+"')]//td[@class='hs_tableHeader']")
    print(cateogry_header.text)
    writer.writerow(cateogry_header.text)
    #Find Course table header and rows
    course_headers = driver.find_elements_by_xpath("//table[contains(@id,'approvedCourseTable_"+str(i+1)+"')]/thead//th")
    header_val = []
    for headers in course_headers:
        header_val.append(headers.text)
    print(header_val)
    writer.writerow(header_val)
    course_rows = driver.find_elements_by_xpath("//table[@id='approvedCourseTable_"+str(i+1)+"']//tbody/tr")
    for j in range(len(course_rows)):
        row_values = driver.find_elements_by_xpath("//table[@id='approvedCourseTable_"+str(i+1)+"']//tbody/tr["+str(j+1)+"]/td")
        row_val = []
        for row in row_values:
            row_val.append(row.text)
        print(row_val)
        writer.writerow(row_val)
csvFile.close()
driver.quit()

CSV 输出将是这样的:

['Course\nWeight', 'Title', 'Notes', 'Max\nCredits', 'OK\nThrough', 'Disability\nCourse']
Approved Courses
English
['Course\nWeight', 'Title', 'Notes', 'Max\nCredits', 'OK\nThrough', 'Disability\nCourse']
['', 'AFRICAN LITERATURE', '', '', '', 'No']
['', 'AMERICAN LITERATURE', '', '', '', 'No']
['', 'AP ENGLISH LANGUAGE & COMPOSITION', '', '', '', 'No']
['', 'AP ENGLISH LITERATURE & COMPOSITION', '', '', '', 'No']
['', 'COLLEGE COMPOSITION', '', '', '', 'No']
['', 'ENGLISH 9 (ENG 091/092/093)', '', '', '', 'No']
['', 'ENGLISH 9/H', '', '', '', 'No']
['', 'PUBLIC SPEAKING', '', '', '', 'No']
['', 'WORLD STUDIES', '', '', '', 'No']
['', 'WORLD STUDIES HBC', '', '', '', 'No']
Social Science
['Course\nWeight', 'Title', 'Notes', 'Max\nCredits', 'OK\nThrough', 'Disability\nCourse']
['', 'AP WORLD HISTORY', '', '', '', 'No']
['', 'ECONOMICS', '', '', '', 'No']
['', 'GOVERNMENT', '', '', '', 'No']
['', 'PSYCHOLOGY', '', '', '', 'No']
['', 'US HISTORY', '', '', '', 'No']
['', 'US HISTORY/AP', '', '', '', 'No']
['', 'WORLD STUDIES', '', '', '', 'No']
['', 'WORLD STUDIES HBC', '', '', '', 'No']
打开App,查看更多内容
随时随地看视频慕课网APP

相关分类

Python