我正在尝试获取网页中某个部分的内容,该部分的数据由 JavaScript 动态加载。我在这里找到了一些代码并对其做了修改,但运行脚本时得到的结果却是 None。
这是代码:
import bs4 as bs
import sys
import urllib.request
from PyQt5.QtWebEngineWidgets import QWebEnginePage
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl
from pprint import pprint
class Page(QWebEnginePage):
    """Load a URL in an off-screen Qt WebEngine page and capture its HTML.

    Constructing an instance blocks: it starts the Qt event loop, loads
    ``url``, serializes the resulting DOM into ``self.html`` and then quits
    the event loop so the constructor returns.

    Args:
        url: Address of the page to load, as a string.
        render_delay_ms: Milliseconds to wait after the page reports that
            loading finished before the DOM is serialized. The default of
            0 serializes immediately. Content that page scripts insert
            after the load event may be missing from ``html`` unless a
            delay is given.

    Attributes (set by the constructor):
        app: The QApplication driving the event loop.
        html: The serialized page HTML ('' until the capture completes).
    """

    def __init__(self, url, render_delay_ms=0):
        # Qt allows only one QApplication per process, so reuse an existing
        # one; otherwise a second Page in the same process would fail.
        self.app = QApplication.instance() or QApplication(sys.argv)
        QWebEnginePage.__init__(self)
        self.html = ''
        self._render_delay_ms = render_delay_ms
        self.loadFinished.connect(self._on_load_finished)
        self.load(QUrl(url))
        # Blocks until Callable() calls quit().
        self.app.exec_()

    def _on_load_finished(self):
        """Request the page HTML once loading finished, optionally delayed."""
        print('Load finished')
        if self._render_delay_ms > 0:
            # Imported here so the block does not depend on a file-level
            # import that the original file does not have.
            from PyQt5.QtCore import QTimer
            QTimer.singleShot(self._render_delay_ms, self._request_html)
        else:
            self._request_html()

    def _request_html(self):
        """Ask the page to serialize its DOM and pass it to Callable."""
        # toHtml() is asynchronous and returns nothing; the markup arrives
        # through the callback, so its return value must not be stored in
        # self.html.
        self.toHtml(self.Callable)

    def Callable(self, html_str):
        """Store the serialized HTML and stop the event loop."""
        self.html = html_str
        self.app.quit()
def main():
    """Fetch the IBM Fix Central listing and pretty-print its data table.

    Prints ``None`` when no ``<table id="DataTables_Table_0">`` exists in
    the captured HTML.
    """
    target_url = 'https://www.ibm.com/support/fixcentral/swg/selectFixes?parent=IBM%20Security&product=ibm/Information+Management/InfoSphere+Guardium&release=10.0&platform=Linux&function=all'
    rendered = Page(target_url)
    document = bs.BeautifulSoup(rendered.html, 'html.parser')
    table = document.find('table', attrs={'id': 'DataTables_Table_0'})
    pprint(table)


if __name__ == '__main__':
    main()
这是输出:
Load finished
None
蝴蝶不菲
相关分类