如何从网页获取链接 - BeautifulSoup/Python

我想获得该网站所有链接(href)的列表 -


https://clinicaltrials.gov/ct2/results?cond=brain+tumor&term=&cntry=&state=&city=&dist=


这是我正在使用的代码,但它输出的是一个空列表:


import requests

from bs4 import BeautifulSoup

import pandas as pd

# Fetch the search-results page, take the href of the first <a> inside every
# <tr> matched by class_='odd parent', and print the collected list.
url='https://clinicaltrials.gov/ct2/results?cond=brain+tumor&term=&cntry=&state=&city=&dist='
productlinks = []

response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

# NOTE(review): the accompanying text reports that this prints an empty list;
# presumably the result rows are not present in the HTML returned by
# requests -- confirm against the raw response.
rows = soup.find_all('tr', class_='odd parent')
productlinks.extend(row.a['href'] for row in rows)

print(productlinks)


慕容708150
浏览 119回答 2
2回答

慕盖茨4494581

尝试使用 selenium 而不是 requests 从页面上抓取所有链接。这是执行此操作的完整代码:

from selenium import webdriver
from bs4 import BeautifulSoup
import time

productlinks=[]
url='https://clinicaltrials.gov/ct2/results?cond=brain+tumor&term=&cntry=&state=&city=&dist='
driver = webdriver.Chrome()
driver.get(url)
time.sleep(4)
html = driver.page_source
driver.close()
soup=BeautifulSoup(html,'html5lib')
a_tags = soup.find_all('a')
for a in a_tags:
    if a.get('href'):
        productlinks.append(a.get('href'))
print(productlinks)

输出:

['https://clinicaltrials.gov/ct2/manage-recs/resources#DataElement', 'https://prsinfo.clinicaltrials.gov/results_definitions.html#DelayResultsType', 'https://www.fda.gov/news-events/public-health-focus/expanded-access', 'https://prsinfo.clinicaltrials.gov/results_definitions.html#DelayResultsType', 'https://clinicaltrials.gov/ct2/about-site/history', 'https://clinicaltrials.gov/ct2/about-studies/learn#Participating', 'https://clinicaltrials.gov/ct2/about-studies/learn#Participating', '#main-content', 'https://www.coronavirus.gov', 'https://www.nih.gov/coronavirus', '/ct2/home', '/ct2/search/index', '/ct2/home', '/ct2/search/advanced', '/ct2/search/browse?brwse=cond_cat', '/ct2/search/map', '/ct2/help/how-find/index', '/ct2/help/how-use-search-results', '/ct2/help/how-find/find-study-results', '/ct2/help/how-read-study', '/ct2/about-studies', '/ct2/about-studies/learn', '/ct2/about-studies/other-sites', '/ct2/about-studies/glossary', '/ct2/manage-recs', '/ct2/manage-recs/submit-study', '/ct2/manage-recs/background', '/ct2/manage-recs/fdaaa', '/ct2/manage-recs/how-apply', '/ct2/manage-recs/how-register', '/ct2/manage-recs/how-edit', '/ct2/manage-recs/how-report', '/ct2/manage-recs/faq', '/ct2/manage-recs/resources', '/ct2/manage-recs/present', '/ct2/resources', '/ct2/resources/pubs', '/ct2/resources/alert', '/ct2/resources/rss', '/ct2/resources/trends', '/ct2/resources/download', '/ct2/about-site', '/ct2/about-site/new', '/ct2/about-site/background', '/ct2/about-site/results', 
'/ct2/about-site/history', '/ct2/about-site/modernization', '/ct2/about-site/for-media', '/ct2/about-site/link-to', '/ct2/about-site/terms-conditions', '/ct2/about-site/disclaimer', '/ct2/manage-recs/register', '/ct2/search/index', '/ct2/home', '/ct2/search/advanced', '/ct2/search/browse?brwse=cond_cat', '/ct2/search/map', '/ct2/help/how-find/index', '/ct2/help/how-use-search-results', '/ct2/help/how-find/find-study-results', '/ct2/help/how-read-study', '/ct2/about-studies', '/ct2/about-studies/learn', '/ct2/about-studies/other-sites', '/ct2/about-studies/glossary', '/ct2/manage-recs', '/ct2/manage-recs/submit-study', '/ct2/manage-recs/background', '/ct2/manage-recs/fdaaa', '/ct2/manage-recs/how-apply', '/ct2/manage-recs/how-register', '/ct2/manage-recs/how-edit', '/ct2/manage-recs/how-report', '/ct2/manage-recs/faq', '/ct2/manage-recs/resources', '/ct2/manage-recs/present', '/ct2/resources', '/ct2/resources/pubs', '/ct2/resources/alert', '/ct2/resources/rss', '/ct2/resources/trends', '/ct2/resources/download', '/ct2/about-site', '/ct2/about-site/new', '/ct2/about-site/background', '/ct2/about-site/results', '/ct2/about-site/history', '/ct2/about-site/modernization', '/ct2/about-site/for-media', '/ct2/about-site/link-to', '/ct2/about-site/terms-conditions', '/ct2/about-site/disclaimer', '/ct2/manage-recs/register', '/ct2/home', '#', '/ct2/home', '/ct2/results/refine?cond=brain+tumor', '/ct2/results/details?cond=brain+tumor', '/ct2/results/browse?cond=brain+tumor&brwse=cond_alpha_all', '/ct2/results/map?cond=brain+tumor&map=', '/ct2/results/details?cond=brain+tumor', '/ct2/resources/rss', '/ct2/resources/download', '/ct2/resources/download#DownloadAllData', '/ct2/show/NCT03286335?cond=brain+tumor&draw=2&rank=1', '/ct2/show/NCT02740933?cond=brain+tumor&draw=2&rank=2', '/ct2/show/NCT03328858?cond=brain+tumor&draw=2&rank=3', '/ct2/show/NCT02367469?cond=brain+tumor&draw=2&rank=4', '/ct2/show/NCT01627535?cond=brain+tumor&draw=2&rank=5', 
'/ct2/show/NCT03980431?cond=brain+tumor&draw=2&rank=6', '/ct2/show/NCT02956291?cond=brain+tumor&draw=2&rank=7', '/ct2/show/NCT02824731?cond=brain+tumor&draw=2&rank=8', '/ct2/show/results/NCT02034708?cond=brain+tumor&draw=2&rank=9', '/ct2/show/NCT02034708?cond=brain+tumor&draw=2&rank=9', '/ct2/show/NCT00557375?cond=brain+tumor&draw=2&rank=10', '#wrapper', '/ct2/help/for-patient', '/ct2/help/for-researcher', '/ct2/help/for-manager', '/ct2/home', '/ct2/resources/rss', '/ct2/sitemap', '/ct2/about-site/terms-conditions', '/ct2/about-site/disclaimer', 'https://support.nlm.nih.gov/knowledgebase/category/?id=CAT-01242&category=clinicaltrials.gov&hd_url=https%3A%2F%2Fclinicaltrials.gov%2Fct2%2Fresults%3Fcond%3Dbrain%2Btumor', 'https://www.nlm.nih.gov/copyright.html', 'https://www.nlm.nih.gov/privacy.html', '/ct2/accessibility', 'https://www.nlm.nih.gov/plugins.html', 'https://www.nih.gov/icd/od/foia/index.htm', 'https://www.usa.gov/', 'https://www.nlm.nih.gov/', 'https://www.nih.gov/', 'https://www.hhs.gov/']

拉风的咖菲猫

import re
import json
import requests
from bs4 import BeautifulSoup

url = 'https://clinicaltrials.gov/ct2/results?cond=brain+tumor&term=&cntry=&state=&city=&dist='
ajax_url = 'https://clinicaltrials.gov/' + re.search(r'"url": "(.*?)"', requests.get(url).text).group(1)

payload = {
    'start': 0,
    'length':  10
}

for payload['start'] in range(0, 100, 10):              # <-- increase to number of pages
    data = requests.post(ajax_url, data=payload).json()

    # uncomment this to see all data:
    # print(json.dumps(data, indent=4))

    for d in data['data']:
        s = BeautifulSoup(d[3], 'html.parser')
        print( s.a['title'] )
        print( s.a['href'] )

    print('-' * 80)

输出:

...
--------------------------------------------------------------------------------
Show study NCT00003475: Antineoplaston Therapy in Treating Patients With Primary Malignant Brain Tumors
/ct2/show/NCT00003475?cond=brain+tumor&rank=81
Show study NCT00949026: Assessment of Systemically Administered Torisel Delivery to Brain Tumors by Intratumoral Microdialysis
/ct2/show/NCT00949026?cond=brain+tumor&rank=82
Show study NCT04118426: Cognitive Function After Radiation Therapy for Brain Tumours
/ct2/show/NCT04118426?cond=brain+tumor&rank=83
Show study NCT03033706: Intraoperative Goal Directed Fluid Management in Supratentorial Brain Tumor Craniotomy
/ct2/show/NCT03033706?cond=brain+tumor&rank=84
Show study NCT00996450: Educational Follow-up in a Cohort of Children at the Royal Marsden Hospital (RMH)
/ct2/show/NCT00996450?cond=brain+tumor&rank=85
Show study NCT03373487: Cognitive Rehabilitation in Brain Tumor Patients After Neurosurgery
/ct2/show/NCT03373487?cond=brain+tumor&rank=86
Show study NCT03619694: Role of MR Spectroscopy in Brain Tumors
/ct2/show/NCT03619694?cond=brain+tumor&rank=87
Show study NCT00850278: Assessment of [18F]FLT-PET Imaging for Diagnosis and Prognosis of Brain Tumors
/ct2/show/NCT00850278?cond=brain+tumor&rank=88
Show study NCT02575521: Effect of Propofol-Dexmedetomidine on Cerebral Oxygenation and Metabolism During Brain Tumor Resection
/ct2/show/NCT02575521?cond=brain+tumor&rank=89
Show study NCT03248544: Therapeutic Benefit of Preoperative Supplemental Vitamin D in Patients Undergoing Brain Tumor Surgery
/ct2/show/NCT03248544?cond=brain+tumor&rank=90
--------------------------------------------------------------------------------
Show study NCT03248544: Therapeutic Benefit of Preoperative Supplemental Vitamin D in Patients Undergoing Brain Tumor Surgery
/ct2/show/NCT03248544?cond=brain+tumor&rank=91
Show study NCT00418899: Gliogene: Brain Tumor Linkage Study
/ct2/show/NCT00418899?cond=brain+tumor&rank=92
Show study NCT03216148: 18F-FET PET in Childhood Brain Tumours
/ct2/show/NCT03216148?cond=brain+tumor&rank=93
Show study NCT00961922: Pediatric Research on Improving Speed, Memory and Attention
/ct2/show/NCT00961922?cond=brain+tumor&rank=94
Show study NCT02006563: Metabolic Tumor Volumes in Radiation Treatment of Primary Brain Tumors
/ct2/show/NCT02006563?cond=brain+tumor&rank=95
Show study NCT03234309: Ferumoxytol in Magnetic Resonance Imaging of Pediatric Patients With Brain Tumors
/ct2/show/NCT03234309?cond=brain+tumor&rank=96
Show study NCT01737671: Methotrexate Infusion Into the Fourth Ventricle in Children With Malignant Fourth Ventricular Brain Tumors: A Pilot Study
/ct2/show/NCT01737671?cond=brain+tumor&rank=97
Show study NCT03649880: Feasibility of FMISO in Brain Tumors
/ct2/show/NCT03649880?cond=brain+tumor&rank=98
Show study NCT03465618: A First in Human Study Using 89Zr-cRGDY Ultrasmall Silica Particle Tracers for Malignant Brain Tumors
/ct2/show/NCT03465618?cond=brain+tumor&rank=99
Show study NCT02389530: Use of Fluorescein Dye for the Removal of Brain Tumors
/ct2/show/NCT02389530?cond=brain+tumor&rank=100
--------------------------------------------------------------------------------
...
打开App,查看更多内容
随时随地看视频慕课网APP

相关分类

Python