代码如下:
from urllib.request import urlopenfrom urllib.request import Requestfrom urllib import parsefrom bs4 import BeautifulSoup as bsimport re# req = Request('http://www.baidu.com')req = Request('https://www.csdn.net/') req.add_header('User-Agent','Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36') resp = urlopen(req) html_doc = resp.read().decode('utf-8')# html_doc = '<html><head><title>哈哈哈哈哈</title></head><body></body></html>'soup = bs(html_doc,'xml')# print(soup.title.string)# for link in soup.findAll('a'):# print(link.string)**links = soup.findAll('a',href=re.compile("^(https://www.csdn.net/)")**for link in links: if re.search("^(_blank)$",link['target']) print(link.get_text())
报错信息:
加※行语法错误,请大神看看哪里写的不对,在线等### 问题描述
// 请把代码文本粘贴到下方(请勿用图片代替代码)
相关分类