import re

import requests
# Selector for an article container: [tag name, attribute filter].
# NOTE(review): presumably handed to an HTML parser's find/find_all call
# outside this view -- confirm against the code that builds `all_tab_data`.
root_tag = ["article", {"class": "sorted-article"}]

# Image lookup spec: [tag name, attribute filter, attribute holding the URL].
image_tag = ["img", {"": ""}, "src"]

session = requests.Session()

# `headers` is not defined in the visible part of the file; it must exist
# before this line runs.
# requests applies no timeout by default, so without one a stalled server
# would block this script indefinitely.
response = session.get(
    "https://phys.org/earth-news/",
    headers=headers,
    timeout=10,
)

# Raw (undecoded) body of the response.
webContent = response.content
# Compiled once, outside the loop. The pattern text is unchanged.
# NOTE(review): inside the character class the `|` characters are literal
# pipes, not alternation, and `\s` lets a match run across whitespace --
# confirm that is intended.
_IMAGE_URL_RE = re.compile(
    r"(http(s?):)([/|.|\w|\s|-])*\.(?:jpg|gif|png|jpeg)"
)

# Work out an image URL for every element in `all_tab_data`.
# `all_tab_data` and `main_url` are defined outside this block.
# NOTE(review): each `div` is presumably a BeautifulSoup Tag -- confirm.
for div in all_tab_data:
    image_url = None
    div_img = str(div)

    # First choice: the first absolute image URL found anywhere in the
    # element's markup.
    match = _IMAGE_URL_RE.search(div_img)
    if match is not None:
        image_url = match.group(0)
    else:
        # Fallback: read the URL attribute of the first matching tag.
        # The lookup may find nothing, so guard before calling .get().
        img = div.find(image_tag[0], image_tag[1])
        if img is not None:
            image_url = img.get(image_tag[2])

    # Prefix root-relative paths ("/x") with the site URL; leave
    # protocol-relative ("//host/x") and absolute URLs alone. The truthiness
    # check also skips None and "" so no indexing error can occur.
    if (
        image_url
        and image_url.startswith("/")
        and not image_url.startswith("//")
    ):
        image_url = main_url + image_url
我的图像 URL 输出是 output_url,但图像的实际 URL 是 actual_url。我怎样才能抓取主图像?
吃鸡游戏
慕神8447489
相关分类