使用 selenium 下载“401 Unauthorized”视频

我正在尝试创建一个机器人,它将使用 selenium 和 python3 从名为“Sdarot”的网站下载视频。


网站中的每个视频(或剧集)都有一个唯一的页面和 URL。加载剧集时,您必须等待 30 秒才能“加载”该剧集,然后 <video> 标记才会出现在 HTML 源文件中。


问题在于,对视频的请求是以一种或另一种方式加密或保护的(我真的不明白它是如何工作的)!当我尝试简单地等待视频标签出现,然后使用 urllib 库下载视频(参见下面的代码)时,出现以下错误:urllib.error.HTTPError: HTTP Error 401: Unauthorized


需要说明的是,当我在 selenium 驱动的浏览器中直接打开该视频的下载链接时,它可以完全正常地打开,我也可以手动下载它。


如何自动下载视频?提前致谢!


代码:


from selenium import webdriver

from selenium.webdriver.common.by import By

from selenium.webdriver.support.ui import WebDriverWait

from selenium.webdriver.support import expected_conditions as EC


import urllib.request



def load(driver, url):
    """Open *url* and click the "proceed" button once the episode has loaded.

    The page is opened and the function waits up to 45 seconds for the
    element with id ``proceed`` to become clickable. If the wait times out,
    the page is reloaded and the wait starts over; this repeats until the
    button shows up.

    Args:
        driver: Selenium WebDriver instance used to open the page.
        url: Address of the episode page.
    """
    # Imported locally so this block does not need a new file-level import.
    from selenium.common.exceptions import TimeoutException

    # A loop replaces the original recursive retry, which called
    # ``load(url)`` without the driver and so raised TypeError on every retry.
    while True:
        driver.get(url)  # open the page in the browser
        try:
            continue_btn = WebDriverWait(driver, 45).until(
                EC.element_to_be_clickable((By.ID, "proceed"))
            )
        except TimeoutException:
            # Only a timed-out wait triggers a reload. The former bare
            # ``except:`` also swallowed KeyboardInterrupt, which made the
            # retry loop impossible to stop with Ctrl+C.
            continue

        # NOTE(review): the original waited for this button but never used
        # it. Clicking it is assumed to be the step that makes the <video>
        # tag appear - confirm against the site.
        continue_btn.click()
        return



def save_video(driver, filename):
    """Download the video shown on the current page to *filename*.

    The URL is read from the ``src`` property of the page's ``<video>``
    element. The request is sent with the cookies of the browser session,
    because a plain ``urllib`` request carries none of them and was answered
    with "HTTP Error 401: Unauthorized".

    Args:
        driver: Selenium WebDriver whose current page contains the video.
        filename: Path of the file the video is written to.

    Raises:
        urllib.error.HTTPError: If the server still rejects the request.
    """
    # ``find_element_by_tag_name`` was removed in Selenium 4; the generic
    # ``find_element`` with a ``By`` locator works in Selenium 3 and 4.
    video_element = driver.find_element(By.TAG_NAME, "video")
    video_url = video_element.get_property('src')  # get the video url

    # Forward the browser's cookies so the request belongs to the same
    # session as the page that was loaded in the browser.
    cookie_header = "; ".join(
        "{}={}".format(cookie["name"], cookie["value"])
        for cookie in driver.get_cookies()
    )
    request = urllib.request.Request(video_url)
    if cookie_header:
        request.add_header("Cookie", cookie_header)

    # The output file is opened only after the request succeeded, so a
    # rejected request does not leave an empty file behind. The body is
    # copied in 1 MiB chunks to avoid holding the whole video in memory.
    with urllib.request.urlopen(request) as response, \
            open(filename, "wb") as output:
        for chunk in iter(lambda: response.read(1024 * 1024), b""):
            output.write(chunk)



def main():
    """Load one hard-coded episode page and save its video as ``video.mp4``."""
    url = r'https://www.sdarot.dev/watch/339-%D7%94%D7%A4%D7%99%D7%92-%D7%9E%D7%95%D7%AA-ha-pijamot/season/1/episode/23'

    driver = webdriver.Chrome()
    try:
        load(driver, url)
        # save_video returns nothing, so its result is no longer assigned.
        save_video(driver, "video.mp4")
    finally:
        # Always shut the browser down, even when loading or downloading
        # fails; otherwise the Chrome process is left running.
        driver.quit()



# Run the download only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":

    main()


慕哥9229398
浏览 84回答 1
1回答

慕哥6287543

您收到未经授权的错误,是因为他们使用 cookie 来存储与您的会话相关的一些信息。具体来说,该 cookie 名为 Sdarot。我使用 requests 库来下载并保存视频。要点是:当您使用 selenium 打开 url 时它工作正常,因为 selenium 使用的是同一个 http 客户端(浏览器),该客户端已经拥有可用的 cookie 信息;但是当您使用 urllib 调用时,它基本上是另一个 http 客户端,因此对服务器来说这是一个全新的请求。为了克服这个问题,您必须像浏览器一样提供足够的会话信息,在本例中会话信息由 cookie 维护。请查看我如何提取 Sdarot cookie 的值并将其应用到 requests.get 方法中。您也可以使用 urllib 来做到这一点。

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import requests


def load(driver, url):
    driver.get(url)  # open the page in the browser
    try:
        # wait for the episode to "load"
        # if something is wrong and the episode doesn't load after 45 seconds,
        # the function will call itself again and try to load again.
        continue_btn = WebDriverWait(driver, 45).until(
            EC.element_to_be_clickable((By.ID, "proceed"))
        )
        continue_btn.click()
    except:
        load(driver,url) #corrected parameter error


def save_video(driver, filename):
    video_element = driver.find_element_by_tag_name(
        "video")  # get the video element
    video_url = video_element.get_property('src')  # get the video url
    cookies = driver.get_cookies()
    #iterate all the cookies and extract cookie value named Sdarot
    for entry in cookies:
        if(entry["name"] == 'Sdarot'):
            cookies = dict({entry["name"]:entry["value"]})
            #set request with proper cookies
            r = requests.get(video_url, cookies=cookies,stream = True)
            # start download
            with open(filename, 'wb') as f:
                for chunk in r.iter_content(chunk_size = 1024*1024):
                    if chunk:
                        f.write(chunk)


def main():
    URL = r'https://www.sdarot.dev/watch/339-%D7%94%D7%A4%D7%99%D7%92-%D7%9E%D7%95%D7%AA-ha-pijamot/season/1/episode/23'
    DRIVER = webdriver.Chrome()
    load(DRIVER, URL)
    video_url = save_video(DRIVER, "video.mp4")


if __name__ == "__main__":
    main()
打开App,查看更多内容
随时随地看视频慕课网APP

相关分类

Python