【爬虫】爬取b站目标视频

查看 70|回复 9
作者:Derik   
内附接口,修改目标url即可下载目标视频
使用者需自行下载ffmpeg并修改代码中的路径
(仅做学习,如有侵权,请私信)
[Python] 纯文本查看 复制代码
import os
import re
import json
import subprocess
import requests
import fake_useragent
from lxml import etree

# Random User-Agent string from fake_useragent. Note that the headers dict
# below uses a fixed User-Agent literal and does not reference `ua`.
ua = fake_useragent.UserAgent().random
# Request headers passed to every requests.get call in this script.
headers = {
    'referer': 'https://www.bilibili.com/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36'
}


def get_url(url):
    """Fetch a video page and extract its title and DASH stream addresses.

    Args:
        url: Address of the video page to scrape.

    Returns:
        A ``(filename, video_url, audio_url)`` tuple. ``filename`` is the text
        of the page's first ``<h1>``, stripped and with characters that are
        not allowed in Windows file names replaced by ``_``. The two URLs are
        the ``baseUrl`` of the first video and first audio entry under
        ``data.dash`` in the embedded play info.

    Raises:
        requests.HTTPError: The page request returned an error status.
        ValueError: The page has no embedded play info or no ``<h1>`` title
            (``json.JSONDecodeError`` for malformed play info is a subclass).
    """
    r = requests.get(url, headers=headers, timeout=30)
    r.raise_for_status()

    # The play info is a JSON object assigned inside a <script> tag. The lazy
    # group needs the closing tag as a terminator; without one it matches the
    # empty string and json.loads() fails on every page.
    match = re.search(r'window\.__playinfo__=(.*?)</script>', r.text, re.S)
    if match is None:
        raise ValueError("无法找到播放信息")

    # Parse the JSON once and reuse it for both streams.
    dash = json.loads(match.group(1))['data']['dash']
    video_url = dash['video'][0]['baseUrl']
    audio_url = dash['audio'][0]['baseUrl']

    titles = etree.HTML(r.text).xpath('//h1/text()')
    if not titles:
        raise ValueError("无法找到视频标题")
    # The title becomes a file name later, so drop path separators and the
    # other characters Windows rejects in file names.
    filename = re.sub(r'[\\/:*?"<>|]', '_', titles[0].strip())
    return filename, video_url, audio_url


# 下载并保存音频和视频
def download(name, video_url, audio_url):
    """Download the video and audio streams into the current directory.

    The video stream is saved as ``<name>.mp4`` and the audio stream as
    ``<name>.mp3``. Each response is streamed to disk in chunks so a large
    file is never held in memory as a whole.

    Args:
        name: Base file name (without extension) for both output files.
        video_url: Address of the video stream.
        audio_url: Address of the audio stream.

    Raises:
        requests.HTTPError: A stream request returned an error status; raised
            before anything is written for that stream so an error page is
            not saved as media.
    """
    path = os.getcwd()
    targets = (
        (video_url, 'mp4', "已下载视频部分"),
        (audio_url, 'mp3', "已下载音频部分"),
    )
    for src, ext, done_msg in targets:
        with requests.get(src, headers=headers, stream=True, timeout=30) as resp:
            resp.raise_for_status()
            with open(os.path.join(path, f'{name}.{ext}'), 'wb') as f:
                for chunk in resp.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
        print(done_msg)


# 合并音频和视频
def combine(
    name,
    ffmpeg=r"C:\download_solftware\ffmpeg-7.0.2-full_build\bin\ffmpeg",
):
    """Merge ``<name>.mp4`` and ``<name>.mp3`` into ``output-<name>.mp4``.

    All three files live in the current working directory. The video track is
    copied and the audio track is encoded as AAC. The two source files are
    deleted only after ffmpeg reports success, so a failed merge does not
    discard the downloads.

    Args:
        name: Base file name (without extension) shared by the two inputs.
        ffmpeg: Path to the ffmpeg executable. Download ffmpeg yourself and
            either pass its location here or change the default.

    Raises:
        FileNotFoundError: The ffmpeg executable does not exist at ``ffmpeg``.
    """
    path = os.getcwd()
    video_file = os.path.join(path, f"{name}.mp4")
    audio_file = os.path.join(path, f"{name}.mp3")
    output_file = os.path.join(path, f"output-{name}.mp4")
    cmd = [
        ffmpeg,
        # ffmpeg's output is hidden below, so an "overwrite?" prompt would be
        # invisible; -y answers it up front.
        "-y",
        "-i", video_file,
        "-i", audio_file,
        "-c:v", "copy",
        "-c:a", "aac",
        "-strict", "experimental",
        output_file,
    ]
    result = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    if result.returncode != 0:
        # Keep the source files so the merge can be retried.
        print(f"合并失败: ffmpeg 退出码 {result.returncode}")
        return
    print("已完成合并")
    # Remove the source files now that the merged output exists.
    os.remove(video_file)
    os.remove(audio_file)


if __name__ == '__main__':
    # Page of the video to fetch; change this address to download another one.
    target = 'https://www.bilibili.com/video/BV1Tf421i7fV/?spm_id_from=333.1007.tianma.4-2-12.click'
    # Resolve the title and stream addresses, download both streams, then merge.
    title, video_src, audio_src = get_url(target)
    download(title, video_src, audio_src)
    combine(title)

视频, 音频

hui2002   

我尝试优化了一下代码,请楼主指教!!
import os
import re
import json
import requests
from lxml import etree
# 使用fake_useragent库生成随机的User-Agent
from fake_useragent import UserAgent
# Pick one random User-Agent string when the module is loaded.
ua = UserAgent().random
# Request headers shared by every request in this script.
# NOTE(review): the Referer is included on the assumption that the stream
# addresses are refused without a bilibili.com Referer - confirm against
# current site behaviour.
headers = {
    'referer': 'https://www.bilibili.com/',
    'User-Agent': ua
}
def get_url(url):
    """Fetch a video page and extract its title and DASH stream addresses.

    Args:
        url: Address of the video page to scrape.

    Returns:
        A ``(filename, video_url, audio_url)`` tuple. ``filename`` is the text
        of the page's first ``<h1>``, stripped and with characters that are
        not allowed in Windows file names replaced by ``_``. The two URLs are
        the ``baseUrl`` of the first video and first audio entry under
        ``data.dash`` in the embedded play info.

    Raises:
        requests.HTTPError: The page request returned an error status.
        ValueError: The page has no embedded play info or no ``<h1>`` title
            (``json.JSONDecodeError`` for malformed play info is a subclass).
    """
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()  # make sure the request succeeded

    # The play info is a JSON object assigned inside a <script> tag. The lazy
    # group needs the closing tag as a terminator; without one it matches the
    # empty string and json.loads() fails on every page.
    info = re.search(r'window\.__playinfo__=(.*?)</script>', response.text, re.S)
    if not info:
        raise ValueError("无法找到播放信息")

    # Parse the JSON once and reuse it for both streams.
    dash = json.loads(info.group(1))['data']['dash']
    video_url = dash['video'][0]['baseUrl']
    audio_url = dash['audio'][0]['baseUrl']

    titles = etree.HTML(response.text).xpath('//h1/text()')
    if not titles:
        raise ValueError("无法找到视频标题")
    # The title becomes a file name later, so trim whitespace and replace the
    # characters Windows rejects in file names.
    filename = re.sub(r'[\\/:*?"<>|]', '_', titles[0].strip())
    return filename, video_url, audio_url
def download(name, video_url, audio_url):
    """Download the video and audio streams into the current directory.

    The video stream is saved as ``<name>.mp4`` and the audio stream as
    ``<name>.mp3``. Each response is streamed to disk in chunks so a large
    file is never held in memory as a whole.

    Args:
        name: Base file name (without extension) for both output files.
        video_url: Address of the video stream.
        audio_url: Address of the audio stream.

    Raises:
        requests.HTTPError: A stream request returned an error status; raised
            before anything is written for that stream so an error page is
            not saved as media.
    """
    for url, file_type in [(video_url, 'mp4'), (audio_url, 'mp3')]:
        with requests.get(url, headers=headers, stream=True, timeout=30) as response:
            response.raise_for_status()
            with open(os.path.join(os.getcwd(), f'{name}.{file_type}'), 'wb') as f:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
        print(f"已下载{name}的{file_type}部分")
def combine(name):
    # 合并音频和视频
    cmd = [
        r"C:\download_solftware\ffmpeg-7.0.2-full_build\bin\ffmpeg",  # 确保路径正确
        "-i", f"{os.getcwd()}/{name}.mp4",
        "-i", f"{os.getcwd()}/{name}.mp3",
        "-c:v", "copy",
        "-c:a", "aac",
        "-strict", "experimental",
        f"{os.getcwd()}\\output-{name}.mp4"
    ]
    try:
        subprocess.run(cmd, check=True)
        print("已完成合并")
    except subprocess.CalledProcessError as e:
        print(f"合并失败: {e}")
    finally:
        # 删除源文件
        os.remove(f"{os.getcwd()}\\{name}.mp4")
        os.remove(f"{os.getcwd()}\\{name}.mp3")
if __name__ == '__main__':
    # Placeholder address: replace it with the page of the video to fetch.
    target = 'https://www.bilibili.com/video/your_video_id'
    # Resolve the title and stream addresses, download both streams, then merge.
    title, video_src, audio_src = get_url(target)
    download(title, video_src, audio_src)
    combine(title)
89684828   

感谢楼主分享,支持一下!
jinwenqing   

感谢分享 不可错过的实用工具
gxchyf1168   

url是怎么找的呀
三滑稽甲苯   

哔哩哔哩下载器有很多现成的,但是研究一下原理也是很不错的
GGxiaoyuboy   

牛啤,大佬!
darkf   

太实用了
Derik
OP
  


gxchyf1168 发表于 2024-9-21 16:04
url是怎么找的呀

你在网页上打开B站具体视频时,浏览器地址栏里的链接就是所需的url
cyzhaojia   

跟着学习
您需要登录后才可以回帖 登录 | 立即注册

返回顶部