Scraping all of your liked videos from Kuaishou

Views 86 | Replies 11
Author: MiddleMan
Download every video under your Likes (the red heart) by using cookies.
Straight to the code:
[Python]
# Login module
from io import BytesIO
from PIL import Image
import base64
import requests
import aiohttp
import asyncio
import aiofiles
import random
# Fetch the login QR code
def get_login_qrcode():
   
    url ="https://id.kuaishou.cn/rest/c/infra/ks/qr/start"
    params = {
    'sid': 'kuaishou.server.webday7',
    'channelType': 'UNKNOWN',
    'encryptHeaders': ''
}
    headers = {
    "Accept": "*/*",
    "Accept-Encoding": "gzip, deflate, br, zstd",
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
    "Cache-Control": "no-cache",
    "Connection": "keep-alive",
    "Content-Length": "63",
    "Content-Type": "application/x-www-form-urlencoded",
    "Cookie": "did=web_740982b1f8a4b3ccc1b1eb37ddb1261e",
    "Host": "id.kuaishou.cn",
    "Origin": "https://www.kuaishou.cn",
    "Pragma": "no-cache",
    "Referer": "https://www.kuaishou.cn/?isHome=1",
    "Sec-CH-UA": '"Chromium";v="130", "Microsoft Edge";v="130", "Not?A_Brand";v="99"',
    "Sec-CH-UA-Mobile": "?0",
    "Sec-CH-UA-Platform": '"Windows"',
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-site",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36 Edg/130.0.0.0"
}
   
    data = requests.post(url, headers=headers, data=params).json()
   
    return [data['imageData'],data['qrLoginToken'],data['qrLoginSignature']]
# Decode a Base64 string into a PIL image
def base64_to_image(base64_string):
   
    image_data = base64.b64decode(base64_string)
   
    # Turn the raw bytes into an image
    image = Image.open(BytesIO(image_data))
   
    # Return the image object
    return image
# Check whether the QR code has been scanned
def Check_takes_effect(qrLoginToken,qrLoginSignature):
    url = "https://id.kuaishou.com/rest/c/infra/ks/qr/scanResult"
    params = {
        'qrLoginToken': qrLoginToken,
        'qrLoginSignature': qrLoginSignature,
        'channelType': 'UNKNOWN',
        'encryptHeaders': ''
    }
    headers = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        "Content-Length": "93",
        "Content-Type": "application/x-www-form-urlencoded",
        "Host": "id.kuaishou.com",
        "Origin": "https://www.kuaishou.com",
        "Referer": "https://www.kuaishou.com/?isHome=1",
        "Sec-CH-UA": '"Chromium";v="130", "Google Chrome";v="130", "Not?A_Brand";v="99"',
        "Sec-CH-UA-Mobile": "?0",
        "Sec-CH-UA-Platform": '"Windows"',
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-site",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
}
    response = requests.post(url, headers=headers, data=params).json()
    if response['result'] == 707:
        print("QR code has expired, please request a new one")
        return False
    elif response['result'] == 1:
        print("QR code scanned, waiting for confirmation")
        return True
    # Fallback for any other result code so the caller never gets None
    print(f"Unexpected scan result: {response['result']}")
    return False
   
# Wait for the user to confirm the login on their phone
def Confirm_status(qrLoginToken,qrLoginSignature):
    url = "https://id.kuaishou.com/rest/c/infra/ks/qr/acceptResult"
    params = {
    "qrLoginToken": qrLoginToken,
    "qrLoginSignature": qrLoginSignature,
    "sid": "kuaishou.server.webday7",
    "channelType": "UNKNOWN",
    "encryptHeaders": ""
}
    headers = {
    "Accept": "*/*",
    "Accept-Encoding": "gzip, deflate, br, zstd",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Connection": "keep-alive",
    "Content-Length": "121",
    "Content-Type": "application/x-www-form-urlencoded",
    "Host": "id.kuaishou.com",
    "Origin": "https://www.kuaishou.com",
    "Referer": "https://www.kuaishou.com/?isHome=1",
    "Sec-CH-UA": '"Chromium";v="130", "Google Chrome";v="130", "Not?A_Brand";v="99"',
    "Sec-CH-UA-Mobile": "?0",
    "Sec-CH-UA-Platform": '"Windows"',
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-site",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
}
    response = requests.post(url, headers=headers, data=params).json()
    if response['result'] == 1:
        print("登录成功")
        return response['qrToken']
# Exchange the qrToken for the login cookies
def kuaishou_callback(qrToken):
    url = "https://id.kuaishou.com/pass/kuaishou/login/qr/callback"
    params = {
        "qrToken": qrToken,
        "sid": "kuaishou.server.webday7",
        "channelType": "UNKNOWN",
        "encryptHeaders": ""
}
    headers = {
    "accept": "*/*",
    "accept-encoding": "gzip, deflate, br, zstd",
    "accept-language": "zh-CN,zh;q=0.9",
    "cache-control": "no-cache",
    "connection": "keep-alive",
    "content-length": "354",
    "content-type": "application/x-www-form-urlencoded",
    "cookie": "did=web_110d3c68ae9762f3af4fcdc734b87449",
    "host": "id.kuaishou.com",
    "origin": "https://www.kuaishou.com",
    "pragma": "no-cache",
    "referer": "https://www.kuaishou.com/?isHome=1",
    "sec-ch-ua": '"Chromium";v="130", "Google Chrome";v="130", "Not?A_Brand";v="99"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-site",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
}
    response = requests.post(url, headers=headers, data=params).json()
    userId = response['userId']
    kuaishou_server_webday7_at = response['kuaishou.server.webday7.at']
    kuaishou_server_webday7_st = response['kuaishou.server.webday7_st']
    cookie = structure_cookie(kuaishou_server_webday7_at,kuaishou_server_webday7_st,userId)
    cookie = {key: str(value) for key, value in cookie.items()}
    return cookie
# Build the cookie dict
def structure_cookie(webday7_ph,webday7_st,userId):
    cookie = {
        'clientid':3,
        'did':'web_110d3c68ae9762f3af4fcdc734b87449',
        'kpf':'PC_WEB',
        'kpn':'KUAISHOU_VISION',
        'kuaishou.server.webday7_ph':str(webday7_ph),
        'kuaishou.server.webday7_st':str(webday7_st),
        'userId':str(userId)
    }
    return cookie
# Fetch the liked-video list (JSON) from the GraphQL endpoint
def get_use_video_info(cookie):
    url ="https://www.kuaishou.com/graphql"
    params = {"operationName":"visionProfileLikePhotoList","variables":{"pcursor":"","page":"profile"},"query":"fragment photoContent on PhotoEntity {\n  __typename\n  id\n  duration\n  caption\n  originCaption\n  likeCount\n  viewCount\n  commentCount\n  realLikeCount\n  coverUrl\n  photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  profileUserTopPhoto\n  musicBlocked\n  riskTagContent\n  riskTagUrl\n}\n\nfragment recoPhotoFragment on recoPhotoEntity {\n  __typename\n  id\n  duration\n  caption\n  originCaption\n  likeCount\n  viewCount\n  commentCount\n  realLikeCount\n  coverUrl\n  photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  profileUserTopPhoto\n  musicBlocked\n  riskTagContent\n  riskTagUrl\n}\n\nfragment feedContentWithLiveInfo on Feed {\n  type\n  author {\n    id\n    name\n    headerUrl\n    following\n    livingInfo\n    headerUrls {\n      url\n      __typename\n    }\n    __typename\n  }\n  photo {\n    ...photoContent\n    ...recoPhotoFragment\n    __typename\n  }\n  canAddComment\n  llsid\n  status\n  currentPcursor\n  tags {\n    type\n    name\n    __typename\n  }\n  __typename\n}\n\nquery visionProfileLikePhotoList($pcursor: String, $page: String, $webPageArea: String) {\n  visionProfileLikePhotoList(pcursor: $pcursor, page: $page, webPageArea: $webPageArea) {\n    result\n    llsid\n    webPageArea\n    feeds {\n      ...feedContentWithLiveInfo\n      __typename\n    }\n    hostName\n    pcursor\n    __typename\n  }\n}\n"}
    headers = {
    "Accept": "*/*",
    "Accept-Encoding": "gzip, deflate, br, zstd",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Cache-Control": "no-cache",
    "Connection": "keep-alive",
    "Content-Length": "1775",
    "Content-Type": "application/json",
    #"Cookie": cookie,
    "Host": "www.kuaishou.com",
    "Origin": "https://www.kuaishou.com",
    "Pragma": "no-cache",
    "Referer": "https://www.kuaishou.com/profile/3xisyfk6x2djz44",
    "Sec-CH-UA": '"Chromium";v="130", "Google Chrome";v="130", "Not?A_Brand";v="99"',
    "Sec-CH-UA-Mobile": "?0",
    "Sec-CH-UA-Platform": '"Windows"',
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-origin",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
}
    response = requests.post(url, headers=headers, json=params,cookies=cookie,verify=False)
    if response.status_code == 200:
        data = response.json()
        print("获取信息文件成功...")
        return data
# Download a single video asynchronously
async def download_video(video_url):
    filename = str(random.randint(1000,9999))+'.mp4'
    path = r"C:\Users\MiddleMan\Videos\kuaishou\\"
    video_name = path+filename
    async with aiohttp.ClientSession() as session:
        async with session.get(video_url) as response:
            if response.status == 200:
                f = await aiofiles.open(video_name, mode='wb')
                content = await response.read()
                await f.write(content)
                await f.close()
                print(f"{filename}下载成功")
            else:
                print(f"{filename}下载失败")
# Main entry point
async def main():
    tasks = []
   
    # Fetch the login QR code
    qrcode_base64 = get_login_qrcode()
   
    # Decode the QR code image
    qrcode_image = base64_to_image(qrcode_base64[0])
   
    # Show the QR code so it can be scanned with the app
    qrcode_image.show()
   
    bool_takes_effect = Check_takes_effect(qrcode_base64[1],qrcode_base64[2])
   
    if bool_takes_effect:
        # Close the QR code window
        qrcode_image.close()
        qrToken = Confirm_status(qrcode_base64[1],qrcode_base64[2])
        cookie = kuaishou_callback(qrToken)
        data = get_use_video_info(cookie)
        data_list = data['data']['visionProfileLikePhotoList']['feeds']
        print(f"共{len(data_list)}个视频开始下载")
        for result in data_list:
            #print(result['photo']['videoResource']['h264']['adaptationSet'][0]['representation'][0]['url'])
            url = result['photo']['videoResource']['h264']['adaptationSet'][0]['representation'][0]['url']
            tasks.append(asyncio.create_task(download_video(url)))
    # Guard against an empty task list (e.g. when the QR login fails)
    if tasks:
        await asyncio.gather(*tasks)
if __name__ == '__main__':
    asyncio.run(main())
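
As posted, main() only downloads the first page returned by visionProfileLikePhotoList, even though the GraphQL response also carries a pcursor for paging. Below is a minimal sketch of how the loop might look if get_use_video_info were given an extra pcursor parameter that it forwards into params["variables"]["pcursor"] (that parameter does not exist in the code above); treating "no_more" or an empty string as the terminal cursor is an assumption about the endpoint, so verify it against real responses.

[Python]
# Sketch only: assumes get_use_video_info(cookie, pcursor) forwards pcursor into
# the GraphQL variables; the posted version always sends an empty cursor.
async def download_all_liked(cookie):
    tasks = []
    pcursor = ""
    while True:
        data = get_use_video_info(cookie, pcursor)
        page = data['data']['visionProfileLikePhotoList']
        for feed in page['feeds']:
            url = feed['photo']['videoResource']['h264']['adaptationSet'][0]['representation'][0]['url']
            tasks.append(asyncio.create_task(download_video(url)))
        pcursor = page.get('pcursor', '')
        # Assumed terminal values; adjust once you have seen a real last page.
        if not pcursor or pcursor == "no_more":
            break
    if tasks:
        await asyncio.gather(*tasks)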

Tags: QR code, video

zzh151223   

I wrote one of these myself and it runs fine, but after a day I have to open the site in a browser or the app before it will return data again; otherwise every response comes back with not-logged-in codes. I dug through some XHR requests in the browser dev tools and found one that looked like a login endpoint, so before each day's crawl I hit that endpoint first. That worked for one day and then stopped again. I'm completely out of ideas now and just manually open the site in a browser before every crawl, which is a real pain.
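
One way to at least avoid scanning the QR code on every run is to cache the cookie dict that kuaishou_callback returns and only repeat the QR flow when a request comes back unauthenticated. A minimal sketch under stated assumptions: the cache file name is made up, and the "session expired" check simply treats a missing or empty feeds list as a dead cookie, which may need adjusting against real responses. It won't stop the server from expiring the session daily, but it makes the re-login automatic instead of manual.

[Python]
import json
import os

COOKIE_FILE = "kuaishou_cookie.json"   # hypothetical cache path

def load_or_login():
    # Reuse a previously saved cookie dict if one exists on disk.
    if os.path.exists(COOKIE_FILE):
        with open(COOKIE_FILE, "r", encoding="utf-8") as f:
            return json.load(f)
    # Otherwise run the QR-code flow from the post and cache the result.
    qr = get_login_qrcode()
    base64_to_image(qr[0]).show()
    if not Check_takes_effect(qr[1], qr[2]):
        raise RuntimeError("QR code was not scanned in time")
    cookie = kuaishou_callback(Confirm_status(qr[1], qr[2]))
    with open(COOKIE_FILE, "w", encoding="utf-8") as f:
        json.dump(cookie, f)
    return cookie

def session_expired(data):
    # Assumption: no data or an empty feeds list means the cookie no longer works.
    try:
        return not data['data']['visionProfileLikePhotoList']['feeds']
    except (KeyError, TypeError):
        return True

If session_expired(data) ever returns True, delete kuaishou_cookie.json and call load_or_login() again to go back through the QR flow.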
laugh68   

Awesome, now I can save all of these junk (crossed out) videos. That said, Kuaishou's moderation is about as lax as Bilibili's these days, while Douyin has actually become the strict one.
Chielly   

Thanks for sharing.
qq1475   

Good grief, this definitely calls for a bottle of Nutri-Express.
chensvip   

This is pretty interesting.
zzt5211314   

Thanks for sharing.
cpckly   

Thanks for sharing, I'll test it and see how well it works.
Qinmuyi   

Thanks to the OP for sharing.
linfenglin   

That's awesome, man.