import json
import os
import re
import time
# import requests
import urllib.request
from multiprocessing import Pool
# GraphQL endpoint that the liked-videos query is posted to.
requestUrl = 'https://www.kuaishou.com/graphql'
# Root directory for downloaded videos. Written as a raw string because '\k'
# is not a valid escape sequence (SyntaxWarning on Python 3.12+); the value is
# unchanged.
folder_path = r'D:\kuaishou'
# Cookie header value: capture the cookie string that starts with
# "ktrace-context" from the Kuaishou H5 (mobile web) site.
cookie = ''
# Module-level pagination cursor value (the request helper takes its own
# `pcursor` argument).
pcursor = '1'
def post(Cookie, pcursor, timeout=30):
    """Fetch one page of the liked-videos list from the GraphQL endpoint.

    Posts the ``visionProfileLikePhotoList`` query to ``requestUrl`` using
    ``urllib.request``; the file does not import ``requests``.

    Args:
        Cookie: Value sent in the ``Cookie`` request header.
        pcursor: Pagination cursor placed in the query variables.
        timeout: Socket timeout in seconds for the HTTP request.

    Returns:
        The response body decoded as UTF-8 text.

    Raises:
        urllib.error.URLError: If the request cannot be completed or the
            server answers with an HTTP error status.
    """
    data = {"operationName":"visionProfileLikePhotoList","variables":{"pcursor":pcursor,"page":"profile"},"query":"query visionProfileLikePhotoList($pcursor: String, $page: String, $webPageArea: String) {\n visionProfileLikePhotoList(pcursor: $pcursor, page: $page, webPageArea: $webPageArea) {\n result\n llsid\n webPageArea\n feeds {\n type\n author {\n id\n name\n following\n headerUrl\n headerUrls {\n cdn\n url\n __typename\n }\n __typename\n }\n tags {\n type\n name\n __typename\n }\n photo {\n id\n duration\n caption\n likeCount\n realLikeCount\n coverUrl\n coverUrls {\n cdn\n url\n __typename\n }\n photoUrls {\n cdn\n url\n __typename\n }\n photoUrl\n liked\n timestamp\n expTag\n animatedCoverUrl\n stereoType\n videoRatio\n __typename\n }\n canAddComment\n currentPcursor\n llsid\n status\n __typename\n }\n hostName\n pcursor\n __typename\n }\n}\n"}
    # Host, Connection and Content-Length are deliberately not set here:
    # urllib derives them from the URL and the encoded body, whereas a fixed
    # Content-Length is wrong as soon as the cursor changes the body size.
    headers = {
        'accept': '*/*',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4621.0 Safari/537.36',
        'content-type': 'application/json',
        'Origin': 'https://www.kuaishou.com',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Dest': 'empty',
        # URL of your own liked-works profile page.
        'Referer': 'https://www.kuaishou.com/profile',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'Cookie': Cookie,
    }
    body = json.dumps(data).encode('utf-8')
    request = urllib.request.Request(
        requestUrl, data=body, headers=headers, method='POST')
    with urllib.request.urlopen(request, timeout=timeout) as response:
        return response.read().decode('utf-8', errors='replace')
def down(feeds, keywork):
    """Download every video in *feeds* into ``folder_path/<keywork>/``.

    Files that already exist are skipped. Each file is named after the photo
    id rather than its duration: two different videos can share a duration,
    which made the second one look "already downloaded".

    Args:
        feeds: Iterable of feed dicts; each must contain a ``photo`` dict
            with ``photoUrl`` and either ``id`` or ``duration``.
        keywork: Name of the sub-directory under ``folder_path``.
    """
    filepath = folder_path + '/' + keywork + '/'
    for feed in feeds:
        photo = feed['photo']
        # Fall back to the duration only when the feed carries no id.
        filename = str(photo.get('id') or photo['duration']) + '.mp4'
        target = filepath + filename
        if os.path.exists(target):
            print(filename + ",已存在,跳过")
            continue
        progressbar(photo['photoUrl'], filepath, filename)
        # Report success only if the download actually produced the file.
        if os.path.exists(target):
            print(filename + ",下载完成")
        else:
            print(filename + ",下载失败")
def url_response(url, filepath, filename):
    """Stream *url* to ``filepath/filename`` while printing a progress bar.

    Uses only the standard library: the previous body referenced undefined
    names (``response``, ``total_length``) and a progress-bar package the
    file never imports, so it could not run.

    Args:
        url: Address of the resource to download.
        filepath: Destination directory; created when missing.
        filename: Name of the file to write inside *filepath*.

    Raises:
        OSError: If the URL cannot be opened or the file cannot be written
            (``urllib.error.URLError`` is a subclass of ``OSError``).
    """
    if filepath:
        os.makedirs(filepath, exist_ok=True)
    target = os.path.join(filepath, filename)
    bar_width = 50
    with urllib.request.urlopen(url) as response, open(target, 'wb') as f:
        total_length = int(response.headers.get('Content-Length') or 0)
        received = 0
        while True:
            # Read in sizeable chunks; one byte per read is extremely slow.
            chunk = response.read(64 * 1024)
            if not chunk:
                break
            f.write(chunk)
            received += len(chunk)
            if total_length:
                filled = min(bar_width, received * bar_width // total_length)
                print('\rProgress: %3d%% [%s%s]' % (
                    min(100, received * 100 // total_length),
                    '#' * filled,
                    ' ' * (bar_width - filled),
                ), end='')
    print()
def progressbar(url, filepath, filename):
    """Download *url* into ``filepath`` as *filename*, printing progress.

    The data is written to a temporary ``.part`` file and renamed only after
    the whole body has arrived, so an interrupted transfer never leaves a
    truncated file under the final name.

    Args:
        url: Address of the file to download.
        filepath: Destination directory; created (with parents) if missing.
        filename: File name inside *filepath*; newline characters are removed.

    Returns:
        True if the file was downloaded completely, False if the download
        failed (the failure is printed rather than raised).
    """
    if filepath:
        # makedirs also creates missing parent directories, unlike mkdir.
        os.makedirs(filepath, exist_ok=True)
    filename = filename.replace("\n", "")
    target = os.path.join(filepath, filename)
    partial = target + '.part'
    read_size = 64 * 1024
    start = time.time()
    try:
        with urllib.request.urlopen(url, timeout=60) as response:
            # A missing header counts as "size unknown" (0).
            content_size = int(response.headers.get('content-length') or 0)
            print('Start download,[File size]:{size:.2f} MB'.format(size=content_size / 1024 / 1024))
            size = 0
            with open(partial, 'wb') as file:
                while True:
                    data = response.read(read_size)
                    if not data:
                        break
                    file.write(data)
                    size += len(data)
                    if content_size:
                        print('\r' + '[下载进度]:%s%.2f%%' % ('>' * int(size * 50 / content_size), float(size / content_size * 100)), end=' ')
        if content_size and size != content_size:
            raise OSError('incomplete download: got %d of %d bytes' % (size, content_size))
        os.replace(partial, target)
    except (OSError, ValueError) as exc:
        # Best effort: report the failure and clean up instead of crashing.
        print()
        print('Download failed: %s' % exc)
        if os.path.exists(partial):
            os.remove(partial)
        return False
    end = time.time()
    print()
    print('Download completed!,times: %.2f秒' % (end - start))
    return True
def _collect_liked_feeds(cookie_value):
    """Page through the liked-videos list and return one feed list per page.

    Args:
        cookie_value: Cookie header value forwarded to ``post``.

    Returns:
        A list whose items are the ``feeds`` lists of the fetched pages.
    """
    pages = []
    cursor = ''  # the first request is sent with an empty cursor
    while True:
        result = post(cookie_value, cursor)
        data = json.loads(result)
        payload = (data.get('data') or {}).get('visionProfileLikePhotoList')
        # Stop when the response carries no list at all.
        if not payload:
            print('success')
            break
        feeds = payload.get('feeds') or []
        if feeds:
            print(feeds)
            # Store the page before looking at the cursor so the final page
            # is not dropped.
            pages.append(feeds)
        # Stop when there is no cursor for a next page.
        next_cursor = payload.get('pcursor')
        if not next_cursor:
            print('success')
            break
        if not feeds:
            print(data['data'])
            print('no videos')
            break
        # A cursor that does not advance would loop forever.
        if next_cursor == cursor:
            print('success')
            break
        cursor = next_cursor
    return pages


if __name__ == "__main__":
    keyWork = 'zan'
    links = _collect_liked_feeds(cookie)
    for link in links:
        down(link, keyWork)
    print('while done')