https://anxia.com/s/swhrgql33re?password=ef42#
006-annas_archive_data__aacid__duxiu_files__20240613T170636Z--20240613T170637Z
访问码:ef42
复制这段内容,可在115生活APP中直接打开!
离线下载完成后,可以用 Python 脚本给每个文件的文件名补上 ss 号;文件再加上后缀 .pdf 就可以正常打开阅读了。处理 ss 号的 Python 脚本也列在下面:
[Python] 纯文本查看 复制代码
import PyPDF2
import re
import os
import sys
# Batch-rename script: for every file under the directory given as the first
# command-line argument, open it as a PDF, read the text of its last page,
# look for a "filename_decoded" field there, and append "_<digits>" (the
# first run of digits found in that field) to the file name.
#
# Usage: python <this script> <directory>
if len(sys.argv) < 2:
    sys.exit("usage: " + sys.argv[0] + " <directory>")
path = sys.argv[1]
print(path)

# A name ending in "_" + 8 digits is treated as already renamed by a
# previous run, so the script can safely be re-run on the same directory.
renamed_pattern = re.compile(r'_[0-9]{8}$')

# Strict pattern for the quoted "filename_decoded" value; the value may be
# wrapped over several lines in the extracted text, hence the repeated
# character classes that allow CR/LF inside it.
field_pattern = re.compile(
    r'\"filename_decoded\"\:[ \n]{0,}[\"]{1}'
    r'.*[\r\n0-9a-zA-Z \_\-\.]{0,}'
    r'.*[\r\n0-9a-zA-Z \_\-\.]{0,}'
    r'.*[\r\n0-9a-zA-Z \_\-\.]{0,}.*\",'
)

# Walk bottom-up (topdown=False), as the original script did.
for root, dirs, files in os.walk(path, False):
    for name in files:
        print(name)
        if renamed_pattern.search(name) is not None:
            print("已经修改过文件名,跳出本次循环")
            continue
        print("继续处理...")

        # Join with `root`, not `path`: os.walk also yields files that live
        # in subdirectories, and those do not exist directly under `path`.
        file_path = os.path.join(root, name)

        # Read everything needed while the file is open, then close it
        # before renaming (an open handle blocks os.rename on Windows).
        with open(file_path, 'rb') as pdf_file:
            pdf_obj = PyPDF2.PdfReader(pdf_file)
            page_count = len(pdf_obj.pages)
            if page_count == 0:
                # No pages means there is no last page to inspect.
                continue
            # Extract the text once and reuse it for both searches.
            page_text = pdf_obj.pages[page_count - 1].extract_text()

        ssobj = re.search('filename_decoded.*', page_text)
        print("ssobj:")
        print(ssobj)
        if ssobj is None:
            continue

        # The loose search above can succeed while the strict pattern does
        # not; skip the file instead of calling .group() on None.
        field = field_pattern.search(page_text)
        if field is None:
            continue

        ssid = re.search(r'\d+', field.group())
        if ssid is not None and ssid.group():
            print(ssid.group())
            os.rename(file_path, file_path + "_" + ssid.group())