import os
import re
import sys
import time
from collections import defaultdict

# Add this script's directory to the module search path so the local
# zipcracker_utils module can be imported.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from zipcracker_utils import extract_hash_safe, get_logger
def _volume_group_key(filename):
    """Return the lower-cased group key for one file name (no directory part)."""
    # Volume marker followed by a real archive extension: "a.part01.rar",
    # "a.001.zip", "a.z01.7z"  ->  "a.<ext>".
    match = re.match(
        r"(.+?)\.(part\d+|\d{2,3}|z\d+)\.(rar|zip|7z)$", filename, re.I
    )
    if match:
        return f"{match.group(1)}.{match.group(3)}".lower()

    # Split-zip segments "a.z01", "a.z02"; keyed as "a.zip" so they land in
    # the same group as a sibling file literally named "a.zip".
    match = re.match(r"(.+)\.z\d+$", filename, re.I)
    if match:
        return f"{match.group(1)}.zip".lower()

    # Trailing numeric volume suffix: "a.zip.001", "a.7z.002", "a.001".
    # The key is simply the name with that suffix removed.
    match = re.match(r"(.+)\.\d{2,3}$", filename)
    if match:
        return match.group(1).lower()

    # Not a recognised volume name: the file is its own group.
    return filename.lower()


def group_split_files(files):
    """Group archive paths so all volumes of a split archive share one key.

    Only the base name of each path is inspected, case-insensitively.
    Recognised volume naming schemes:

    * ``name.part1.rar`` / ``name.001.zip`` / ``name.z01.7z`` -> ``name.<ext>``
    * ``name.z01`` / ``name.z02`` -> ``name.zip``
    * ``name.zip.001`` / ``name.001`` -> the name without the numeric suffix

    Any other file forms a group keyed by its own lower-cased base name.
    Because keys ignore the directory, same-named files from different
    directories end up in the same group.

    Args:
        files: Iterable of file paths.

    Returns:
        A ``defaultdict(list)`` mapping each lower-cased group key to the
        paths belonging to it, in input order.
    """
    groups = defaultdict(list)
    for file_path in files:
        groups[_volume_group_key(os.path.basename(file_path))].append(file_path)
    return groups
def detect_file_ext(main_file):
    """Return the archive type implied by a (possibly split-volume) file name.

    The check is case-insensitive and looks only at the file name:

    * ``x.zip.001`` / ``x.rar.002`` / ``x.7z.001`` -> the extension in front
      of the volume number (``"zip"``, ``"rar"``, ``"7z"``)
    * ``x.part1.rar`` / ``x.rar.part1`` -> ``"rar"``
    * ``x.z01`` -> ``"zip"``
    * a bare ``x.001`` -> ``"zip"`` (the name carries no type information;
      this keeps the previous default)
    * anything else -> its ordinary extension without the dot, or ``""``

    Args:
        main_file: Path or file name of the archive volume.

    Returns:
        The lower-cased archive type string.
    """
    name = os.path.basename(main_file).lower()

    # Must be tested before the bare ".001" rule, otherwise "x.7z.001" and
    # "x.rar.001" would be misreported as zip.
    inner = re.search(r"\.(zip|rar|7z)\.\d{2,3}$", name)
    if inner:
        return inner.group(1)

    if re.search(r"\.part\d+\.rar$|\.rar\.part\d+$", name):
        return "rar"

    if re.search(r"\.z\d+$", name):
        return "zip"

    if name.endswith(".001"):
        return "zip"

    # Ordinary file: plain extension without the leading dot.
    return os.path.splitext(name)[1].lstrip(".")
def _volume_sort_key(path):
    """Natural-order sort key so that e.g. "part2" sorts before "part10"."""
    pieces = re.split(r"(\d+)", os.path.basename(path).lower())
    # re.split with a capturing group alternates text / digit runs, so the
    # odd indices are always the numeric pieces; comparing two keys therefore
    # never mixes int with str at the same position.
    natural = [int(piece) if i % 2 else piece for i, piece in enumerate(pieces)]
    # The full path is a deterministic tie-breaker for identical names.
    return natural, path


def batch_extract_hash(test_dir, output_dir,
                       john_path=r"D:\Desktop\tools\john-1.9.0-jumbo-1-win64"):
    """Extract a hash for every archive under ``test_dir``.

    Files are collected recursively, grouped with ``group_split_files`` and,
    for each group, the first volume (natural name order) is passed to
    ``extract_hash_safe``. A successful hash is written to its own ``.hash``
    file in ``output_dir`` and appended to ``all_hashes.txt`` there.

    Resume support: a group whose ``.hash`` file already exists is skipped,
    so an interrupted run can simply be started again.

    Args:
        test_dir: Directory that is walked recursively for input files.
        output_dir: Directory receiving the ``.hash`` files and the summary
            file; created if missing.
        john_path: Passed through as the first argument of
            ``extract_hash_safe``. Defaults to the previously hard-coded
            location.

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    # NOTE(review): the logger is not used below; the call is kept in case
    # get_logger() configures logging as a side effect - confirm.
    logger = get_logger()

    os.makedirs(output_dir, exist_ok=True)

    # Collect every file below test_dir.
    files = []
    for root, _dirs, filenames in os.walk(test_dir):
        files.extend(os.path.join(root, name) for name in filenames)
    print(f"找到 {len(files)} 个文件,开始分组处理...")

    # Group split volumes together.
    groups = group_split_files(files)
    total = len(groups)
    print(f"识别出 {total} 个文件组")

    all_hashes_path = os.path.join(output_dir, "all_hashes.txt")
    # Output paths handed out during this run, including skipped ones.
    assigned_outputs = set()

    with open(all_hashes_path, "a", encoding="utf-8") as all_f:
        for idx, (group_key, group_files) in enumerate(groups.items(), 1):
            base_name = os.path.splitext(os.path.basename(group_key))[0]
            hash_out_path = os.path.join(output_dir, f"{base_name}.hash")
            if hash_out_path in assigned_outputs:
                # Another group in this run (e.g. "a.zip" vs "a.rar") already
                # owns "<stem>.hash". Fall back to the full group key so this
                # archive is not mistaken for an already-finished one.
                hash_out_path = os.path.join(output_dir, f"{group_key}.hash")
            assigned_outputs.add(hash_out_path)

            # Resume: skip groups whose hash file already exists.
            if os.path.exists(hash_out_path):
                print(f"[{idx}/{total}] 已存在,跳过: {hash_out_path}")
                continue

            print(f"\n[{idx}/{total}] 文件组: {group_key}")
            for member in group_files:
                print(f" - {member}")

            # Use the first volume as the main file. Ordering by name rather
            # than by size matters: the smallest file of a split set is
            # normally the last volume, not the first.
            main_file = min(group_files, key=_volume_sort_key)

            # Derive the archive type from the (possibly split) file name.
            file_ext = detect_file_ext(main_file)

            start_time = time.perf_counter()
            try:
                hash_value, _hash_file, status_msg = extract_hash_safe(
                    john_path,
                    main_file,
                    file_ext,
                )
                elapsed_time = time.perf_counter() - start_time
                if hash_value:
                    hash_line = hash_value.strip() + "\n"
                    # Per-archive hash file.
                    with open(hash_out_path, "w", encoding="utf-8") as out_f:
                        out_f.write(hash_line)
                    # Summary file; flushed right away so a hard interruption
                    # cannot leave a .hash file without its summary entry.
                    all_f.write(hash_line)
                    all_f.flush()
                    print(f" ✅ 提取成功 (用时 {elapsed_time:.2f}s),已保存: {hash_out_path}")
                else:
                    print(f" ❌ 提取失败: {status_msg}")
            except Exception as e:
                # Per-archive boundary: report and continue with the next
                # group instead of aborting the whole batch.
                print(f" ⚠️ 异常错误: {str(e)}")

    print(f"\n 所有哈希已提取完成,总汇总文件: {all_hashes_path}")
if __name__ == "__main__":
    # Usage: script.py [input_dir [output_dir]]
    # Without arguments the original hard-coded directories are used.
    cli_args = sys.argv[1:]
    test_dir = cli_args[0] if len(cli_args) >= 1 else r"D:\Desktop\wenjian"  # input directory
    output_dir = cli_args[1] if len(cli_args) >= 2 else r"D:\Desktop\hashes"  # hash output directory
    print(f"开始提取目录: {test_dir}")
    batch_extract_hash(test_dir, output_dir)