提取pdf原图片

作者：wzs0777 发布时间：2025-5-29 07:00:31

[Python] 纯文本查看复制代码
import os
import pikepdf
import fitz  # PyMuPDF
import io
from PIL import Image
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
import threading
import time
class PDFImageExtractorApp:
    """Tkinter GUI that extracts the images embedded in a PDF into a folder.

    The user picks a PDF (and optionally types its password); the images are
    written to a folder next to the PDF that carries the PDF's base name.
    Extraction runs in a background thread.  Because Tk widgets must only be
    touched from the thread that runs the event loop, the worker never
    updates widgets directly: ``log_message`` and ``update_progress`` queue
    their payload when called off the UI thread, and ``check_thread`` (which
    runs on the UI thread via ``root.after``) applies the queued updates.
    """

    # Milliseconds between two polls of the worker thread / pending UI events.
    POLL_INTERVAL_MS = 100

    def __init__(self, root):
        """Build the window.

        Args:
            root: the ``tk.Tk`` root window that hosts the application.
        """
        self.root = root
        self.root.title("PDF图片提取工具")
        self.root.geometry("600x400")
        self.root.resizable(True, True)
        self.pdf_path = ""
        self.output_dir = ""
        self.password = ""
        self.is_running = False
        self.thread = None
        # Outcome of the last run; read by check_thread to pick the dialog.
        self._succeeded = False
        # UI updates posted by the worker thread, applied by check_thread.
        self._pending = []
        self._pending_lock = threading.Lock()
        # The thread that builds the widgets is the only one allowed to
        # touch them afterwards.
        self._ui_thread_id = threading.get_ident()

        # Main container
        main_frame = ttk.Frame(root, padding="20")
        main_frame.pack(fill=tk.BOTH, expand=True)

        # File selection row
        file_frame = ttk.Frame(main_frame)
        file_frame.pack(fill=tk.X, pady=10)
        ttk.Label(file_frame, text="PDF文件:").grid(row=0, column=0, sticky=tk.W, padx=5, pady=5)
        self.file_entry = ttk.Entry(file_frame, width=50)
        self.file_entry.grid(row=0, column=1, padx=5, pady=5, sticky=tk.W + tk.E)
        ttk.Button(file_frame, text="浏览...", command=self.browse_file).grid(row=0, column=2, padx=5, pady=5)

        # Password row
        password_frame = ttk.Frame(main_frame)
        password_frame.pack(fill=tk.X, pady=5)
        ttk.Label(password_frame, text="PDF密码:").grid(row=0, column=0, sticky=tk.W, padx=5, pady=5)
        self.password_entry = ttk.Entry(password_frame, width=30, show="*")
        self.password_entry.grid(row=0, column=1, padx=5, pady=5, sticky=tk.W)
        ttk.Label(password_frame, text="(可选，如PDF有密码)").grid(row=0, column=2, sticky=tk.W, padx=5, pady=5)

        # Progress bar
        self.progress_var = tk.DoubleVar()
        self.progress_frame = ttk.Frame(main_frame)
        self.progress_frame.pack(fill=tk.X, pady=10)
        self.progress_bar = ttk.Progressbar(self.progress_frame, variable=self.progress_var, length=100)
        self.progress_bar.pack(fill=tk.X, padx=5, pady=5)

        # Read-only status log with a scrollbar
        self.status_frame = ttk.Frame(main_frame)
        self.status_frame.pack(fill=tk.BOTH, expand=True)
        self.status_text = tk.Text(self.status_frame, height=10, width=70, wrap=tk.WORD)
        self.status_text.pack(side=tk.LEFT, fill=tk.BOTH, expand=True, padx=5, pady=5)
        scrollbar = ttk.Scrollbar(self.status_frame, command=self.status_text.yview)
        scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
        self.status_text.config(yscrollcommand=scrollbar.set)
        self.status_text.config(state=tk.DISABLED)

        # Action button
        button_frame = ttk.Frame(main_frame)
        button_frame.pack(fill=tk.X, pady=10)
        self.extract_btn = ttk.Button(button_frame, text="提取图片", command=self.start_extraction)
        self.extract_btn.pack(side=tk.RIGHT, padx=5)

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------
    def _set_pdf_path(self, file_path):
        """Remember *file_path* and derive the output folder from it.

        The output folder sits next to the PDF and is named after the PDF
        without its extension.
        """
        self.pdf_path = file_path
        pdf_dir = os.path.dirname(file_path)
        pdf_name = os.path.splitext(os.path.basename(file_path))[0]
        self.output_dir = os.path.join(pdf_dir, pdf_name)
        if self.output_dir == file_path:
            # A PDF without an extension would make the folder collide with
            # the file itself, and os.makedirs would then fail.
            self.output_dir += "_images"

    def _post(self, kind, payload):
        """Queue a UI update coming from a non-UI thread."""
        with self._pending_lock:
            self._pending.append((kind, payload))

    def _drain_pending(self):
        """Apply, on the UI thread, every update queued by the worker."""
        with self._pending_lock:
            events, self._pending = self._pending, []
        for kind, payload in events:
            if kind == "log":
                self._append_log(payload)
            else:
                self.progress_var.set(payload)

    def _append_log(self, message):
        """Append one line to the status box (UI thread only)."""
        # The box is kept DISABLED so the user cannot type into it; it has to
        # be enabled for the duration of the insert.
        self.status_text.config(state=tk.NORMAL)
        self.status_text.insert(tk.END, message + "\n")
        self.status_text.see(tk.END)
        self.status_text.config(state=tk.DISABLED)

    def _remove_temp_file(self, path):
        """Delete *path*, logging (rather than hiding) a failure to do so."""
        try:
            os.remove(path)
        except FileNotFoundError:
            pass  # nothing was written, nothing to clean up
        except OSError as exc:
            self.log_message(f"无法删除临时解密文件: {exc}")

    # ------------------------------------------------------------------
    # UI callbacks
    # ------------------------------------------------------------------
    def browse_file(self):
        """Open the file dialog and record the chosen PDF."""
        file_path = filedialog.askopenfilename(
            title="选择PDF文件",
            filetypes=[("PDF文件", "*.pdf"), ("所有文件", "*.*")]
        )
        if not file_path:
            return
        self.file_entry.delete(0, tk.END)
        self.file_entry.insert(0, file_path)
        self._set_pdf_path(file_path)
        self.log_message(f"已选择PDF文件: {file_path}")
        self.log_message(f"图片将保存到: {self.output_dir}")

    def log_message(self, message):
        """Add *message* to the status box.

        Safe to call from any thread: off the UI thread the message is queued
        and shown on the next poll performed by ``check_thread``.
        """
        if threading.get_ident() != self._ui_thread_id:
            self._post("log", message)
            return
        self._append_log(message)

    def update_progress(self, value):
        """Set the progress bar to *value* (0-100); safe from any thread."""
        if threading.get_ident() != self._ui_thread_id:
            self._post("progress", value)
            return
        self.progress_var.set(value)

    def start_extraction(self):
        """Validate the inputs and start the background extraction thread."""
        if self.is_running:
            messagebox.showinfo("提示", "正在处理中，请稍候...")
            return
        # Honour a path typed or pasted into the entry, not only one picked
        # through the file dialog.
        typed_path = self.file_entry.get().strip()
        if not typed_path:
            messagebox.showerror("错误", "请先选择PDF文件")
            return
        if not os.path.isfile(typed_path):
            messagebox.showerror("错误", f"文件不存在: {typed_path}")
            return
        if typed_path != self.pdf_path:
            self._set_pdf_path(typed_path)
        self.password = self.password_entry.get()

        # Block re-entry while a run is in progress.
        self.extract_btn.config(state=tk.DISABLED)
        self.is_running = True
        self._succeeded = False

        # Clear the status box.
        self.status_text.config(state=tk.NORMAL)
        self.status_text.delete("1.0", tk.END)
        self.status_text.config(state=tk.DISABLED)

        # Run the extraction off the UI thread so the window stays responsive.
        self.thread = threading.Thread(target=self.process_pdf, daemon=True)
        self.thread.start()
        self.root.after(self.POLL_INTERVAL_MS, self.check_thread)

    def check_thread(self):
        """Flush queued UI updates and finish up once the worker has ended."""
        # Sample liveness BEFORE draining: if the worker is already dead here,
        # everything it posted is in the queue and this drain sees all of it.
        worker_alive = self.thread.is_alive()
        self._drain_pending()
        if worker_alive:
            self.root.after(self.POLL_INTERVAL_MS, self.check_thread)
            return
        self.extract_btn.config(state=tk.NORMAL)
        self.is_running = False
        if self._succeeded:
            messagebox.showinfo("完成", "图片提取完成！")
        else:
            messagebox.showerror("错误", "图片提取失败，请查看日志信息")

    # ------------------------------------------------------------------
    # PDF handling (runs in the worker thread)
    # ------------------------------------------------------------------
    def check_pdf_encryption(self, pdf_path):
        """Return True when *pdf_path* cannot be opened without a password."""
        try:
            with pikepdf.Pdf.open(pdf_path):
                return False
        except pikepdf.PasswordError:
            return True

    def decrypt_pdf(self, pdf_path, password, output_path):
        """Save a decrypted copy of *pdf_path* to *output_path*.

        Returns:
            True on success, False when *password* is rejected.
        """
        try:
            with pikepdf.Pdf.open(pdf_path, password=password) as pdf:
                pdf.save(output_path)
            return True
        except pikepdf.PasswordError:
            self.log_message("密码错误，无法解密PDF")
            return False

    def extract_images_from_pdf(self, pdf_path, output_dir):
        """Write every image referenced by the pages of *pdf_path*.

        Each image is saved as ``page<P>_img<I>.<ext>`` in *output_dir*, using
        the byte stream and extension reported by PyMuPDF as-is, so the data
        is not decoded and re-encoded on the way out.

        Returns:
            The number of image files written.
        """
        os.makedirs(output_dir, exist_ok=True)
        image_count = 0
        # The context manager closes the document even if extraction fails,
        # which also releases the file so a temporary copy can be deleted.
        with fitz.open(pdf_path) as doc:
            total_pages = len(doc)
            for page_number, page in enumerate(doc, start=1):
                image_list = page.get_images(full=True)
                self.update_progress(page_number / total_pages * 100)
                self.log_message(f"正在处理第 {page_number}/{total_pages} 页...")
                for image_number, img in enumerate(image_list, start=1):
                    xref = img[0]
                    base_image = doc.extract_image(xref)
                    if not base_image:
                        # NOTE(review): guards against an empty result for an
                        # xref that is not an extractable image - confirm
                        # against the PyMuPDF version in use.
                        continue
                    image_filename = os.path.join(
                        output_dir,
                        f"page{page_number}_img{image_number}.{base_image['ext']}",
                    )
                    with open(image_filename, "wb") as image_file:
                        image_file.write(base_image["image"])
                    image_count += 1
                    self.log_message(f"已保存: {os.path.basename(image_filename)}")
        self.log_message(f"成功提取了 {image_count} 张原始图片")
        return image_count

    def process_pdf(self):
        """Worker entry point: decrypt if needed, then extract the images.

        Sets ``self._succeeded`` to True only when the whole run completed;
        every failure path logs its reason and leaves it False.
        """
        self._succeeded = False
        try:
            os.makedirs(self.output_dir, exist_ok=True)
            self.update_progress(0)
            if self.check_pdf_encryption(self.pdf_path):
                self.log_message("PDF文件已加密，尝试解密...")
                if not self.password:
                    self.log_message("错误: PDF文件已加密，但未提供密码")
                    return
                decrypted_pdf_path = os.path.join(
                    self.output_dir, "decrypted_" + os.path.basename(self.pdf_path)
                )
                try:
                    if not self.decrypt_pdf(self.pdf_path, self.password, decrypted_pdf_path):
                        self.log_message("PDF解密失败，无法提取图片")
                        return
                    self.log_message("PDF解密成功，开始提取图片...")
                    self.extract_images_from_pdf(decrypted_pdf_path, self.output_dir)
                finally:
                    # Never leave the decrypted copy behind, even on failure.
                    self._remove_temp_file(decrypted_pdf_path)
            else:
                self.log_message("PDF文件未加密，直接提取图片...")
                self.extract_images_from_pdf(self.pdf_path, self.output_dir)
            self.update_progress(100)
            self.log_message(f"处理完成! 图片保存在: {self.output_dir}")
            self._succeeded = True
        except Exception as e:
            # Top-level boundary of the worker thread: report instead of
            # letting the thread die silently.
            self.log_message(f"处理出错: {str(e)}")
            self.update_progress(0)
# Script entry point: build the root window, attach the application to it and
# hand control to the Tk event loop until the window is closed.
if __name__ == "__main__":
    root = tk.Tk()
    app = PDFImageExtractorApp(root)
    root.mainloop()

文件, 图片

提取pdf原图片

相关帖子

热门主题

最近收BA的人很多交易了要立刻取消BA 教训

刚看了一个视频，让我又清醒了一下

小小农民新开中转站，欢迎来踩

港版安卓机是满血的国际版安卓机吗？

我 ThreeJSON 又回来了： V 友们批评得对！

继之前 5.4 的 “收口”之后， 5.6 Sol 好

折腾 homelab 挺长时间了建了一个群想不

codex 打开风扇狂转怎么办

Vibe 的一个中文起名小工具

你们明天要去看周星驰的电影么？

热门板块

公告

网站帮助 - Yoo趣儿

我们的愿景

在 Yoo趣儿投放广告

Yoo趣儿网站用户应遵守规则

提取pdf原图片

相关帖子

热门主题

最近收BA的人很多 交易了要立刻取消BA 教训

刚看了一个视频，让我又清醒了一下

小小农民新开中转站，欢迎来踩

港版安卓机是满血的国际版安卓机吗？

我 ThreeJSON 又回来了： V 友们批评得对！

继之前 5.4 的 “收口”之后， 5.6 Sol 好

折腾 homelab 挺长时间了 建了一个群 想不

codex 打开风扇狂转怎么办

Vibe 的一个中文起名小工具

你们明天要去看周星驰的电影么？

热门板块

公告

网站帮助 - Yoo趣儿

我们的愿景

在 Yoo趣儿 投放广告

Yoo趣儿网站用户应遵守规则

最近收BA的人很多交易了要立刻取消BA 教训

折腾 homelab 挺长时间了建了一个群想不

在 Yoo趣儿投放广告