import threading
import tkinter as tk
from tkinter import filedialog, messagebox
from tkinter.ttk import Progressbar

import pdfplumber
from PyPDF2 import PdfReader, PdfWriter
def is_blank_page(page):
    """Return True when *page* has neither visible text nor images.

    Args:
        page: Object exposing ``extract_text()`` and an ``images``
            collection (``remove_blank_pages`` passes pdfplumber pages).

    Returns:
        bool: True only if the extracted text is ``None``, empty or pure
        whitespace AND ``page.images`` is empty; False otherwise.

    NOTE: only text and images are inspected, so a page that contains
    nothing but other kinds of content is also reported as blank.
    """
    text = page.extract_text()
    # Any non-whitespace text means the page has content; no need to look
    # at the images in that case.
    if text and not text.isspace():
        return False
    return not page.images
def remove_blank_pages(input_pdf_path, output_pdf_path, progress_var, total_pages):
    """Copy every non-blank page of a PDF into a new file.

    The input file is opened twice: pdfplumber supplies the page objects
    that ``is_blank_page`` inspects, and ``PdfReader`` supplies the page
    at the same index that is copied into the ``PdfWriter``.

    Args:
        input_pdf_path: Path of the PDF to read.
        output_pdf_path: Path the filtered PDF is written to.
        progress_var: Object with a ``set`` method; receives the percentage
            (0-100) of pages processed so far after each page.
        total_pages: Page count used as the denominator of that percentage.

    NOTE(review): ``process_pdf`` runs this function on a worker thread, yet
    it touches ``progress_var`` and the module-level ``root`` directly.
    Consider marshalling these updates to the main thread (e.g. via
    ``root.after``) if GUI glitches are observed.
    """
    reader = PdfReader(input_pdf_path)
    writer = PdfWriter()
    with pdfplumber.open(input_pdf_path) as pdf:
        plumber_pages = pdf.pages
        for i, source_page in enumerate(reader.pages):
            # Inspect page i through pdfplumber, but copy page i from PyPDF2:
            # each call must receive a single page, not the whole collection.
            if not is_blank_page(plumber_pages[i]):
                writer.add_page(source_page)
            # Update the progress bar
            progress_var.set((i + 1) / total_pages * 100)
            root.update_idletasks()  # make sure the UI redraws
    with open(output_pdf_path, "wb") as output_pdf:
        writer.write(output_pdf)
def select_input_file():
    """Ask the user for a PDF to process and show its path in ``input_entry``.

    Opens a file-open dialog filtered to ``*.pdf``. If the dialog returns an
    empty value, the entry is left untouched; otherwise its current content
    is replaced by the chosen path.
    """
    file_path = filedialog.askopenfilename(filetypes=[("PDF 文件", "*.pdf")])
    if file_path:
        input_entry.delete(0, tk.END)
        input_entry.insert(0, file_path)
def select_output_path():
    """Ask the user where to save the result and show it in ``output_entry``.

    Opens a save-as dialog filtered to ``*.pdf`` with ``.pdf`` as the default
    extension. If the dialog returns an empty value, the entry is left
    untouched; otherwise its current content is replaced by the chosen path.
    """
    file_path = filedialog.asksaveasfilename(defaultextension=".pdf", filetypes=[("PDF 文件", "*.pdf")])
    if file_path:
        output_entry.delete(0, tk.END)
        output_entry.insert(0, file_path)
def process_pdf():
    """Validate the chosen paths and strip blank pages on a worker thread.

    Reads the input and output paths from ``input_entry`` / ``output_entry``
    and shows an error dialog if either is empty. Otherwise it resets the
    progress bar and runs ``remove_blank_pages`` in a background thread so
    the Tk main loop keeps running. The thread is polled every 100 ms via
    ``root.after``; once it has finished, a success dialog is shown, or an
    error dialog if the worker raised an exception.
    """
    input_pdf_path = input_entry.get()
    output_pdf_path = output_entry.get()
    if not input_pdf_path or not output_pdf_path:
        messagebox.showerror("错误", "请选择输入和输出路径。")
        return

    try:
        # Parsed here only to obtain the page count for progress percentages.
        total_pages = len(PdfReader(input_pdf_path).pages)

        progress_var.set(0)  # reset the progress bar
        progress_bar['maximum'] = 100
        progress_bar['value'] = 0

        # An exception raised inside the thread never reaches the enclosing
        # ``try``; collect it here so the poller can report it instead of
        # announcing success.
        worker_errors = []

        def run_worker():
            try:
                remove_blank_pages(input_pdf_path, output_pdf_path, progress_var, total_pages)
            except Exception as exc:
                worker_errors.append(exc)

        # Use a thread so the GUI does not freeze
        thread = threading.Thread(target=run_worker)
        thread.start()

        # Poll until the thread has finished
        def check_thread():
            if thread.is_alive():
                root.after(100, check_thread)  # keep polling
            elif worker_errors:
                messagebox.showerror("错误", f"发生了一个错误: {str(worker_errors[0])}")
            else:
                messagebox.showinfo("成功", "空白页移除成功!")

        root.after(100, check_thread)
    except Exception as e:
        messagebox.showerror("错误", f"发生了一个错误: {str(e)}")
# ---- Main window -----------------------------------------------------------
root = tk.Tk()
root.title("PDF 空白页移除工具")

# ---- Widgets (created top to bottom in display order) ----------------------
# Input file: caption, path entry, browse button
input_label = tk.Label(root, text="选择要处理的 PDF 文件:")
input_entry = tk.Entry(root, width=50)
input_button = tk.Button(root, text="浏览...", command=select_input_file)

# Output file: caption, path entry, browse button
output_label = tk.Label(root, text="选择保存位置:")
output_entry = tk.Entry(root, width=50)
output_button = tk.Button(root, text="浏览...", command=select_output_path)

# Progress bar bound to a 0-100 variable
progress_var = tk.DoubleVar()
progress_bar = Progressbar(root, variable=progress_var, maximum=100)

# Button that starts the processing
process_button = tk.Button(root, text="开始移除空白页", command=process_pdf)

# ---- Layout (pack order defines the vertical order) ------------------------
input_label.pack(pady=5)
input_entry.pack(pady=5)
input_button.pack(pady=5)
output_label.pack(pady=5)
output_entry.pack(pady=5)
output_button.pack(pady=5)
progress_bar.pack(pady=20, fill=tk.X)
process_button.pack(pady=20)

# ---- Enter the Tk event loop -----------------------------------------------
root.mainloop()
运行后截图:
微信截图_20241226092042.png (46.09 KB, 下载次数: 2)
下载附件
2024-12-26 09:52 上传
界面上有两个路径输入框:一个用于选择需要处理的 PDF 文件,另一个用于指定处理后文件的保存位置。
制作背景:由于有大量 Excel 文件需要打印,我先把这些 Excel 合并后再生成 PDF,以便查看格式是否发生变化。结果合并出的 PDF 里有一堆空白页,手动根本删不完;在网上找方法,大部分也只是教怎么在预览时删除。于是就制作了这个批量删除空白页的小软件。刚好同事也需要,但是她没有 Python 环境,所以干脆用 tkinter 做了个简单的 UI 并打包。打包后的文件有点大(约 60 MB),这一点也没有再优化,将就着能用!
下载:https://wwww.lanzoue.com/iSbaB2j25jyh 密码:ar92