PDF文件批量转换成图片

查看 16|回复 1
作者:guoshenmexian   
1. PDF 批量转换为 JPG,适用于子文件夹较多的目录
2. 同时可以把 PNG、JPEG 格式的图片统一转换成 JPG
3. 可选择转换后是否保留原 PDF 文件

转换成, 图片

guoshenmexian
OP
  

[Python] 纯文本查看 复制代码
import os
import fitz
from PIL import Image
def pdf_to_jpg(pdf_path, zoom_x=2.0, zoom_y=2.0, image_quality=90, keep_pdf=True):
    """Render every page of a PDF to a JPEG file next to the PDF.

    Each page is written as ``<pdf stem>_page<N>.jpg`` (N starts at 1) in the
    directory that contains ``pdf_path``.

    Args:
        pdf_path: Path of the PDF file to convert.
        zoom_x: Horizontal factor of the matrix used to render each page.
        zoom_y: Vertical factor of the matrix used to render each page.
        image_quality: JPEG quality passed to Pillow when saving.
        keep_pdf: When false, the source PDF is deleted after all pages
            have been written.
    """
    output_dir = os.path.dirname(pdf_path)
    stem = os.path.splitext(os.path.basename(pdf_path))[0]
    # The matrix depends only on the arguments, so build it once.
    render_matrix = fitz.Matrix(zoom_x, zoom_y)

    # The context manager closes the document even if rendering or saving a
    # page raises, so the file handle is not leaked on failure.
    with fitz.open(pdf_path) as pdf_document:
        for page_number, page in enumerate(pdf_document, start=1):
            pix = page.get_pixmap(matrix=render_matrix)
            image_path = os.path.join(output_dir, f"{stem}_page{page_number}.jpg")
            image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            image.save(image_path, "JPEG", quality=image_quality)

    # Reached only when every page was converted; the document is already
    # closed here, so the file can be removed.
    if not keep_pdf:
        os.remove(pdf_path)
   
def png_to_jpg(png_path, image_quality=90):
    """Re-encode a PNG as an RGB JPEG in the same directory, then delete the PNG.

    The output file keeps the PNG's base name with a ``.jpg`` extension.

    Args:
        png_path: Path of the PNG file to convert.
        image_quality: JPEG quality passed to Pillow when saving.
    """
    with Image.open(png_path) as source:
        folder, filename = os.path.split(png_path)
        stem, _ext = os.path.splitext(filename)
        target = os.path.join(folder, stem + '.jpg')
        source.convert('RGB').save(target, 'JPEG', quality=image_quality)
    # The original is removed only after the image has been closed.
    os.remove(png_path)
   
def jpeg_to_jpg(jpeg_path, image_quality=90):
    """Re-save a ``.jpeg`` file as an RGB ``.jpg`` alongside it, then delete the source.

    Args:
        jpeg_path: Path of the ``.jpeg`` file to convert.
        image_quality: JPEG quality passed to Pillow when saving.
    """
    with Image.open(jpeg_path) as original:
        converted = original.convert('RGB')
        base_name = os.path.basename(jpeg_path)
        new_name = f"{os.path.splitext(base_name)[0]}.jpg"
        destination = os.path.join(os.path.dirname(jpeg_path), new_name)
        converted.save(destination, 'JPEG', quality=image_quality)
    # Delete the source once the with-block has released it.
    os.remove(jpeg_path)
def convert_all_files_in_folder(folder_path, zoom_x=2.0, zoom_y=2.0, image_quality=90, keep_pdf=True):
    """Walk ``folder_path`` recursively and convert each PDF, PNG and JPEG file.

    Files are matched by case-insensitive name suffix: ``.pdf`` files go to
    ``pdf_to_jpg``, ``.png`` files to ``png_to_jpg`` and ``.jpeg`` files to
    ``jpeg_to_jpg``. Any other file is left untouched.

    Args:
        folder_path: Root directory to scan, including all subdirectories.
        zoom_x: Forwarded to ``pdf_to_jpg``.
        zoom_y: Forwarded to ``pdf_to_jpg``.
        image_quality: JPEG quality forwarded to every converter.
        keep_pdf: Forwarded to ``pdf_to_jpg``.
    """
    for current_dir, _subdirs, filenames in os.walk(folder_path):
        for filename in filenames:
            lowered = filename.lower()
            full_path = os.path.join(current_dir, filename)
            if lowered.endswith('.pdf'):
                pdf_to_jpg(full_path, zoom_x, zoom_y, image_quality, keep_pdf)
                continue
            if lowered.endswith('.png'):
                png_to_jpg(full_path, image_quality)
                continue
            if lowered.endswith('.jpeg'):
                jpeg_to_jpg(full_path, image_quality)
if __name__ == "__main__":
    # Script entry point: convert everything under the target folder using
    # the settings below.
    settings = {
        # Comment this line out to delete each original PDF after conversion.
        "keep_original_pdf": True,
    }
    source_dir = '识别专用'
    zoom = (2.0, 2.0)  # (horizontal, vertical) render factors
    jpeg_quality = 90
    convert_all_files_in_folder(
        folder_path=source_dir,
        zoom_x=zoom[0],
        zoom_y=zoom[1],
        image_quality=jpeg_quality,
        # A missing key falls back to False, i.e. the PDFs are removed.
        keep_pdf=settings.get("keep_original_pdf", False),
    )
您需要登录后才可以回帖 登录 | 立即注册

返回顶部