"""Batch-convert PDF, PNG and JPEG files under a folder to ``.jpg`` images.

PDF pages are rasterized with PyMuPDF (``fitz``); PNG/JPEG files are
re-encoded with Pillow. Output files are written next to their sources.
"""

import os

import fitz
from PIL import Image


def pdf_to_jpg(pdf_path, zoom_x=2.0, zoom_y=2.0, image_quality=90,
               keep_pdf=True):
    """Render every page of a PDF to a JPEG file beside the PDF.

    Page N (1-based) is saved as ``<pdf stem>_page<N>.jpg`` in the directory
    that contains ``pdf_path``.

    Args:
        pdf_path: Path of the PDF file to render.
        zoom_x: Horizontal scale factor used when rasterizing a page.
        zoom_y: Vertical scale factor used when rasterizing a page.
        image_quality: JPEG ``quality`` value handed to Pillow.
        keep_pdf: If False, delete ``pdf_path`` once all pages are written.
    """
    stem = os.path.splitext(os.path.basename(pdf_path))[0]
    out_dir = os.path.dirname(pdf_path)
    # The scaling matrix is the same for every page, so build it once.
    matrix = fitz.Matrix(zoom_x, zoom_y)

    pdf_document = fitz.open(pdf_path)
    try:
        for page_number, page in enumerate(pdf_document, start=1):
            # alpha=False keeps the sample layout at 3 bytes per pixel,
            # which is what Image.frombytes("RGB", ...) below expects.
            pix = page.get_pixmap(matrix=matrix, alpha=False)
            image_path = os.path.join(out_dir, f"{stem}_page{page_number}.jpg")
            image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            image.save(image_path, "JPEG", quality=image_quality)
    finally:
        # Release the file handle even if rendering or saving fails.
        pdf_document.close()

    # Only reached when every page was written successfully.
    if not keep_pdf:
        os.remove(pdf_path)


def _flatten_to_rgb(img):
    """Return an RGB copy of ``img`` with any transparency put on white.

    JPEG has no alpha channel; a plain ``convert("RGB")`` simply drops alpha,
    which typically turns transparent regions black.
    """
    has_alpha = img.mode in ("RGBA", "LA") or (
        img.mode == "P" and "transparency" in img.info
    )
    if not has_alpha:
        return img.convert("RGB")

    rgba = img.convert("RGBA")
    background = Image.new("RGB", rgba.size, (255, 255, 255))
    # Use the alpha band as the paste mask so opaque pixels win over white.
    background.paste(rgba, mask=rgba.split()[3])
    return background


def _image_to_jpg(src_path, image_quality):
    """Re-encode ``src_path`` as ``<stem>.jpg`` beside it, then remove it."""
    jpg_name = os.path.splitext(os.path.basename(src_path))[0] + ".jpg"
    jpg_path = os.path.join(os.path.dirname(src_path), jpg_name)

    # The converted copy is fully loaded, so the source can be closed
    # before the output is written.
    with Image.open(src_path) as img:
        rgb_img = _flatten_to_rgb(img)
    rgb_img.save(jpg_path, "JPEG", quality=image_quality)

    # Never delete the file that was just written (e.g. the caller passed a
    # path that already ends in ".jpg", or the filesystem ignores case).
    if not os.path.samefile(src_path, jpg_path):
        os.remove(src_path)


def png_to_jpg(png_path, image_quality=90):
    """Convert a PNG to ``<stem>.jpg`` in the same folder and delete the PNG.

    Args:
        png_path: Path of the PNG file to convert.
        image_quality: JPEG ``quality`` value handed to Pillow.
    """
    _image_to_jpg(png_path, image_quality)


def jpeg_to_jpg(jpeg_path, image_quality=90):
    """Re-save a ``.jpeg`` file as ``<stem>.jpg`` and delete the original.

    Args:
        jpeg_path: Path of the JPEG file to convert.
        image_quality: JPEG ``quality`` value handed to Pillow.
    """
    _image_to_jpg(jpeg_path, image_quality)


def convert_all_files_in_folder(folder_path, zoom_x=2.0, zoom_y=2.0,
                                image_quality=90, keep_pdf=True):
    """Walk ``folder_path`` recursively and convert each supported file.

    Files are matched by case-insensitive extension: ``.pdf`` goes to
    :func:`pdf_to_jpg`, ``.png`` to :func:`png_to_jpg` and ``.jpeg`` to
    :func:`jpeg_to_jpg`. Anything else is left untouched.

    Args:
        folder_path: Root folder to scan.
        zoom_x: Horizontal scale factor forwarded to :func:`pdf_to_jpg`.
        zoom_y: Vertical scale factor forwarded to :func:`pdf_to_jpg`.
        image_quality: JPEG ``quality`` value forwarded to the converters.
        keep_pdf: Forwarded to :func:`pdf_to_jpg`.
    """
    for root, _dirs, files in os.walk(folder_path):
        for file_name in files:
            file_path = os.path.join(root, file_name)
            lowered = file_name.lower()
            if lowered.endswith(".pdf"):
                pdf_to_jpg(file_path, zoom_x, zoom_y, image_quality, keep_pdf)
            elif lowered.endswith(".png"):
                png_to_jpg(file_path, image_quality)
            elif lowered.endswith(".jpeg"):
                jpeg_to_jpg(file_path, image_quality)


if __name__ == "__main__":
    target_folder = '识别专用'
    zoom_x_factor = 2.0
    zoom_y_factor = 2.0
    save_quality = 90
    config = {
        # Comment out this line and the original PDF files are not kept.
        "keep_original_pdf": True,
    }
    keep_original_pdf = config.get("keep_original_pdf", False)
    convert_all_files_in_folder(
        target_folder,
        zoom_x_factor,
        zoom_y_factor,
        save_quality,
        keep_original_pdf,
    )