PDF 二值化压缩工具

import os
from tkinter import Tk, filedialog, Label, Button, StringVar, messagebox
from tkinterdnd2 import TkinterDnD, DND_FILES
from tkinter import ttk
from pypdf import PdfReader, PdfWriter
from PIL import Image, ImageEnhance
from io import BytesIO
import threading
import fitz  # PyMuPDF

def blacky(im, contrast=3, brightness=1.5, threshold=128):
    """
    Binarize an image after boosting its contrast and brightness.

    The image is converted to greyscale ('L'), passed through
    ImageEnhance.Contrast and then ImageEnhance.Brightness with the given
    factors, and finally mapped through a 256-entry lookup table into
    mode '1'.

    Args:
        im: Source image (anything offering PIL's ``convert``).
        contrast: Factor handed to ``ImageEnhance.Contrast.enhance``.
        brightness: Factor handed to ``ImageEnhance.Brightness.enhance``.
        threshold: Grey levels below this map to 0, all others to 1.

    Returns:
        The image produced by ``point(lookup, '1')``.
    """
    grey = im.convert('L')
    grey = ImageEnhance.Contrast(grey).enhance(contrast)
    grey = ImageEnhance.Brightness(grey).enhance(brightness)
    # One lookup entry per grey level: 0 below the threshold, 1 otherwise.
    lookup = [int(not level < threshold) for level in range(256)]
    return grey.point(lookup, '1')

def process_page_to_image(page, dpi=300):
    """
    Render one PDF page with PyMuPDF and return it binarized.

    Args:
        page: A page object offering ``get_pixmap(dpi=...)``.
        dpi: Resolution passed to ``get_pixmap``.

    Returns:
        The result of ``blacky`` on the rendered page, or None when any step
        raised (the error is printed, not propagated).
    """
    try:
        pixmap = page.get_pixmap(dpi=dpi)
        size = [pixmap.width, pixmap.height]
        # NOTE(review): assumes the pixmap samples are 3-byte RGB without an
        # alpha channel — confirm against the get_pixmap defaults.
        rendered = Image.frombytes("RGB", size, pixmap.samples)
        # Fixed enhancement settings used for every rendered page.
        binarized = blacky(rendered, contrast=3, brightness=1.8, threshold=140)
    except Exception as e:
        print(f"Error processing page: {e}")
        return None
    return binarized

def process_pdf(pdf_path, writer, dpi, progress_var, progress_bar, root):
    """
    Binarize every page of a PDF into `writer` and report progress.

    Each page is rendered and binarized by ``process_page_to_image`` and
    appended with ``add_image_to_writer``. After every page the progress
    label and bar are set to the completed percentage.

    The Tk widgets are updated directly from the calling thread. In this
    file the function is reached through ``process_file``, which is started
    on a ``threading.Thread``, so these updates do not run on the thread
    that owns the main loop.

    Args:
        pdf_path: Path of the PDF to open with PyMuPDF.
        writer: Target writer that receives the processed pages.
        dpi: Render resolution forwarded to ``process_page_to_image``.
        progress_var: Tk variable that receives the progress text.
        progress_bar: Progress bar whose ``"value"`` is set to 0-100.
        root: Tk root, used to flush pending redraws.
    """
    # The context manager closes the document even if a page raises.
    with fitz.open(pdf_path) as pdf_document:
        total_pages = len(pdf_document)
        if total_pages == 0:
            # Nothing to render; also avoids dividing by zero below.
            return

        for pages_done, page in enumerate(pdf_document, start=1):
            processed_image = process_page_to_image(page, dpi)
            # A page that failed to render is skipped, so the pages after it
            # move up by one position in the output.
            if processed_image is not None:
                add_image_to_writer(writer, processed_image)

            # Integer arithmetic so the final page always reports exactly 100.
            progress = pages_done * 100 // total_pages
            progress_var.set(f"进度: {progress}%")
            progress_bar["value"] = progress
            root.update_idletasks()  # force the UI to repaint now

def add_image_to_writer(writer, image):
    """
    Append `image` to `writer` as one new PDF page.

    The image is saved in PDF format into an in-memory buffer, the buffer is
    reopened with PdfReader, and its first page is added to the writer.
    """
    pdf_bytes = BytesIO()
    # NOTE(review): no resolution is passed to save(), so the resulting page
    # size depends on the saver's default — confirm it matches the render DPI.
    image.save(pdf_bytes, format="PDF")
    pdf_bytes.seek(0)  # rewind so the reader starts at the beginning
    one_page_pdf = PdfReader(pdf_bytes)
    writer.add_page(one_page_pdf.pages[0])

def add_bookmarks_to_writer(writer, reader, outlines, parent=None):
    """
    Copy an outline (bookmark) tree into `writer`, keeping its nesting.

    `outlines` is expected in the shape of pypdf's ``reader.outline``: a list
    of outline items in which a nested list holds the children of the item
    immediately before it. Children are therefore attached to the bookmark
    created for that preceding item, not to the enclosing level.

    Args:
        writer: Target writer offering ``add_outline_item``.
        reader: Source reader, used to turn page references into indexes.
        outlines: Outline items and nested child lists, as described above.
        parent: Bookmark under which this level's items are created, or None
            for the top level.

    Items whose ``/Page`` reference matches no page of `reader` are skipped.
    Their children, if any, are attached to `parent` instead.
    """
    last_added = None
    for item in outlines:
        if isinstance(item, list):
            # Children of the previous item; fall back to the enclosing level
            # when that item could not be created.
            children_parent = last_added if last_added is not None else parent
            add_bookmarks_to_writer(writer, reader, item, children_parent)
            continue

        last_added = None
        title = item.get('/Title')
        page_num = get_page_number_from_indirect(reader, item.get('/Page'))
        if page_num is not None:
            last_added = writer.add_outline_item(title, page_num, parent=parent)

def get_page_number_from_indirect(reader, indirect_ref):
    """
    Return the zero-based index of the page whose reference matches.

    Walks ``reader.pages`` in order and compares each page's ``indirect_ref``
    with `indirect_ref`. Returns the index of the first match, or None when
    no page matches.
    """
    matching_indexes = (
        index
        for index, candidate in enumerate(reader.pages)
        if candidate.indirect_ref == indirect_ref
    )
    return next(matching_indexes, None)

def select_file():
    """
    Ask the user to pick a PDF through the native file-open dialog.

    A temporary, hidden Tk root is created to host the dialog and is
    destroyed afterwards, including when the dialog raises.

    Returns:
        Whatever ``filedialog.askopenfilename`` returns: the chosen path, or
        an empty value when the user cancels.
    """
    hidden_root = Tk()
    try:
        hidden_root.withdraw()  # keep the helper window off screen
        hidden_root.update()
        return filedialog.askopenfilename(filetypes=[("PDF files", "*.pdf")])
    finally:
        # Always release the helper window so it cannot leak on errors.
        hidden_root.destroy()

def on_drop(event):
    """
    Handle a drag-and-drop event by processing the dropped PDF.

    The dropped path is read from ``event.data``. When it names a PDF
    (extension compared case-insensitively), ``process_file`` is started on a
    new thread; otherwise a hint is printed.

    Only a single dropped file is supported. A drop carrying several
    brace-wrapped paths is rejected with the same hint.
    """
    file_path = event.data.strip()

    # Drop data is formatted as a Tcl list, so a path containing spaces
    # arrives wrapped in braces, e.g. "{C:/my docs/a.pdf}". Unwrap it only
    # when the braces enclose the whole value (i.e. exactly one element).
    if file_path.startswith('{') and file_path.endswith('}'):
        inner = file_path[1:-1]
        if '{' not in inner and '}' not in inner:
            file_path = inner

    if file_path.lower().endswith('.pdf'):
        threading.Thread(target=process_file, args=(file_path,)).start()
    else:
        print("请拖放一个有效的 PDF 文件。")

def process_file(file_path):
    """
    Binarize one PDF and save the result next to the original file.

    Steps: show the progress widgets, render and binarize every page via
    ``process_pdf``, copy the source outline via ``add_bookmarks_to_writer``,
    write the output as "已优化_<original name>" in the source directory,
    and report completion in a message box. The progress widgets are hidden
    again afterwards, whether or not processing succeeded.

    Relies on the module-level ``root``, ``progress_var``, ``progress_label``
    and ``progress_bar`` created by the script's ``__main__`` section.

    Args:
        file_path: Path of the PDF to process. An empty value (for example a
            cancelled file dialog) is ignored.

    Raises:
        Any exception from reading, rendering, or writing is shown in an
        error dialog and then re-raised.
    """
    if not file_path:
        return

    print(f"正在处理文件: {file_path}")

    # Reveal the progress widgets for the duration of the run.
    progress_label.pack(pady=10)
    progress_bar.pack(pady=10)
    try:
        reader = PdfReader(file_path)
        writer = PdfWriter()

        dpi = 300  # render resolution for every page
        print("开始处理 PDF 文件...")
        progress_var.set("进度: 0%")
        progress_bar["value"] = 0
        process_pdf(file_path, writer, dpi, progress_var, progress_bar, root)

        print("写入书签中...")
        add_bookmarks_to_writer(writer, reader, reader.outline)

        # Write the result into the same directory as the source file.
        output_file = os.path.join(os.path.dirname(file_path), "已优化_" + os.path.basename(file_path))
        with open(output_file, "wb") as f:
            writer.write(f)

        print(f"处理完成,输出文件为: {output_file}")
        messagebox.showinfo("完成", f"处理完成!文件已保存到:\n{output_file}")
    except Exception as exc:
        # This is the worker thread's entry point: surface the failure to the
        # user, then re-raise so the traceback is still reported.
        print(f"处理失败: {exc}")
        messagebox.showerror("错误", f"处理失败:\n{exc}")
        raise
    finally:
        # Hide the progress widgets even when processing failed.
        progress_label.pack_forget()
        progress_bar.pack_forget()

if __name__ == "__main__":
    # Build the main window with drag-and-drop support and route file drops
    # to on_drop.
    root = TkinterDnD.Tk()
    root.title("PDF 二值化压缩工具")
    root.geometry("400x300")
    root.drop_target_register(DND_FILES)
    root.dnd_bind('<<Drop>>', on_drop)

    # Usage hint shown at the top of the window.
    label = Label(root, text="拖放 PDF 文件到此窗口\n或点击选择文件", font=("Arial", 12))
    label.pack(pady=20)

    def choose_and_process():
        """Ask for a PDF and process it on a worker thread; do nothing on cancel."""
        chosen_path = select_file()
        if not chosen_path:
            # Dialog cancelled: there is no file to process.
            return
        threading.Thread(target=process_file, args=(chosen_path,)).start()

    # Button that opens the file dialog.
    button = Button(root, text="选择文件", command=choose_and_process)
    button.pack(pady=10)

    # Progress label and bar: created here but not packed, so they stay
    # hidden until process_file shows them.
    progress_var = StringVar()
    progress_var.set("进度: 0%")
    progress_label = Label(root, textvariable=progress_var, font=("Arial", 10))

    progress_bar = ttk.Progressbar(root, orient="horizontal", length=300, mode="determinate")

    root.mainloop()

PDF 二值化压缩工具

https://wwto.lanzouu.com/iRIT72s8h0de

© 版权声明
THE END
如果内容对您有所帮助,就支持一下吧!
点赞0 分享
评论 共8条

请登录后发表评论