import os
from tkinter import Tk, filedialog, Label, Button, StringVar, messagebox
from tkinterdnd2 import TkinterDnD, DND_FILES
from tkinter import ttk
from pypdf import PdfReader, PdfWriter
from PIL import Image, ImageEnhance
from io import BytesIO
import threading
import fitz # PyMuPDF
def blacky(im, contrast=3, brightness=1.5, threshold=128):
    """Return a 1-bit version of ``im`` after boosting contrast and brightness.

    The image is converted to grayscale, enhanced with the given contrast
    and brightness factors, and then thresholded: gray levels below
    ``threshold`` map to 0 and all other levels map to 1.
    """
    gray = im.convert('L')
    boosted = ImageEnhance.Contrast(gray).enhance(contrast)
    brightened = ImageEnhance.Brightness(boosted).enhance(brightness)

    # One lookup entry per possible 8-bit gray level.
    lookup = []
    for level in range(256):
        lookup.append(0 if level < threshold else 1)

    return brightened.point(lookup, '1')
def process_page_to_image(page, dpi=300):
    """Render a PyMuPDF page to a bitmap and binarize it.

    Returns the image produced by ``blacky``, or ``None`` if rendering or
    conversion raises; in that case the error is printed, not propagated.
    """
    try:
        pixmap = page.get_pixmap(dpi=dpi)
        # NOTE(review): assumes the pixmap holds 3-channel RGB samples
        # (no alpha) — confirm against the PyMuPDF defaults in use.
        dimensions = [pixmap.width, pixmap.height]
        rendered = Image.frombytes("RGB", dimensions, pixmap.samples)
        # Fixed enhancement settings used for every page.
        binarized = blacky(rendered, contrast=3, brightness=1.8, threshold=140)
    except Exception as e:
        print(f"Error processing page: {e}")
        return None
    return binarized
def process_pdf(pdf_path, writer, dpi, progress_var, progress_bar, root):
    """Binarize every page of a PDF and append the results to ``writer``.

    Args:
        pdf_path: Path of the PDF to open with PyMuPDF.
        writer: PDF writer that receives one page per successfully
            processed source page.
        dpi: Resolution used when rendering each page.
        progress_var: Tk variable that receives the progress text.
        progress_bar: Progress bar widget whose ``value`` is updated.
        root: Tk root used to flush pending redraws after each page.

    Pages for which ``process_page_to_image`` returns ``None`` are skipped,
    so the output may contain fewer pages than the input.
    """
    # The context manager closes the document even if a page fails.
    with fitz.open(pdf_path) as pdf_document:
        total_pages = len(pdf_document)
        if total_pages == 0:
            # Nothing to render; also avoids dividing by zero below.
            return

        for i, page in enumerate(pdf_document):
            processed_image = process_page_to_image(page, dpi)
            if processed_image is not None:
                add_image_to_writer(writer, processed_image)

            # Integer arithmetic guarantees the last page reports exactly 100.
            progress = (i + 1) * 100 // total_pages
            progress_var.set(f"进度: {progress}%")
            progress_bar["value"] = progress
            root.update_idletasks()  # force the UI to repaint
def add_image_to_writer(writer, image):
    """Append ``image`` to ``writer`` as a new PDF page.

    The image is saved as a PDF into an in-memory buffer, and the first
    page of that buffer is then read back and added to ``writer``.
    """
    pdf_bytes = BytesIO()
    # NOTE(review): no resolution is passed to save(), so the page size
    # depends on the PDF encoder's default — confirm this matches the
    # intended output page dimensions.
    image.save(pdf_bytes, format="PDF")
    pdf_bytes.seek(0)  # rewind so the reader parses from the start
    single_page = PdfReader(pdf_bytes).pages[0]
    writer.add_page(single_page)
def add_bookmarks_to_writer(writer, reader, outlines, parent=None):
    """Copy an outline tree into ``writer`` while keeping its nesting.

    ``outlines`` is expected in the nested-list form returned by
    ``reader.outline``: a list that follows an item holds that item's
    children. Each nested list is therefore attached to the bookmark that
    was added just before it, not to the shared ``parent``.

    Args:
        writer: PDF writer that receives the outline items.
        reader: Reader the outline came from; used to resolve page numbers.
        outlines: Outline items and nested lists of outline items.
        parent: Outline item under which top-level entries are added.

    Items whose target page cannot be resolved are skipped, and their
    children are attached to ``parent`` instead.
    """
    # Bookmark that a following nested list should hang under.
    last_added = parent
    for item in outlines:
        if isinstance(item, list):
            add_bookmarks_to_writer(writer, reader, item, parent=last_added)
            continue

        page_num = get_page_number_from_indirect(reader, item.get('/Page'))
        if page_num is None:
            # Do not let children attach to an unrelated earlier sibling.
            last_added = parent
            continue

        last_added = writer.add_outline_item(
            item.get('/Title'), page_num, parent=parent
        )
def get_page_number_from_indirect(reader, indirect_ref):
    """Return the zero-based index of the page matching ``indirect_ref``.

    Args:
        reader: Object exposing ``pages``; each page has ``indirect_ref``.
        indirect_ref: Reference to look up, or ``None`` if there is none.

    Returns:
        The index of the first page whose ``indirect_ref`` equals the given
        reference, or ``None`` when there is no match or no reference.
    """
    if indirect_ref is None:
        # No target: skip the scan and never match a page lacking a ref.
        return None
    return next(
        (
            index
            for index, page in enumerate(reader.pages)
            if page.indirect_ref == indirect_ref
        ),
        None,
    )
def select_file():
    """Show a file-open dialog limited to PDF files and return the choice.

    A temporary hidden Tk root is created for the dialog and destroyed
    afterwards, even if showing the dialog raises.

    Returns:
        Whatever ``filedialog.askopenfilename`` returns for the selection.
    """
    dialog_root = Tk()
    try:
        dialog_root.withdraw()  # keep the helper window invisible
        dialog_root.update()
        return filedialog.askopenfilename(filetypes=[("PDF files", "*.pdf")])
    finally:
        dialog_root.destroy()
def on_drop(event):
    """Handle a drag-and-drop event by processing the first dropped PDF.

    ``event.data`` is parsed as a Tcl list, so paths containing spaces
    (delivered wrapped in braces) and multi-file drops are split correctly.
    The ``.pdf`` extension check is case-insensitive.

    Args:
        event: Drop event carrying ``data`` and the receiving ``widget``.
    """
    dropped_paths = event.widget.tk.splitlist(event.data)
    pdf_paths = [path for path in dropped_paths if path.lower().endswith('.pdf')]
    if not pdf_paths:
        print("请拖放一个有效的 PDF 文件。")
        return

    # Only the first PDF is handled; one worker thread is started per drop.
    threading.Thread(target=process_file, args=(pdf_paths[0],)).start()
def process_file(file_path):
    """Binarize a PDF and save the result next to the original file.

    The output is written to the input's directory with the prefix
    ``已优化_``. Progress widgets are shown while work is in progress and
    are hidden again afterwards, whether or not processing succeeds.

    Args:
        file_path: Path of the PDF to process. An empty or ``None`` path
            (for example from a cancelled file dialog) is ignored.
    """
    if not file_path:
        # Nothing was selected; do not touch the UI or the filesystem.
        return

    print(f"正在处理文件: {file_path}")
    # Show the progress widgets for the duration of the job.
    progress_label.pack(pady=10)
    progress_bar.pack(pady=10)
    try:
        reader = PdfReader(file_path)
        writer = PdfWriter()
        dpi = 300  # rendering resolution

        print("开始处理 PDF 文件...")
        progress_var.set("进度: 0%")
        progress_bar["value"] = 0
        process_pdf(file_path, writer, dpi, progress_var, progress_bar, root)

        print("写入书签中...")
        add_bookmarks_to_writer(writer, reader, reader.outline)

        # Save into the same directory as the source file.
        output_file = os.path.join(
            os.path.dirname(file_path), "已优化_" + os.path.basename(file_path)
        )
        with open(output_file, "wb") as f:
            writer.write(f)

        print(f"处理完成,输出文件为: {output_file}")
        messagebox.showinfo("完成", f"处理完成!文件已保存到:\n{output_file}")
    except Exception as exc:
        # Entry point of the worker thread: report the failure to the user
        # instead of letting the thread die without any visible feedback.
        print(f"处理失败: {exc}")
        messagebox.showerror("错误", f"处理失败:\n{exc}")
    finally:
        progress_label.pack_forget()
        progress_bar.pack_forget()
if __name__ == "__main__":
    # Main window; the TkinterDnD root is what makes drag-and-drop available.
    root = TkinterDnD.Tk()
    root.title("PDF 二值化压缩工具")
    root.geometry("400x300")
    root.drop_target_register(DND_FILES)
    root.dnd_bind('<<Drop>>', on_drop)

    # Usage hint shown at the top of the window.
    label = Label(root, text="拖放 PDF 文件到此窗口\n或点击选择文件", font=("Arial", 12))
    label.pack(pady=20)

    def _choose_and_process():
        """Ask for a PDF and process it in a worker thread if one was chosen."""
        chosen_path = select_file()
        if not chosen_path:
            # Dialog was cancelled; there is nothing to process.
            return
        threading.Thread(target=process_file, args=(chosen_path,)).start()

    # Button that opens the file picker.
    button = Button(root, text="选择文件", command=_choose_and_process)
    button.pack(pady=10)

    # Progress widgets are created here but not packed, so they start hidden.
    progress_var = StringVar()
    progress_var.set("进度: 0%")
    progress_label = Label(root, textvariable=progress_var, font=("Arial", 10))
    progress_bar = ttk.Progressbar(root, orient="horizontal", length=300, mode="determinate")

    root.mainloop()

# https://wwto.lanzouu.com/iRIT72s8h0de
# © 版权声明
# 文章版权归作者所有,未经允许请勿转载。如内容涉嫌侵权,请在本页底部进入<联系我们>进行举报投诉!
# THE END



















# - 最新
# - 最热
# 只看作者