应用场景

物流行业每天需要处理大量包含物流面单的PDF文件,这些文件通常包含运单号、收发货人信息、货物详情等重要数据。传统手动处理方式效率低下且容易出错。本方案通过WPF实现一个自动化工具,能够:
批量拆分多页PDF为单页文件
提取每页面单中的关键信息(如运单号、收件人、货物信息等)
根据提取内容自动重命名文件或导出为结构化表格
典型应用场景包括:
物流公司每日运单归档管理
电商平台订单信息批量提取
仓储管理系统入库数据采集
财务部门运费结算数据整理
界面设计

主窗口布局
<Window x:Class="PdfLogisticsProcessor.MainWindow"
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
Title="物流面单批量处理器" Height="600" Width="800">
<DockPanel>
<!-- 菜单栏 -->
<Menu DockPanel.Dock="Top">
<MenuItem Header="文件">
<MenuItem Header="打开PDF" Command="{Binding OpenPdfCommand}"/>
<Separator/>
<MenuItem Header="退出" Command="{Binding ExitCommand}"/>
</MenuItem>
<MenuItem Header="帮助">
<MenuItem Header="关于"/>
</MenuItem>
</Menu>
<!-- 工具栏 -->
<ToolBar DockPanel.Dock="Top">
<Button Content="选择PDF" Command="{Binding OpenPdfCommand}"/>
<Separator/>
<Button Content="处理面单" Command="{Binding ProcessCommand}"/>
</ToolBar>
<!-- 主内容区 -->
<Grid>
<Grid.ColumnDefinitions>
<ColumnDefinition Width="2*"/>
<ColumnDefinition Width="3*"/>
</Grid.ColumnDefinitions>
<!-- 左侧面板 -->
<StackPanel Margin="10">
<GroupBox Header="PDF信息">
<StackPanel>
<TextBlock Text="文件名:"/>
<TextBox Text="{Binding PdfFileName}" IsReadOnly="True"/>
<TextBlock Text="总页数:"/>
<TextBox Text="{Binding PageCount}" IsReadOnly="True"/>
</StackPanel>
</GroupBox>
<GroupBox Header="处理选项" Margin="0,10,0,0">
<StackPanel>
<CheckBox Content="按页拆分PDF" IsChecked="{Binding SplitPdf}"/>
<CheckBox Content="提取面单数据" IsChecked="{Binding ExtractData}"/>
<StackPanel Orientation="Horizontal">
<RadioButton Content="重命名文件" IsChecked="{Binding RenameFiles}" GroupName="Output"/>
<RadioButton Content="导出Excel" IsChecked="{Binding ExportExcel}" GroupName="Output" Margin="10,0,0,0"/>
</StackPanel>
<Button Content="设置区域" Command="{Binding SetRegionCommand}" Margin="0,10,0,0"/>
</StackPanel>
</GroupBox>
<Button Content="开始处理" Command="{Binding ProcessCommand}" Margin="0,20,0,0" Height="30"/>
</StackPanel>
<!-- 右侧面板 -->
<TabControl Grid.Column="1" Margin="10">
<TabItem Header="PDF预览">
<!-- PDF预览控件 -->
</TabItem>
<TabItem Header="日志">
<TextBox Text="{Binding LogText}" IsReadOnly="True" VerticalScrollBarVisibility="Auto"/>
</TabItem>
<TabItem Header="结果预览">
<!-- 数据表格预览 -->
<DataGrid ItemsSource="{Binding ResultData}" AutoGenerateColumns="True"/>
</TabItem>
</TabControl>
</Grid>
<!-- 状态栏 -->
<StatusBar DockPanel.Dock="Bottom">
<TextBlock Text="{Binding StatusMessage}"/>
<ProgressBar Value="{Binding Progress}" Width="200" Height="20" Margin="10,0"/>
</StatusBar>
</DockPanel>
</Window>
详细代码实现
1. 项目结构与依赖
PdfLogisticsProcessor/
├── Models/
│ ├── PdfRegion.cs (定义提取区域)
│ ├── LogisticsData.cs (物流数据结构)
├── Services/
│ ├── PdfSplitterService.cs (PDF拆分服务)
│ ├── DataExtractorService.cs (数据提取服务)
│ ├── ExcelExportService.cs (Excel导出服务)
├── ViewModels/
│ ├── MainViewModel.cs (主视图模型)
├── Views/
│ ├── MainWindow.xaml
│ ├── MainWindow.xaml.cs
通过NuGet安装以下包:
iTextSharp 或 iText7 (PDF处理)
EPPlus (Excel导出)
Newtonsoft.Json (可选,用于配置序列化)
2. PDF拆分服务
using iText.Kernel.Pdf;
using iText.Kernel.Utils;
using System.IO;
public class PdfSplitterService
{
public void SplitPdf(string inputPath, string outputFolder)
{
if (!Directory.Exists(outputFolder))
Directory.CreateDirectory(outputFolder);
using (var pdfDoc = new PdfDocument(new PdfReader(inputPath)))
{
var splitter = new PdfSplitter(pdfDoc);
for (int i = 1; i <= pdfDoc.GetNumberOfPages(); i++)
{
string outputPath = Path.Combine(outputFolder, $"page_{i}.pdf");
using (var newDoc = new PdfDocument(new PdfWriter(outputPath)))
{
splitter.ExtractPage(i).CopyPagesTo(1, 1, newDoc);
}
}
}
}
}
3. 物流数据提取服务
using iText.Kernel.Pdf;
using iText.Kernel.Pdf.Canvas.Parser;
using iText.Kernel.Pdf.Canvas.Parser.Listener;
using System.Collections.Generic;
using System.Text.RegularExpressions;
public class DataExtractorService
{
public LogisticsData ExtractLogisticsData(string pdfPath, PdfRegion region)
{
var data = new LogisticsData();
using (var pdfDoc = new PdfDocument(new PdfReader(pdfPath)))
{
var strategy = new LocationTextExtractionStrategy();
var text = PdfTextExtractor.GetTextFromPage(
pdfDoc.GetPage(region.PageNumber),
strategy,
region.ToRectangle()
);
// 使用正则表达式提取物流数据
data.TrackingNumber = ExtractByPattern(text, @"运单号[::]s*(w+)");
data.Recipient = ExtractByPattern(text, @"收件人[::]s*([^
]+)");
data.Phone = ExtractByPattern(text, @"电话[::]s*([0-9-]+)");
data.Address = ExtractByPattern(text, @"地址[::]s*([^
]+)");
// 其他字段...
}
return data;
}
private string ExtractByPattern(string text, string pattern)
{
var match = Regex.Match(text, pattern);
return match.Success ? match.Groups[1].Value.Trim() : null;
}
}
4. Excel导出服务
using OfficeOpenXml;
using System.Collections.Generic;
using System.IO;
public class ExcelExportService
{
public void ExportToExcel(IEnumerable<LogisticsData> dataList, string outputPath)
{
using (var package = new ExcelPackage())
{
var worksheet = package.Workbook.Worksheets.Add("物流数据");
// 设置表头
worksheet.Cells[1, 1].Value = "运单号";
worksheet.Cells[1, 2].Value = "收件人";
worksheet.Cells[1, 3].Value = "电话";
worksheet.Cells[1, 4].Value = "地址";
// 其他列...
// 填充数据
int row = 2;
foreach (var data in dataList)
{
worksheet.Cells[row, 1].Value = data.TrackingNumber;
worksheet.Cells[row, 2].Value = data.Recipient;
worksheet.Cells[row, 3].Value = data.Phone;
worksheet.Cells[row, 4].Value = data.Address;
// 其他字段...
row++;
}
// 自动调整列宽
worksheet.Cells[worksheet.Dimension.Address].AutoFitColumns();
// 保存文件
package.SaveAs(new FileInfo(outputPath));
}
}
}
5. 主视图模型
using System.Collections.ObjectModel;
using System.ComponentModel;
using System.IO;
using System.Windows.Input;
public class MainViewModel : INotifyPropertyChanged
{
private string _pdfPath;
private int _pageCount;
private string _statusMessage;
private double _progress;
private ObservableCollection<LogisticsData> _resultData = new ObservableCollection<LogisticsData>();
public ICommand OpenPdfCommand { get; }
public ICommand ProcessCommand { get; }
public ICommand SetRegionCommand { get; }
public MainViewModel()
{
OpenPdfCommand = new RelayCommand(OpenPdf);
ProcessCommand = new RelayCommand(ProcessPdf, CanProcessPdf);
SetRegionCommand = new RelayCommand(SetRegion);
}
private void OpenPdf()
{
var dialog = new Microsoft.Win32.OpenFileDialog
{
Filter = "PDF文件|*.pdf"
};
if (dialog.ShowDialog() == true)
{
PdfPath = dialog.FileName;
using (var pdfDoc = new PdfDocument(new PdfReader(PdfPath)))
{
PageCount = pdfDoc.GetNumberOfPages();
}
}
}
private async void ProcessPdf()
{
StatusMessage = "正在处理...";
Progress = 0;
var splitter = new PdfSplitterService();
var extractor = new DataExtractorService();
var exporter = new ExcelExportService();
string tempFolder = Path.Combine(Path.GetTempPath(), "PdfLogisticsProcessor");
try
{
// 1. 拆分PDF
if (SplitPdf)
{
splitter.SplitPdf(PdfPath, tempFolder);
Log("PDF拆分完成");
Progress = 30;
}
// 2. 提取数据
var allData = new List<LogisticsData>();
var pdfFiles = Directory.GetFiles(tempFolder, "*.pdf");
for (int i = 0; i < pdfFiles.Length; i++)
{
var data = extractor.ExtractLogisticsData(pdfFiles[i], SelectedRegion);
allData.Add(data);
// 更新UI
ResultData.Add(data);
Progress = 30 + (i * 70 / pdfFiles.Length);
}
// 3. 导出结果
if (ExportExcel)
{
string outputPath = Path.Combine(
Path.GetDirectoryName(PdfPath),
$"{Path.GetFileNameWithoutExtension(PdfPath)}_物流数据.xlsx");
exporter.ExportToExcel(allData, outputPath);
Log($"数据已导出到: {outputPath}");
}
else if (RenameFiles)
{
foreach (var file in pdfFiles)
{
var data = extractor.ExtractLogisticsData(file, SelectedRegion);
string newName = $"{data.TrackingNumber}_{data.Recipient}.pdf";
File.Move(file, Path.Combine(Path.GetDirectoryName(file), newName));
}
Log("文件重命名完成");
}
Progress = 100;
StatusMessage = "处理完成";
}
catch (Exception ex)
{
Log($"处理出错: {ex.Message}");
StatusMessage = "处理失败";
}
finally
{
// 清理临时文件
if (Directory.Exists(tempFolder))
Directory.Delete(tempFolder, true);
}
}
// 其他属性和方法...
}
总结与优化
已实现功能
PDF批量拆分:将多页PDF按页拆分为单独文件
物流数据提取:从面单中提取运单号、收件人信息等关键数据
自动化处理:支持批量重命名或导出Excel表格
用户友好界面:提供进度显示和日志记录功能
优化方向
性能优化:
使用多线程或并行处理加速大批量文件处理
实现增量处理,避免重复处理相同文件
功能增强:
集成腾讯云OCR API处理扫描件面单
添加模板系统,支持不同格式的面单
实现拖放选择区域功能,提升用户体验
错误处理:
增强异常处理,提供更详细的错误信息
添加重试机制处理网络或文件锁定问题
部署与维护:
添加自动更新功能
实现配置导出导入,方便迁移设置
本方案通过WPF实现了物流面单的自动化处理,显著提高了工作效率和数据准确性,特别适合物流、电商等需要处理大量面单的场景
















暂无评论内容