Zynq平台开发基础详解及C/C++实例

Zynq平台开发基础详解及C/C++实例

1. Zynq平台概述

1.1 Zynq架构简介

Zynq是Xilinx(现已被AMD收购)推出的系统级芯片(SoC)系列,它将ARM处理器核心(处理系统PS)和可编程逻辑单元(PL)集成在同一芯片上。主要系列包括:

Zynq-7000:集成双核Cortex-A9处理器和Artix/Kintex FPGA
Zynq UltraScale+ MPSoC:集成四核Cortex-A53(应用处理器)、双核Cortex-R5(实时处理器)、Mali-400 GPU和UltraScale FPGA
Zynq RFSoC:在UltraScale+ MPSoC基础上增加射频数据转换器

Zynq的主要优势在于将软件的灵活性和硬件的高性能结合在一起,特别适合需要同时进行通用处理和专用硬件加速的应用场景。

1.2 Zynq-7000系列基本组成

以Zynq-7000为例,其基本组成包括:

处理系统(PS)部分:

双核ARM Cortex-A9处理器(最高1GHz)
L1/L2缓存
片上存储器(OCM)
外部存储器控制器(DDR)
丰富的外设(UART、I2C、SPI、USB、Ethernet等)
通用I/O接口(GPIO)

可编程逻辑(PL)部分:

可编程逻辑单元(LUT)
存储资源(BRAM)
DSP模块
可配置I/O
时钟管理单元

PS-PL接口:

高性能AXI接口(HP0-HP3)
通用AXI接口(GP0-GP1)
加速一致性端口(ACP)
EMIO(扩展MIO)接口

2. Zynq开发环境搭建

2.1 开发工具链

开发Zynq平台应用需要的主要工具包括:

Vivado设计套件:用于硬件设计、综合、实现和比特流生成
Vitis统一软件平台:自2019.2版本起取代原Xilinx SDK,用于软件应用程序开发、调试和部署
PetaLinux工具:用于构建定制的嵌入式Linux系统
XSCT(Xilinx Software Command-line Tool):命令行开发工具

2.2 硬件设计工作流程

在Vivado中创建项目
添加处理系统IP(ZYNQ7 Processing System)
通过Block Design配置PS设置
添加和配置自定义PL IP核
连接PS和PL
生成HDL封装、综合、实现设计
导出硬件定义文件(.xsa)

2.3 软件开发工作流程

在Vitis中创建平台项目,导入硬件定义
创建应用项目,选择合适的OS(裸机/Linux/FreeRTOS)
编写应用程序代码
编译、调试应用程序
将应用程序部署到Zynq板上运行

3. Zynq PS编程基础

3.1 裸机(Bare-metal)应用程序开发

c

/**
 * hello_world.c - Zynq裸机应用程序示例
 *
 * 这个简单的示例演示了如何在Zynq上进行UART输出
 */

#include <stdio.h>
#include "platform.h"
#include "xil_printf.h"
#include "xparameters.h"
#include "sleep.h"

int main()
{
    // 初始化平台
    init_platform();

    // 打印欢迎信息
    print("Hello from Zynq!

");
    xil_printf("This is a bare-metal application running on a Zynq device

");
    
    // 打印系统参数
    xil_printf("CPU Clock Frequency: %u Hz

", XPAR_CPU_CORTEXA9_CORE_CLOCK_FREQ_HZ);
    xil_printf("UART Baud Rate: %u bps

", XPAR_PS7_UART_1_UART_CLK_FREQ_HZ / 16);
    
    // 简单的倒计时循环
    for (int i = 10; i > 0; i--) {
        xil_printf("Counting down: %d

", i);
        sleep(1);  // 延时1秒
    }
    
    xil_printf("Countdown complete!

");
    
    // 清理平台资源
    cleanup_platform();
    
    return 0;
}

3.2 GPIO控制实例

c

/**
 * gpio_example.c - Zynq GPIO控制示例(AXI GPIO / XGpio驱动)
 * 
 * 演示如何通过XGpio驱动的两个通道控制LED和读取按钮状态
 */

#include <stdio.h>
#include "platform.h"
#include "xgpio.h"
#include "xparameters.h"
#include "sleep.h"

// Device ID of the GPIO instance driven through the XGpio driver.
// XGpio is the AXI GPIO driver (dual channel), so the canonical AXI GPIO
// definition is used here; the PS GPIO ID belongs to the separate XGpioPs
// driver and only matched by coincidence of numbering.
// NOTE(review): macro names depend on the generated xparameters.h - confirm.
#define GPIO_DEVICE_ID  XPAR_GPIO_0_DEVICE_ID
#define LED_CHANNEL     1  // GPIO channel 1 drives the LEDs
#define BTN_CHANNEL     2  // GPIO channel 2 reads the buttons

// Bit masks for the individual LEDs and buttons
#define LED_0_MASK      0x01  // bit mask of LED 0
#define LED_1_MASK      0x02  // bit mask of LED 1
#define BTN_0_MASK      0x01  // bit mask of button 0
#define BTN_1_MASK      0x02  // bit mask of button 1

int main()
{
    int Status;
    XGpio Gpio;        // GPIO实例
    u32 LedData = 0;   // LED状态数据
    u32 BtnData;       // 按钮状态数据
    u32 BtnDataOld;    // 前一个按钮状态
    
    init_platform();
    
    xil_printf("Zynq PS GPIO Example

");
    
    // 初始化GPIO驱动
    Status = XGpio_Initialize(&Gpio, GPIO_DEVICE_ID);
    if (Status != XST_SUCCESS) {
        xil_printf("GPIO初始化失败

");
        return XST_FAILURE;
    }
    
    // 设置LED通道为输出,按钮通道为输入
    XGpio_SetDataDirection(&Gpio, LED_CHANNEL, 0x00);  // 0表示输出
    XGpio_SetDataDirection(&Gpio, BTN_CHANNEL, 0xFF);  // 1表示输入
    
    // 读取初始按钮状态
    BtnDataOld = XGpio_DiscreteRead(&Gpio, BTN_CHANNEL);
    
    xil_printf("按下按钮0控制LED 0,按下按钮1控制LED 1

");
    
    // 主循环
    while (1) {
        // 读取当前按钮状态
        BtnData = XGpio_DiscreteRead(&Gpio, BTN_CHANNEL);
        
        // 检测按钮变化
        if (BtnData != BtnDataOld) {
            // 按钮0控制LED 0
            if ((BtnData & BTN_0_MASK) && !(BtnDataOld & BTN_0_MASK)) {
                // 按钮0按下
                LedData ^= LED_0_MASK;  // 切换LED 0状态
                xil_printf("LED 0切换至: %s

", (LedData & LED_0_MASK) ? "开" : "关");
            }
            
            // 按钮1控制LED 1
            if ((BtnData & BTN_1_MASK) && !(BtnDataOld & BTN_1_MASK)) {
                // 按钮1按下
                LedData ^= LED_1_MASK;  // 切换LED 1状态
                xil_printf("LED 1切换至: %s

", (LedData & LED_1_MASK) ? "开" : "关");
            }
            
            // 更新LED状态
            XGpio_DiscreteWrite(&Gpio, LED_CHANNEL, LedData);
            
            // 保存当前按钮状态
            BtnDataOld = BtnData;
        }
        
        // 短暂延时以减少循环速度
        usleep(1000);
    }
    
    cleanup_platform();
    return 0;
}

3.3 中断处理实例

c

/**
 * interrupt_example.c - Zynq PS中断处理示例
 * 
 * 演示如何设置和处理GPIO中断
 */

#include <stdio.h>
#include "platform.h"
#include "xgpio.h"
#include "xscugic.h"
#include "xil_exception.h"
#include "xparameters.h"

// Device and interrupt IDs.
// The example drives the GPIO through the XGpio (AXI GPIO) driver, so the
// canonical AXI GPIO device ID and its fabric interrupt ID are used instead
// of the PS GPIO (XGpioPs) definitions, which belong to a different driver
// and a different interrupt line.
// NOTE(review): macro names depend on the generated xparameters.h - confirm.
#define GPIO_DEVICE_ID          XPAR_GPIO_0_DEVICE_ID
#define INTC_DEVICE_ID          XPAR_SCUGIC_SINGLE_DEVICE_ID
#define GPIO_INTERRUPT_ID       XPAR_FABRIC_GPIO_0_VEC_ID

// GPIO channels and pin masks
#define BTN_CHANNEL             1  // channel the buttons are read from
#define LED_CHANNEL             2  // channel the LEDs are driven on
#define BTN_MASK                0x03  // all button pins
#define LED_MASK                0x03  // all LED pins

// Global state shared by main() and the interrupt handler
XGpio Gpio;        // GPIO driver instance
XScuGic IntcInst;  // interrupt controller driver instance
static int LedData = 0;  // current LED output bits, toggled by GpioHandler

// 中断服务程序
void GpioHandler(void *CallbackRef)
{
    XGpio *GpioPtr = (XGpio *)CallbackRef;
    u32 Pending;
    
    // 读取中断状态
    Pending = XGpio_InterruptGetStatus(GpioPtr);
    
    // 清除中断
    XGpio_InterruptClear(GpioPtr, Pending);
    
    // 如果是按钮中断
    if (Pending & BTN_MASK) {
        // 读取按钮状态
        u32 BtnState = XGpio_DiscreteRead(GpioPtr, BTN_CHANNEL);
        
        // 根据按钮状态切换LED
        if (BtnState & 0x01) {
            // 按钮0按下,切换LED 0
            LedData ^= 0x01;
            xil_printf("按钮0按下,LED 0切换为: %s

", 
                      (LedData & 0x01) ? "开" : "关");
        }
        
        if (BtnState & 0x02) {
            // 按钮1按下,切换LED 1
            LedData ^= 0x02;
            xil_printf("按钮1按下,LED 1切换为: %s

", 
                      (LedData & 0x02) ? "开" : "关");
        }
        
        // 更新LED状态
        XGpio_DiscreteWrite(GpioPtr, LED_CHANNEL, LedData);
    }
}

/**
 * Connects GpioHandler to the GPIO interrupt and enables it.
 *
 * @param IntcInstancePtr initialized interrupt controller instance.
 * @param GpioInstancePtr initialized GPIO instance; also passed to the
 *                        handler as its callback reference.
 * @return XST_SUCCESS, or XST_FAILURE when the handler cannot be connected.
 */
int SetupGpioInterrupt(XScuGic *IntcInstancePtr, XGpio *GpioInstancePtr)
{
    // XGpio_InterruptEnable() takes a per-channel mask, not a pin mask, so
    // only the channel the buttons are wired to is enabled.
    const u32 BtnChannelIntr =
        (BTN_CHANNEL == 1) ? XGPIO_IR_CH1_MASK : XGPIO_IR_CH2_MASK;
    int Status;

    Status = XScuGic_Connect(IntcInstancePtr, GPIO_INTERRUPT_ID,
                             (Xil_InterruptHandler)GpioHandler,
                             (void *)GpioInstancePtr);
    if (Status != XST_SUCCESS) {
        return XST_FAILURE;
    }

    // Enable the interrupt line at the interrupt controller
    XScuGic_Enable(IntcInstancePtr, GPIO_INTERRUPT_ID);

    // Enable the interrupt source inside the GPIO core
    XGpio_InterruptEnable(GpioInstancePtr, BtnChannelIntr);
    XGpio_InterruptGlobalEnable(GpioInstancePtr);

    return XST_SUCCESS;
}

/**
 * Initializes the interrupt controller driver and hooks it into the
 * processor's IRQ exception.
 *
 * @param IntcInstancePtr interrupt controller instance to initialize.
 * @return XST_SUCCESS, or XST_FAILURE when the configuration lookup or the
 *         driver initialization fails.
 */
int SetupInterruptSystem(XScuGic *IntcInstancePtr)
{
    XScuGic_Config *Cfg = XScuGic_LookupConfig(INTC_DEVICE_ID);

    if (Cfg == NULL) {
        return XST_FAILURE;
    }

    if (XScuGic_CfgInitialize(IntcInstancePtr, Cfg,
                              Cfg->CpuBaseAddress) != XST_SUCCESS) {
        return XST_FAILURE;
    }

    // Route the IRQ exception to the interrupt controller's dispatcher,
    // then enable exceptions.
    Xil_ExceptionInit();
    Xil_ExceptionRegisterHandler(XIL_EXCEPTION_ID_INT,
                                 (Xil_ExceptionHandler)XScuGic_InterruptHandler,
                                 IntcInstancePtr);
    Xil_ExceptionEnable();

    return XST_SUCCESS;
}

int main()
{
    int Status;
    
    init_platform();
    
    xil_printf("Zynq PS中断处理示例

");
    
    // 初始化GPIO
    Status = XGpio_Initialize(&Gpio, GPIO_DEVICE_ID);
    if (Status != XST_SUCCESS) {
        xil_printf("GPIO初始化失败

");
        return XST_FAILURE;
    }
    
    // 设置GPIO方向
    XGpio_SetDataDirection(&Gpio, BTN_CHANNEL, BTN_MASK);  // 按钮设为输入
    XGpio_SetDataDirection(&Gpio, LED_CHANNEL, 0);         // LED设为输出
    
    // 初始化LED为关闭状态
    XGpio_DiscreteWrite(&Gpio, LED_CHANNEL, 0);
    
    // 设置中断系统
    Status = SetupInterruptSystem(&IntcInst);
    if (Status != XST_SUCCESS) {
        xil_printf("中断系统设置失败

");
        return XST_FAILURE;
    }
    
    // 设置GPIO中断
    Status = SetupGpioInterrupt(&IntcInst, &Gpio);
    if (Status != XST_SUCCESS) {
        xil_printf("GPIO中断设置失败

");
        return XST_FAILURE;
    }
    
    xil_printf("按下按钮0或按钮1以切换对应的LED

");
    
    // 主循环 - 保持程序运行
    while (1) {
        // 空循环,中断服务程序处理按钮事件
    }
    
    cleanup_platform();
    return 0;
}

3.4 定时器应用实例

c

/**
 * timer_example.c - Zynq PS定时器应用示例
 * 
 * 演示如何配置和使用Private Timer
 */

#include <stdio.h>
#include "platform.h"
#include "xil_printf.h"
#include "xscutimer.h"
#include "xscugic.h"
#include "xil_exception.h"
#include "xparameters.h"

// Device and interrupt IDs
#define TIMER_DEVICE_ID         XPAR_XSCUTIMER_0_DEVICE_ID
#define INTC_DEVICE_ID          XPAR_SCUGIC_SINGLE_DEVICE_ID
#define TIMER_IRPT_INTR         XPAR_SCUTIMER_INTR

// Timer parameters
#define TIMER_LOAD_VALUE        333333333  // about 1 second: 333,333,333 ticks at the assumed 333.33 MHz timer clock

// Global state shared by main() and the timer interrupt handler
XScuTimer TimerInst;     // private timer driver instance
XScuGic   IntcInst;      // interrupt controller driver instance
// Set to 1 by TimerIntrHandler and polled/cleared by the main loop. It must be
// volatile: otherwise the compiler may read it once and spin forever on the
// cached value.
volatile int TimerExpired;

/**
 * Interrupt service routine for the private timer.
 *
 * Clears the timer's interrupt status, raises the TimerExpired flag for the
 * main loop and prints a notification.
 *
 * @param CallBackRef pointer to the XScuTimer instance registered with
 *                    XScuGic_Connect().
 */
void TimerIntrHandler(void *CallBackRef)
{
    XScuTimer *TimerPtr = (XScuTimer *)CallBackRef;

    XScuTimer_ClearInterruptStatus(TimerPtr);

    TimerExpired = 1;
    // Printing from an ISR is kept for demonstration purposes only.
    xil_printf("定时器中断触发\r\n");
}

/**
 * Connects TimerIntrHandler to the timer interrupt and enables it.
 *
 * @param IntcInstancePtr  initialized interrupt controller instance.
 * @param TimerInstancePtr initialized timer instance; also passed to the
 *                         handler as its callback reference.
 * @param TimerIntrId      interrupt ID of the timer at the controller.
 * @return XST_SUCCESS, or the status returned by XScuGic_Connect() on failure.
 */
int SetupTimerInterrupt(XScuGic *IntcInstancePtr, XScuTimer *TimerInstancePtr,
                        u16 TimerIntrId)
{
    // XScuGic_Connect() declares its handler parameter as Xil_InterruptHandler,
    // so that type is used for the cast (matching the GPIO interrupt example).
    int Status = XScuGic_Connect(IntcInstancePtr, TimerIntrId,
                                 (Xil_InterruptHandler)TimerIntrHandler,
                                 (void *)TimerInstancePtr);
    if (Status != XST_SUCCESS) {
        return Status;
    }

    // Enable the interrupt line at the interrupt controller
    XScuGic_Enable(IntcInstancePtr, TimerIntrId);

    // Enable interrupt generation in the timer itself
    XScuTimer_EnableInterrupt(TimerInstancePtr);

    return XST_SUCCESS;
}

/**
 * Brings up the interrupt controller driver and registers its dispatcher as
 * the handler of the IRQ exception.
 *
 * @param IntcInstancePtr interrupt controller instance to initialize.
 * @return XST_SUCCESS, or XST_FAILURE when the configuration lookup or the
 *         driver initialization fails.
 */
int SetupInterruptSystem(XScuGic *IntcInstancePtr)
{
    XScuGic_Config *GicConfig = XScuGic_LookupConfig(INTC_DEVICE_ID);
    int InitStatus;

    if (!GicConfig) {
        return XST_FAILURE;
    }

    InitStatus = XScuGic_CfgInitialize(IntcInstancePtr, GicConfig,
                                       GicConfig->CpuBaseAddress);
    if (InitStatus != XST_SUCCESS) {
        return XST_FAILURE;
    }

    // Exception plumbing: init, register the dispatcher for IRQ, enable.
    Xil_ExceptionInit();
    Xil_ExceptionRegisterHandler(XIL_EXCEPTION_ID_INT,
                                 (Xil_ExceptionHandler)XScuGic_InterruptHandler,
                                 IntcInstancePtr);
    Xil_ExceptionEnable();

    return XST_SUCCESS;
}

int main()
{
    int Status;
    int Count = 0;
    
    init_platform();
    
    xil_printf("Zynq PS定时器应用示例

");
    
    // 初始化定时器
    XScuTimer_Config *ConfigPtr;
    ConfigPtr = XScuTimer_LookupConfig(TIMER_DEVICE_ID);
    if (NULL == ConfigPtr) {
        return XST_FAILURE;
    }
    
    Status = XScuTimer_CfgInitialize(&TimerInst, ConfigPtr,
                                    ConfigPtr->BaseAddr);
    if (Status != XST_SUCCESS) {
        return XST_FAILURE;
    }
    
    // 设置中断系统
    Status = SetupInterruptSystem(&IntcInst);
    if (Status != XST_SUCCESS) {
        return XST_FAILURE;
    }
    
    // 设置定时器中断
    Status = SetupTimerInterrupt(&IntcInst, &TimerInst, TIMER_IRPT_INTR);
    if (Status != XST_SUCCESS) {
        return XST_FAILURE;
    }
    
    // 配置定时器参数
    XScuTimer_SetPrescaler(&TimerInst, 0);                 // 设置预分频器为0
    XScuTimer_LoadTimer(&TimerInst, TIMER_LOAD_VALUE);     // 加载计数值
    XScuTimer_EnableAutoReload(&TimerInst);                // 启用自动重载
    
    // 启动定时器
    XScuTimer_Start(&TimerInst);
    
    xil_printf("定时器已启动,等待中断...

");
    
    // 主循环
    while (1) {
        // 如果定时器到期
        if (TimerExpired) {
            // 清除标志
            TimerExpired = 0;
            
            // 增加计数并打印信息
            Count++;
            xil_printf("计数: %d

", Count);
            
            // 如果计数达到5次,退出循环
            if (Count >= 5) {
                break;
            }
        }
    }
    
    // 停止定时器
    XScuTimer_Stop(&TimerInst);
    xil_printf("定时器已停止,示例完成

");
    
    cleanup_platform();
    return 0;
}

4. PS-PL交互与AXI接口

4.1 AXI接口基本概念

AXI(Advanced eXtensible Interface)是ARM AMBA协议的一部分,用于芯片内部的高性能、高带宽和低延迟的通信。Zynq中主要使用的AXI接口如下:

AXI-Lite:简化的AXI接口,用于低带宽控制和状态寄存器访问
AXI4:全功能AXI接口,支持突发传输,用于高带宽数据传输
AXI-Stream:用于单向流式数据传输,无需地址

在Zynq中,PS和PL之间的通信主要通过以下接口:

GP AXI接口:通用AXI接口(32位),包括PS作为主设备的M_AXI_GP0/1和PL作为主设备的S_AXI_GP0/1;最常见的用法是PS通过M_AXI_GP以AXI-Lite方式访问PL中的控制/状态寄存器
HP AXI接口:高性能AXI接口,使用AXI4,PL作为主设备
ACP接口:加速一致性端口,PL作为主设备经由SCU访问存储器,并与Cortex-A9的L1/L2缓存保持一致性

4.2 AXI-Lite外设控制示例

c

/**
 * axi_lite_example.c - AXI-Lite接口控制示例
 * 
 * 演示如何通过AXI-Lite接口控制PL中的自定义IP
 * 假设自定义IP具有以下寄存器布局:
 *   - 0x00: 控制寄存器
 *   - 0x04: 状态寄存器
 *   - 0x08: 数据寄存器(输入)
 *   - 0x0C: 结果寄存器(输出)
 */

#include <stdio.h>
#include "platform.h"
#include "xil_printf.h"
#include "xil_io.h"
#include "xparameters.h"
#include "sleep.h"

// Base address of the custom IP (macro expected to come from the generated xparameters.h)
#define CUSTOM_IP_BASEADDR      XPAR_CUSTOM_IP_0_S00_AXI_BASEADDR

// Register offsets
#define CONTROL_REG_OFFSET      0x00
#define STATUS_REG_OFFSET       0x04
#define DATA_REG_OFFSET         0x08
#define RESULT_REG_OFFSET       0x0C

// Control register bit definitions
#define CTRL_START_BIT          0x01    // start the operation
#define CTRL_RESET_BIT          0x02    // reset the IP
#define CTRL_MODE_MASK          0x0C    // operation mode field (2 bits)
#define CTRL_MODE_ADD           0x00    // addition mode
#define CTRL_MODE_SUB           0x04    // subtraction mode
#define CTRL_MODE_MUL           0x08    // multiplication mode
#define CTRL_MODE_DIV           0x0C    // division mode

// Status register bit definitions
#define STATUS_DONE_BIT         0x01    // operation complete
#define STATUS_BUSY_BIT         0x02    // operation in progress
#define STATUS_ERROR_BIT        0x04    // error flag

/**
 * Polls the custom IP's status register until the operation finishes.
 *
 * The status register is sampled about once per millisecond. It is always
 * sampled at least once (even with a timeout of 0) and once more after the
 * final delay. The error bit is checked before the done bit so that a
 * completion flagged together with an error is not reported as success.
 *
 * @param timeout_ms maximum time to wait, in milliseconds.
 * @return XST_SUCCESS when the done bit is set without error, XST_FAILURE on
 *         error or timeout.
 */
static int WaitForCompletion(u32 timeout_ms)
{
    const u32 poll_interval_us = 1000;  // 1 ms between samples
    u32 elapsed_ms = 0;                 // unsigned, so large timeouts cannot go negative

    for (;;) {
        u32 status = Xil_In32(CUSTOM_IP_BASEADDR + STATUS_REG_OFFSET);

        if (status & STATUS_ERROR_BIT) {
            xil_printf("操作出错,状态寄存器: 0x%08x\r\n", status);
            return XST_FAILURE;
        }

        if (status & STATUS_DONE_BIT) {
            return XST_SUCCESS;
        }

        if (elapsed_ms >= timeout_ms) {
            break;
        }

        usleep(poll_interval_us);
        elapsed_ms++;
    }

    xil_printf("操作超时\r\n");
    return XST_FAILURE;
}

/**
 * Runs one arithmetic operation on the custom IP and fetches the result.
 *
 * Sequence: validate the arguments, reset the IP, write both operands, write
 * the mode together with the start bit, wait up to one second for completion
 * and read the result register.
 *
 * @param operand1  first operand.
 * @param operand2  second operand; must be non-zero for division.
 * @param operation 0 = add, 1 = subtract, 2 = multiply, 3 = divide.
 * @param result    receives the result on success; must not be NULL.
 * @return XST_SUCCESS on success; XST_FAILURE for invalid arguments, or the
 *         status of WaitForCompletion() when the operation fails.
 */
static int PerformOperation(u32 operand1, u32 operand2, u8 operation, u32 *result)
{
    int Status;
    u32 control_value = 0;

    if (result == NULL) {
        return XST_FAILURE;
    }

    // Map the operation code to the control register's mode field
    switch (operation) {
        case 0:
            control_value = CTRL_MODE_ADD;
            break;
        case 1:
            control_value = CTRL_MODE_SUB;
            break;
        case 2:
            control_value = CTRL_MODE_MUL;
            break;
        case 3:
            // Reject division by zero before touching the hardware
            if (operand2 == 0) {
                xil_printf("错误:除数不能为0\r\n");
                return XST_FAILURE;
            }
            control_value = CTRL_MODE_DIV;
            break;
        default:
            xil_printf("错误:不支持的操作类型\r\n");
            return XST_FAILURE;
    }

    // Reset the IP and give it time to settle
    Xil_Out32(CUSTOM_IP_BASEADDR + CONTROL_REG_OFFSET, CTRL_RESET_BIT);
    usleep(1000);

    // Write the operands into two consecutive data registers.
    // NOTE(review): DATA_REG_OFFSET + 4 is the same offset as
    // RESULT_REG_OFFSET - confirm that the IP uses this address as
    // write = second operand / read = result.
    Xil_Out32(CUSTOM_IP_BASEADDR + DATA_REG_OFFSET, operand1);
    Xil_Out32(CUSTOM_IP_BASEADDR + DATA_REG_OFFSET + 4, operand2);

    // Select the mode and start the operation in a single write
    Xil_Out32(CUSTOM_IP_BASEADDR + CONTROL_REG_OFFSET, control_value | CTRL_START_BIT);

    Status = WaitForCompletion(1000);  // wait at most one second
    if (Status != XST_SUCCESS) {
        return Status;
    }

    *result = Xil_In32(CUSTOM_IP_BASEADDR + RESULT_REG_OFFSET);

    return XST_SUCCESS;
}

int main()
{
    int Status;
    u32 result;
    
    init_platform();
    
    xil_printf("AXI-Lite接口控制示例

");
    
    // 测试加法操作
    xil_printf("
测试加法运算:25 + 17

");
    Status = PerformOperation(25, 17, 0, &result);
    if (Status == XST_SUCCESS) {
        xil_printf("结果:%d

", result);
    }
    
    // 测试减法操作
    xil_printf("
测试减法运算:100 - 42

");
    Status = PerformOperation(100, 42, 1, &result);
    if (Status == XST_SUCCESS) {
        xil_printf("结果:%d

", result);
    }
    
    // 测试乘法操作
    xil_printf("
测试乘法运算:12 * 15

");
    Status = PerformOperation(12, 15, 2, &result);
    if (Status == XST_SUCCESS) {
        xil_printf("结果:%d

", result);
    }
    
    // 测试除法操作
    xil_printf("
测试除法运算:144 / 12

");
    Status = PerformOperation(144, 12, 3, &result);
    if (Status == XST_SUCCESS) {
        xil_printf("结果:%d

", result);
    }
    
    // 测试除法错误情况
    xil_printf("
测试除法错误:100 / 0

");
    Status = PerformOperation(100, 0, 3, &result);
    if (Status != XST_SUCCESS) {
        xil_printf("除零错误处理成功

");
    }
    
    xil_printf("
示例完成

");
    
    cleanup_platform();
    return 0;
}

4.3 AXI-DMA传输示例

c

/**
 * axi_dma_example.c - AXI DMA传输示例
 * 
 * 演示如何使用AXI-DMA在PS和PL之间传输数据
 * 假设在PL中实现了一个简单的数据处理IP(如滤波器)
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "platform.h"
#include "xil_printf.h"
#include "xil_cache.h"
#include "xaxidma.h"
#include "xparameters.h"

// Device parameters
#define DMA_DEV_ID              XPAR_AXIDMA_0_DEVICE_ID
#define DDR_BASE_ADDR           XPAR_PS7_DDR_0_S_AXI_BASEADDR
#define MEM_BASE_ADDR           (DDR_BASE_ADDR + 0x1000000) // 16 MB above the DDR base

// Buffer parameters
#define TX_BUFFER_BASE          MEM_BASE_ADDR
#define RX_BUFFER_BASE          (MEM_BASE_ADDR + 0x100000) // 1 MB above the TX buffer
#define MAX_PKT_LEN             0x100 // maximum transfer length in bytes

// Global state
static XAxiDma AxiDma;  // AXI DMA driver instance

// DMA初始化函数
int InitDma()
{
    XAxiDma_Config *CfgPtr;
    int Status;
    
    // 查找DMA配置
    CfgPtr = XAxiDma_LookupConfig(DMA_DEV_ID);
    if (!CfgPtr) {
        xil_printf("查找DMA配置失败

");
        return XST_FAILURE;
    }
    
    // 初始化DMA引擎
    Status = XAxiDma_CfgInitialize(&AxiDma, CfgPtr);
    if (Status != XST_SUCCESS) {
        xil_printf("DMA初始化失败,错误码: %d

", Status);
        return XST_FAILURE;
    }
    
    // 检查是否支持Scatter Gather模式
    if (XAxiDma_HasSg(&AxiDma)) {
        xil_printf("需要禁用Scatter Gather模式

");
        return XST_FAILURE;
    }
    
    // 重置DMA引擎
    XAxiDma_Reset(&AxiDma);
    while (!XAxiDma_ResetIsDone(&AxiDma));
    
    return XST_SUCCESS;
}

/**
 * Fills the transmit buffer with an incrementing byte pattern, zeroes the
 * receive buffer at RX_BUFFER_BASE and flushes both ranges from the data
 * cache so the values are written out to memory.
 *
 * @param TxBufferPtr  transmit buffer to fill.
 * @param BufferLength number of bytes to prepare in each buffer.
 */
void PrepareTestData(u8 *TxBufferPtr, int BufferLength)
{
    u8 *const RxBufferPtr = (u8 *)RX_BUFFER_BASE;
    int Pos = 0;

    // Incrementing sequence, wrapping at 256
    while (Pos < BufferLength) {
        TxBufferPtr[Pos] = (u8)Pos;
        Pos++;
    }

    memset(RxBufferPtr, 0, BufferLength);

    Xil_DCacheFlushRange((UINTPTR)TxBufferPtr, BufferLength);
    Xil_DCacheFlushRange((UINTPTR)RxBufferPtr, BufferLength);
}

/**
 * Verifies the received data against the transmitted data.
 *
 * The receive range is invalidated in the data cache first so the comparison
 * reads what is in memory. The check assumes the PL adds 1 to every byte
 * (wrapping at 8 bits); adapt it to the actual PL processing.
 *
 * @param TxBufferPtr  transmitted data.
 * @param RxBufferPtr  received data.
 * @param BufferLength number of bytes to compare.
 * @return XST_SUCCESS when all bytes match, XST_FAILURE at the first mismatch.
 */
int CheckData(u8 *TxBufferPtr, u8 *RxBufferPtr, int BufferLength)
{
    int Index;

    Xil_DCacheInvalidateRange((UINTPTR)RxBufferPtr, BufferLength);

    for (Index = 0; Index < BufferLength; Index++) {
        // Keep the expected value in 8 bits so the value that is printed is
        // the same one that is compared (0xFF + 1 wraps to 0, not 256).
        const u8 Expected = (u8)(TxBufferPtr[Index] + 1);

        if (RxBufferPtr[Index] != Expected) {
            xil_printf("数据不匹配在位置 %d: 期望 %d, 实际 %d\r\n",
                       Index, Expected, RxBufferPtr[Index]);
            return XST_FAILURE;
        }
    }

    return XST_SUCCESS;
}

// 执行单次传输并验证
int PerformTransfer(int BufferLength)
{
    int Status;
    u8 *TxBufferPtr = (u8 *)TX_BUFFER_BASE;
    u8 *RxBufferPtr = (u8 *)RX_BUFFER_BASE;
    
    xil_printf("开始传输, 数据长度: %d字节...

", BufferLength);
    
    // 准备测试数据
    PrepareTestData(TxBufferPtr, BufferLength);
    
    // 启动DMA传输
    Status = XAxiDma_SimpleTransfer(&AxiDma, (UINTPTR)RX_BUFFER_BASE,
                                   BufferLength, XAXIDMA_DEVICE_TO_DMA);
    if (Status != XST_SUCCESS) {
        xil_printf("RX Simple Transfer失败

");
        return XST_FAILURE;
    }
    
    Status = XAxiDma_SimpleTransfer(&AxiDma, (UINTPTR)TX_BUFFER_BASE,
                                   BufferLength, XAXIDMA_DMA_TO_DEVICE);
    if (Status != XST_SUCCESS) {
        xil_printf("TX Simple Transfer失败

");
        return XST_FAILURE;
    }
    
    // 等待传输完成
    while (XAxiDma_Busy(&AxiDma, XAXIDMA_DMA_TO_DEVICE));
    while (XAxiDma_Busy(&AxiDma, XAXIDMA_DEVICE_TO_DMA));
    
    // 验证结果
    Status = CheckData(TxBufferPtr, RxBufferPtr, BufferLength);
    if (Status != XST_SUCCESS) {
        xil_printf("数据验证失败

");
        return XST_FAILURE;
    }
    
    xil_printf("传输成功完成!

");
    
    return XST_SUCCESS;
}

int main()
{
    int Status;
    
    init_platform();
    
    xil_printf("AXI DMA传输示例

");
    
    // 初始化DMA
    Status = InitDma();
    if (Status != XST_SUCCESS) {
        xil_printf("DMA初始化失败

");
        return XST_FAILURE;
    }
    
    // 测试不同大小的传输
    Status = PerformTransfer(64);    // 小数据包
    if (Status != XST_SUCCESS) {
        goto Done;
    }
    
    Status = PerformTransfer(256);   // 中等数据包
    if (Status != XST_SUCCESS) {
        goto Done;
    }
    
    Status = PerformTransfer(MAX_PKT_LEN); // 最大数据包
    if (Status != XST_SUCCESS) {
        goto Done;
    }
    
    xil_printf("
所有测试都通过!

");
    
Done:
    cleanup_platform();
    return Status;
}

4.4 基于C++的AXI接口抽象封装

cpp

/**
 * axi_wrapper.hpp - AXI接口的C++抽象封装
 * 
 * 提供面向对象的AXI接口访问封装,简化硬件访问
 */

#ifndef AXI_WRAPPER_HPP
#define AXI_WRAPPER_HPP

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>
#include <stdexcept>
#include <string>

// 导入Xilinx基础库
extern "C" {
#include "sleep.h"
#include "xil_cache.h"
#include "xil_io.h"
#include "xstatus.h"
}

// Forward declarations of the device wrapper classes defined below
class AXI_Lite_Device;
class AXI_DMA_Device;

/**
 * AXI_Exception: exception type thrown when an AXI operation fails.
 *
 * Derives from std::runtime_error; the message passed to the constructor is
 * available through what().
 */
class AXI_Exception : public std::runtime_error {
public:
    explicit AXI_Exception(const std::string& message)
        : std::runtime_error{message}
    {
    }
};

/**
 * AXI_Lite_Device: thin wrapper around the 32-bit register interface of an
 * AXI-Lite peripheral.
 *
 * All accesses go through Xil_In32()/Xil_Out32() at base address + offset.
 * Instances are non-copyable.
 */
class AXI_Lite_Device {
public:
    /**
     * @param base_addr base address of the device's register block
     * @param name      device name (optional, defaults to "Unknown")
     */
    AXI_Lite_Device(uint32_t base_addr, const std::string& name = "Unknown")
        : base_address_(base_addr), device_name_(name) {}

    // Copying is disabled
    AXI_Lite_Device(const AXI_Lite_Device&) = delete;
    AXI_Lite_Device& operator=(const AXI_Lite_Device&) = delete;

    /**
     * Writes a 32-bit value to a register.
     *
     * @param offset register offset from the base address
     * @param value  value to write
     */
    void writeReg(uint32_t offset, uint32_t value) const {
        Xil_Out32(base_address_ + offset, value);
    }

    /**
     * Reads a 32-bit register.
     *
     * @param offset register offset from the base address
     * @return value read
     */
    uint32_t readReg(uint32_t offset) const {
        return Xil_In32(base_address_ + offset);
    }

    /**
     * Sets bits in a register with a read-modify-write sequence.
     *
     * @param offset   register offset from the base address
     * @param bit_mask bits to set
     */
    void setBits(uint32_t offset, uint32_t bit_mask) const {
        writeReg(offset, readReg(offset) | bit_mask);
    }

    /**
     * Clears bits in a register with a read-modify-write sequence.
     *
     * @param offset   register offset from the base address
     * @param bit_mask bits to clear
     */
    void clearBits(uint32_t offset, uint32_t bit_mask) const {
        writeReg(offset, readReg(offset) & ~bit_mask);
    }

    /**
     * Polls a register until the masked bits reach the expected state.
     *
     * The register is sampled about once per millisecond. It is always
     * sampled at least once (even with a timeout of 0) and once more after
     * the final delay, so a condition that becomes true during the last wait
     * is still detected.
     *
     * @param offset         register offset from the base address
     * @param bit_mask       bits to examine
     * @param expected_value true: all masked bits must be 1;
     *                       false: all masked bits must be 0
     * @param timeout_ms     maximum time to wait, in milliseconds
     * @return true when the condition was met, false on timeout
     */
    bool waitForBits(uint32_t offset, uint32_t bit_mask,
                     bool expected_value, uint32_t timeout_ms) const {
        const uint32_t expected = expected_value ? bit_mask : 0;

        for (uint32_t elapsed_ms = 0; ; ++elapsed_ms) {
            if ((readReg(offset) & bit_mask) == expected) {
                return true;
            }
            if (elapsed_ms >= timeout_ms) {
                return false;
            }

            usleep(1000);  // 1 ms between samples
        }
    }

    /**
     * @return device name given at construction
     */
    const std::string& getName() const {
        return device_name_;
    }

    /**
     * @return base address given at construction
     */
    uint32_t getBaseAddress() const {
        return base_address_;
    }

private:
    uint32_t base_address_;     // base address of the register block
    std::string device_name_;   // device name
};

/**
 * Memory_Buffer 类:表示DMA可访问的内存缓冲区
 */
class Memory_Buffer {
public:
    /**
     * 构造函数
     * 
     * @param address 物理地址
     * @param size 大小(字节)
     */
    Memory_Buffer(uint64_t address, size_t size)
        : physical_address_(address), buffer_size_(size) {}
    
    /**
     * 获取物理地址
     * 
     * @return 物理地址
     */
    uint64_t getPhysicalAddress() const {
        return physical_address_;
    }
    
    /**
     * 获取虚拟地址
     * 
     * @return 虚拟地址
     */
    template <typename T = void*>
    T getVirtualAddress() const {
        return reinterpret_cast<T>(physical_address_);
    }
    
    /**
     * 获取缓冲区大小
     * 
     * @return 缓冲区大小(字节)
     */
    size_t getSize() const {
        return buffer_size_;
    }
    
    /**
     * 清空缓冲区
     */
    void clear() const {
        std::memset(getVirtualAddress<void*>(), 0, buffer_size_);
        flushCache();
    }
    
    /**
     * 刷新缓存
     */
    void flushCache() const {
        Xil_DCacheFlushRange(physical_address_, buffer_size_);
    }
    
    /**
     * 使缓存无效
     */
    void invalidateCache() const {
        Xil_DCacheInvalidateRange(physical_address_, buffer_size_);
    }
    
private:
    uint64_t physical_address_;  // 物理地址
    size_t buffer_size_;         // 缓冲区大小
};

/**
 * AXI_DMA_Device 类:封装AXI-DMA设备操作
 */
class AXI_DMA_Device {
public:
    // DMA传输方向
    enum Direction {
        PS_TO_PL = 0,  // 内存到流
        PL_TO_PS = 1   // 流到内存
    };
    
    /**
     * 构造函数
     * 
     * @param tx_base_addr MM2S通道基地址
     * @param rx_base_addr S2MM通道基地址
     * @param name 设备名称(可选)
     */
    AXI_DMA_Device(uint32_t tx_base_addr, uint32_t rx_base_addr, 
                 const std::string& name = "DMA")
        : tx_controller_(tx_base_addr, name + "_TX"),
          rx_controller_(rx_base_addr, name + "_RX"),
          device_name_(name) {
        
        // 复位DMA引擎
        resetDMA();
    }
    
    // 禁用拷贝
    AXI_DMA_Device(const AXI_DMA_Device&) = delete;
    AXI_DMA_Device& operator=(const AXI_DMA_Device&) = delete;
    
    /**
     * 复位DMA引擎
     */
    void resetDMA() {
        // 复位TX通道
        tx_controller_.writeReg(0x00, 0x00000004); // MM2S_DMACR
        // 复位RX通道
        rx_controller_.writeReg(0x30, 0x00000004); // S2MM_DMACR
        
        // 等待复位完成
        bool tx_reset_done = tx_controller_.waitForBits(0x00, 0x00000004, false, 1000);
        bool rx_reset_done = rx_controller_.waitForBits(0x30, 0x00000004, false, 1000);
        
        if (!tx_reset_done || !rx_reset_done) {
            throw AXI_Exception("DMA引擎复位超时");
        }
        
        // 启用DMA
        tx_controller_.writeReg(0x00, 0x00000001); // MM2S_DMACR
        rx_controller_.writeReg(0x30, 0x00000001); // S2MM_DMACR
    }
    
    /**
     * 启动DMA传输
     * 
     * @param buffer 内存缓冲区
     * @param size 传输大小(字节)
     * @param direction 传输方向
     */
    void startTransfer(const Memory_Buffer& buffer, size_t size, Direction direction) {
        if (direction == PS_TO_PL) {
            // 写入MM2S地址
            tx_controller_.writeReg(0x18, buffer.getPhysicalAddress() & 0xFFFFFFFF);
            tx_controller_.writeReg(0x1C, (buffer.getPhysicalAddress() >> 32) & 0xFFFFFFFF);
            
            // 确保缓存刷新
            buffer.flushCache();
            
            // 写入传输长度
            tx_controller_.writeReg(0x28, size);
        } else {
            // 清空接收缓冲区
            buffer.clear();
            
            // 写入S2MM地址
            rx_controller_.writeReg(0x48, buffer.getPhysicalAddress() & 0xFFFFFFFF);
            rx_controller_.writeReg(0x4C, (buffer.getPhysicalAddress() >> 32) & 0xFFFFFFFF);
            
            // 写入传输长度
            rx_controller_.writeReg(0x58, size);
        }
    }
    
    /**
     * 等待DMA传输完成
     * 
     * @param direction 传输方向
     * @param timeout_ms 超时时间(毫秒)
     * @return 传输是否成功完成
     */
    bool waitForTransferComplete(Direction direction, uint32_t timeout_ms) {
        if (direction == PS_TO_PL) {
            // 等待MM2S传输完成
            return tx_controller_.waitForBits(0x04, 0x00001000, true, timeout_ms);
        } else {
            // 等待S2MM传输完成
            bool transfer_done = rx_controller_.waitForBits(0x34, 0x00001000, true, timeout_ms);
            if (transfer_done) {
                // 对于PL到PS的传输,需要使缓存无效
                // 这里我们不能使缓存无效,因为没有相应的Buffer实例
                // 使用者需要手动调用buffer.invalidateCache()
                return true;
            }
            return false;
        }
    }
    
    /**
     * 检查DMA是否忙
     * 
     * @param direction 传输方向
     * @return 通道是否忙
     */
    bool isBusy(Direction direction) const {
        if (direction == PS_TO_PL) {
            return (tx_controller_.readReg(0x04) & 0x00000001) != 0;
        } else {
            return (rx_controller_.readReg(0x34) & 0x00000001) != 0;
        }
    }
    
    /**
     * 获取传输状态
     * 
     * @param direction 传输方向
     * @return 状态寄存器值
     */
    uint32_t getStatus(Direction direction) const {
        if (direction == PS_TO_PL) {
            return tx_controller_.readReg(0x04); // MM2S_DMASR
        } else {
            return rx_controller_.readReg(0x34); // S2MM_DMASR
        }
    }
    
    /**
     * 执行双向DMA传输
     * 
     * @param tx_buffer 发送缓冲区
     * @param rx_buffer 接收缓冲区
     * @param size 传输大小(字节)
     * @param timeout_ms 超时时间(毫秒)
     * @return 传输是否成功
     */
    bool performTransfer(const Memory_Buffer& tx_buffer, 
                       const Memory_Buffer& rx_buffer, 
                       size_t size, 
                       uint32_t timeout_ms) {
        // 启动接收
        startTransfer(rx_buffer, size, PL_TO_PS);
        
        // 启动发送
        startTransfer(tx_buffer, size, PS_TO_PL);
        
        // 等待传输完成
        bool tx_done = waitForTransferComplete(PS_TO_PL, timeout_ms);
        bool rx_done = waitForTransferComplete(PL_TO_PS, timeout_ms);
        
        // 使接收缓存无效(确保从内存读取最新数据)
        rx_buffer.invalidateCache();
        
        return tx_done && rx_done;
    }
    
    /**
     * 获取设备名称
     * 
     * @return 设备名称
     */
    const std::string& getName() const {
        return device_name_;
    }
    
private:
    AXI_Lite_Device tx_controller_;  // 发送控制器(MM2S)
    AXI_Lite_Device rx_controller_;  // 接收控制器(S2MM)
    std::string device_name_;        // 设备名称
};

#endif // AXI_WRAPPER_HPP

cpp

/**
 * axi_cpp_example.cpp - 基于C++的AXI接口使用示例
 * 
 * 演示如何使用C++封装的AXI接口访问硬件
 */

#include <iostream>
#include <iomanip>
#include <string>
#include <cstring>
#include <cstdlib>
#include <ctime>
#include "platform.h"
#include "xil_printf.h"
#include "xparameters.h"
#include "sleep.h"
#include "axi_wrapper.hpp"

// Device parameters
#define CUSTOM_IP_BASEADDR     XPAR_CUSTOM_IP_0_S00_AXI_BASEADDR
#define DMA_MM2S_BASEADDR      XPAR_AXIDMA_0_BASEADDR
#define DMA_S2MM_BASEADDR      (XPAR_AXIDMA_0_BASEADDR + 0x30)
#define DDR_BASE_ADDR          XPAR_PS7_DDR_0_S_AXI_BASEADDR
#define MEM_BASE_ADDR          (DDR_BASE_ADDR + 0x1000000) // 16 MB above the DDR base
#define TX_BUFFER_BASE         MEM_BASE_ADDR
#define RX_BUFFER_BASE         (MEM_BASE_ADDR + 0x100000) // 1 MB above the TX buffer
#define MAX_PKT_LEN            0x1000 // 4 KB

// Register offsets of the custom IP
#define CONTROL_REG            0x00
#define STATUS_REG             0x04
#define DATA_IN_REG            0x08
#define DATA_OUT_REG           0x0C

// Control register bits
#define CTRL_START             0x01
#define CTRL_RESET             0x02
#define CTRL_INTR_EN           0x04

// Status register bits
#define STATUS_DONE            0x01
#define STATUS_BUSY            0x02
#define STATUS_ERROR           0x04

/**
 * Custom_IP_Controller: drives the custom IP through its AXI-Lite registers.
 */
class Custom_IP_Controller {
public:
    /**
     * Creates the controller and pulses the IP's reset bit for about 1 ms.
     *
     * @param base_addr base address of the IP's register block
     */
    Custom_IP_Controller(uint32_t base_addr)
        : device_(base_addr, "CustomIP") {
        device_.writeReg(CONTROL_REG, CTRL_RESET);
        usleep(1000);  // give the reset time to take effect
        device_.writeReg(CONTROL_REG, 0);
    }

    /**
     * Sends one word through the IP and returns the processed word.
     *
     * @param input_data word to process
     * @return processed word read from the output register
     * @throws AXI_Exception when the done bit is not seen within one second,
     *         or when the error bit is set after completion
     */
    uint32_t processData(uint32_t input_data) {
        device_.writeReg(DATA_IN_REG, input_data);
        device_.writeReg(CONTROL_REG, CTRL_START);

        const bool finished = device_.waitForBits(STATUS_REG, STATUS_DONE, true, 1000);
        if (!finished) {
            throw AXI_Exception("处理超时或出错");
        }

        const uint32_t status = device_.readReg(STATUS_REG);
        if ((status & STATUS_ERROR) != 0) {
            throw AXI_Exception("处理过程中发生错误");
        }

        return device_.readReg(DATA_OUT_REG);
    }

    /**
     * Prints the done, busy and error flags of the status register to
     * std::cout on one line.
     */
    void printStatus() const {
        const uint32_t status = device_.readReg(STATUS_REG);

        std::cout << "设备状态: ";
        std::cout << "完成=" << yesNo((status & STATUS_DONE) != 0) << ", ";
        std::cout << "忙=" << yesNo((status & STATUS_BUSY) != 0) << ", ";
        std::cout << "错误=" << yesNo((status & STATUS_ERROR) != 0) << std::endl;
    }

private:
    // Text used for a set / cleared status flag
    static const char* yesNo(bool flag) {
        return flag ? "是" : "否";
    }

    AXI_Lite_Device device_;
};

/**
 * Fills a whole memory buffer with test data and flushes it from the cache.
 *
 * @param buffer  Buffer to fill; all buffer.getSize() bytes are written
 * @param pattern 0 writes an incrementing byte ramp (index & 0xFF),
 *                any other value writes bytes taken from rand()
 */
void prepareTestData(const Memory_Buffer& buffer, int pattern) {
    uint8_t* const bytes = buffer.getVirtualAddress<uint8_t*>();
    const bool incrementing = (pattern == 0);

    for (size_t idx = 0; idx < buffer.getSize(); ++idx) {
        if (incrementing) {
            bytes[idx] = idx & 0xFF;
        } else {
            bytes[idx] = rand() & 0xFF;
        }
    }

    // Flush so that the data written above is no longer held only in the cache.
    buffer.flushCache();
}

/**
 * Hex-dumps part of a memory buffer to std::cout, 16 bytes per row.
 *
 * The requested range is clamped to the buffer. If offset lies at or
 * beyond the end of the buffer only the header line is printed; the
 * previous code computed getSize() - offset in unsigned arithmetic, which
 * wrapped around and read far outside the buffer.
 *
 * @param buffer Memory buffer to dump
 * @param offset Index of the first byte to print
 * @param length Maximum number of bytes to print
 */
void printBuffer(const Memory_Buffer& buffer, size_t offset, size_t length) {
    const uint8_t* data = buffer.getVirtualAddress<uint8_t*>();
    const size_t total = buffer.getSize();

    std::cout << "缓冲区内容 [" << std::hex << buffer.getPhysicalAddress()
              << " + " << std::dec << offset << "]:" << std::endl;

    if (offset >= total) {
        return;  // nothing inside the buffer to show
    }

    // Clamp the dump to what is really available after the offset.
    const size_t available = total - offset;
    const size_t max_length = (length < available) ? length : available;

    // setfill() is sticky on the stream, so remember the caller's fill character.
    const char previous_fill = std::cout.fill();

    for (size_t i = 0; i < max_length; i++) {
        if (i % 16 == 0) {
            // Row label: byte index relative to the start of the dump
            std::cout << std::hex << std::setw(4) << std::setfill('0') << i << ": ";
        }

        std::cout << std::hex << std::setw(2) << std::setfill('0')
                  << static_cast<int>(data[offset + i]) << " ";

        if ((i + 1) % 16 == 0 || i == max_length - 1) {
            std::cout << std::endl;
        }
    }

    // Restore decimal output and the original fill character.
    std::cout << std::dec;
    std::cout.fill(previous_fill);
}

/**
 * Compares the leading bytes of two memory buffers.
 *
 * The number of bytes compared is the smallest of length and the two
 * buffer sizes. The first mismatch is reported on std::cout.
 *
 * @param buffer1 First buffer
 * @param buffer2 Second buffer
 * @param length  Requested number of bytes to compare
 * @return true if every compared byte is equal, false at the first mismatch
 */
bool compareBuffers(const Memory_Buffer& buffer1, const Memory_Buffer& buffer2, size_t length) {
    const uint8_t* lhs = buffer1.getVirtualAddress<uint8_t*>();
    const uint8_t* rhs = buffer2.getVirtualAddress<uint8_t*>();

    // Never look past the end of either buffer.
    size_t limit = length;
    if (buffer1.getSize() < limit) {
        limit = buffer1.getSize();
    }
    if (buffer2.getSize() < limit) {
        limit = buffer2.getSize();
    }

    // Advance to the first differing byte (or to the limit).
    size_t pos = 0;
    while (pos < limit && lhs[pos] == rhs[pos]) {
        ++pos;
    }

    if (pos == limit) {
        return true;
    }

    std::cout << "数据不匹配在位置 " << pos << ": "
              << static_cast<int>(lhs[pos]) << " != "
              << static_cast<int>(rhs[pos]) << std::endl;
    return false;
}

/**
 * 主函数
 */
int main() {
    int status = 0;
    
    init_platform();
    
    try {
        std::cout << "C++ AXI接口示例" << std::endl;
        std::cout << "==================" << std::endl;
        
        // 初始化随机数生成器
        srand(time(NULL));
        
        // 演示AXI-Lite设备访问
        std::cout << "
1. AXI-Lite设备访问示例" << std::endl;
        std::cout << "---------------------------" << std::endl;
        
        Custom_IP_Controller ipController(CUSTOM_IP_BASEADDR);
        
        try {
            // 处理一些测试数据
            uint32_t result1 = ipController.processData(0x12345678);
            std::cout << "输入: 0x12345678, 输出: 0x" << std::hex << result1 << std::dec << std::endl;
            
            uint32_t result2 = ipController.processData(0xABCDEF01);
            std::cout << "输入: 0xABCDEF01, 输出: 0x" << std::hex << result2 << std::dec << std::endl;
            
            ipController.printStatus();
        }
        catch (const AXI_Exception& e) {
            std::cout << "处理过程中出错: " << e.what() << std::endl;
        }
        
        // 演示AXI-DMA传输
        std::cout << "
2. AXI-DMA传输示例" << std::endl;
        std::cout << "---------------------------" << std::endl;
        
        // 创建DMA控制器
        AXI_DMA_Device dmaController(DMA_MM2S_BASEADDR, DMA_S2MM_BASEADDR, "TestDMA");
        
        // 创建内存缓冲区
        Memory_Buffer txBuffer(TX_BUFFER_BASE, MAX_PKT_LEN);
        Memory_Buffer rxBuffer(RX_BUFFER_BASE, MAX_PKT_LEN);
        
        // 测试不同大小的传输
        size_t test_sizes[] = {64, 256, 1024, 4096};
        
        for (size_t size : test_sizes) {
            std::cout << "
传输大小: " << size << " 字节" << std::endl;
            
            // 准备测试数据
            prepareTestData(txBuffer, 0);  // 使用递增模式
            
            // 打印发送缓冲区
            std::cout << "发送数据(前32字节):" << std::endl;
            printBuffer(txBuffer, 0, 32);
            
            // 执行传输
            bool success = dmaController.performTransfer(txBuffer, rxBuffer, size, 5000);
            
            if (success) {
                std::cout << "传输成功!" << std::endl;
                
                // 打印接收缓冲区
                std::cout << "接收数据(前32字节):" << std::endl;
                printBuffer(rxBuffer, 0, 32);
                
                // 验证数据 - 根据PL中的实际处理逻辑修改此处的验证逻辑
                // 这里假设PL直接传递数据,不做修改
                if (compareBuffers(txBuffer, rxBuffer, size)) {
                    std::cout << "数据验证通过!" << std::endl;
                } else {
                    std::cout << "数据验证失败!" << std::endl;
                    status = -1;
                }
            } else {
                std::cout << "传输失败! DMA状态:" << std::endl;
                std::cout << "  TX状态: 0x" << std::hex << dmaController.getStatus(AXI_DMA_Device::PS_TO_PL) << std::endl;
                std::cout << "  RX状态: 0x" << std::hex << dmaController.getStatus(AXI_DMA_Device::PL_TO_PS) << std::endl;
                std::cout << std::dec;
                status = -1;
                break;
            }
        }
        
        std::cout << "
示例完成" << std::endl;
    }
    catch (const AXI_Exception& e) {
        std::cout << "错误: " << e.what() << std::endl;
        status = -1;
    }
    catch (const std::exception& e) {
        std::cout << "标准异常: " << e.what() << std::endl;
        status = -1;
    }
    catch (...) {
        std::cout << "未知异常!" << std::endl;
        status = -1;
    }
    
    cleanup_platform();
    return status;
}

5. Zynq Linux应用开发

5.1 PetaLinux环境搭建与配置

PetaLinux是Xilinx提供的工具集,用于在Zynq等嵌入式平台上构建和定制Linux系统。以下是基本的步骤概述:

安装PetaLinux工具

bash

# Install the host packages PetaLinux depends on
sudo apt-get install gcc git make net-tools libncurses5-dev tftpd zlib1g-dev
sudo apt-get install libssl-dev flex bison chrpath socat xterm autoconf libtool
sudo apt-get install texinfo zlib1g-dev gcc-multilib build-essential

# Make the installer executable and install PetaLinux into /opt/petalinux/2023.1
chmod +x petalinux-v2023.1-installer.run
./petalinux-v2023.1-installer.run --dir /opt/petalinux/2023.1

设置环境

bash

source /opt/petalinux/2023.1/settings.sh

创建PetaLinux项目

bash

# Create a project from the zynq template
petalinux-create --type project --template zynq --name my_linux_project

# Or create the project and then import a hardware description into it
petalinux-create --type project --template zynq --name my_linux_project
cd my_linux_project
petalinux-config --get-hw-description=/path/to/hardware/design/

配置Linux系统

bash

# System-level configuration
petalinux-config

# Kernel configuration
petalinux-config -c kernel

# Root filesystem configuration
petalinux-config -c rootfs

构建系统

bash

petalinux-build

打包镜像

bash

petalinux-package --boot --format BIN --fsbl images/linux/zynq_fsbl.elf --fpga images/linux/system.bit --u-boot

5.2 基于Linux的GPIO控制

c

/**
 * linux_gpio.c - Linux下GPIO控制示例
 * 
 * 演示如何在Linux中通过sysfs接口控制GPIO
 * 编译: gcc -o linux_gpio linux_gpio.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <poll.h>
#include <errno.h>

// GPIO management helpers

/**
 * Exports a GPIO line to user space through sysfs.
 *
 * Writes the decimal GPIO number to /sys/class/gpio/export.
 *
 * @param gpio GPIO number to export
 * @return 0 on success; a negative value if the export file cannot be
 *         opened or the write fails (the cause is reported via perror)
 */
int gpio_export(unsigned int gpio)
{
    char buf[64];
    int rc = 0;

    int fd = open("/sys/class/gpio/export", O_WRONLY);
    if (fd < 0) {
        perror("gpio/export");
        return fd;
    }

    /* %u matches the unsigned argument (the old "%d" was a type mismatch). */
    int len = snprintf(buf, sizeof(buf), "%u", gpio);

    /* A failed write was previously ignored and reported as success. */
    if (write(fd, buf, (size_t)len) != (ssize_t)len) {
        perror("gpio/export");
        rc = -1;
    }

    close(fd);
    return rc;
}

/**
 * Removes a GPIO line from user space through sysfs.
 *
 * Writes the decimal GPIO number to /sys/class/gpio/unexport.
 *
 * @param gpio GPIO number to unexport
 * @return 0 on success; a negative value if the unexport file cannot be
 *         opened or the write fails (the cause is reported via perror)
 */
int gpio_unexport(unsigned int gpio)
{
    char buf[64];
    int rc = 0;

    int fd = open("/sys/class/gpio/unexport", O_WRONLY);
    if (fd < 0) {
        perror("gpio/unexport");
        return fd;
    }

    /* %u matches the unsigned argument (the old "%d" was a type mismatch). */
    int len = snprintf(buf, sizeof(buf), "%u", gpio);

    /* A failed write was previously ignored and reported as success. */
    if (write(fd, buf, (size_t)len) != (ssize_t)len) {
        perror("gpio/unexport");
        rc = -1;
    }

    close(fd);
    return rc;
}

/**
 * Sets the direction of an exported GPIO.
 *
 * Writes dir to /sys/class/gpio/gpio<N>/direction.
 *
 * @param gpio GPIO number
 * @param dir  Direction string to write (main() passes "in" or "out")
 * @return 0 on success; a negative value if the file cannot be opened or
 *         the write fails (the cause is reported via perror)
 */
int gpio_set_dir(unsigned int gpio, const char* dir)
{
    char path[64];
    int rc = 0;

    snprintf(path, sizeof(path), "/sys/class/gpio/gpio%u/direction", gpio);

    int fd = open(path, O_WRONLY);
    if (fd < 0) {
        perror("gpio/direction");
        return fd;
    }

    /* A failed or short write was previously ignored. */
    size_t dir_len = strlen(dir);
    if (write(fd, dir, dir_len) != (ssize_t)dir_len) {
        perror("gpio/direction");
        rc = -1;
    }

    close(fd);
    return rc;
}

/**
 * Drives an exported GPIO.
 *
 * Writes "1" (value non-zero) or "0" (value zero) to
 * /sys/class/gpio/gpio<N>/value.
 *
 * @param gpio  GPIO number
 * @param value Level to write; any non-zero value is written as "1"
 * @return 0 on success; a negative value if the file cannot be opened or
 *         the write fails (the cause is reported via perror)
 */
int gpio_set_value(unsigned int gpio, unsigned int value)
{
    char path[64];
    int rc = 0;

    snprintf(path, sizeof(path), "/sys/class/gpio/gpio%u/value", gpio);

    int fd = open(path, O_WRONLY);
    if (fd < 0) {
        perror("gpio/value");
        return fd;
    }

    /* A failed write was previously ignored and reported as success. */
    if (write(fd, value ? "1" : "0", 1) != 1) {
        perror("gpio/value");
        rc = -1;
    }

    close(fd);
    return rc;
}

/**
 * Reads the current level of an exported GPIO.
 *
 * Reads one character from /sys/class/gpio/gpio<N>/value.
 *
 * @param gpio GPIO number
 * @return 0 if the character read is '0', 1 for any other character;
 *         a negative value if the file cannot be opened or read
 */
int gpio_get_value(unsigned int gpio)
{
    char path[64];
    char ch = '0';

    snprintf(path, sizeof(path), "/sys/class/gpio/gpio%u/value", gpio);

    int fd = open(path, O_RDONLY);
    if (fd < 0) {
        perror("gpio/value");
        return fd;
    }

    /* Without this check a failed read left ch uninitialised. */
    if (read(fd, &ch, 1) != 1) {
        perror("gpio/value");
        close(fd);
        return -1;
    }

    close(fd);
    return (ch == '0') ? 0 : 1;
}

/**
 * Selects which signal edge of an exported GPIO is reported to poll().
 *
 * Writes edge to /sys/class/gpio/gpio<N>/edge.
 *
 * @param gpio GPIO number
 * @param edge Edge string to write (main() passes "both")
 * @return 0 on success; a negative value if the file cannot be opened or
 *         the write fails (the cause is reported via perror)
 */
int gpio_set_edge(unsigned int gpio, const char* edge)
{
    char path[64];
    int rc = 0;

    snprintf(path, sizeof(path), "/sys/class/gpio/gpio%u/edge", gpio);

    int fd = open(path, O_WRONLY);
    if (fd < 0) {
        perror("gpio/edge");
        return fd;
    }

    /* A failed or short write was previously ignored. */
    size_t edge_len = strlen(edge);
    if (write(fd, edge, edge_len) != (ssize_t)edge_len) {
        perror("gpio/edge");
        rc = -1;
    }

    close(fd);
    return rc;
}

/**
 * Blocks until the GPIO value file signals an edge, or until the timeout.
 *
 * Opens /sys/class/gpio/gpio<N>/value, performs one read to consume any
 * pending state, then poll()s for POLLPRI | POLLERR. When POLLPRI is
 * reported, the file is rewound and the new level is printed.
 *
 * @param gpio       GPIO number
 * @param timeout_ms Timeout handed to poll(), in milliseconds
 * @return poll()'s result (> 0 event, 0 timeout, < 0 poll error), or a
 *         negative value if the value file cannot be opened or read
 */
int gpio_wait_for_edge(unsigned int gpio, int timeout_ms)
{
    char path[64];
    char ch;
    struct pollfd pfd;

    snprintf(path, sizeof(path), "/sys/class/gpio/gpio%u/value", gpio);

    int fd = open(path, O_RDONLY);
    if (fd < 0) {
        perror("gpio/value");
        return fd;
    }

    /* Initial read clears any edge that is already pending. */
    if (read(fd, &ch, 1) < 0) {
        perror("gpio/value");
        close(fd);
        return -1;
    }

    pfd.fd = fd;
    pfd.events = POLLPRI | POLLERR;
    pfd.revents = 0;

    int rc = poll(&pfd, 1, timeout_ms);

    if (rc < 0) {
        perror("poll");
    } else if (rc == 0) {
        printf("超时\n");
    } else if (pfd.revents & POLLPRI) {
        /* Rewind before reading so the current level is returned. */
        if (lseek(fd, 0, SEEK_SET) == 0 && read(fd, &ch, 1) == 1) {
            printf("GPIO状态改变: %c\n", ch);
        } else {
            perror("gpio/value");
            rc = -1;
        }
    }

    close(fd);
    return rc;
}

/**
 * Demo entry point: toggles an LED GPIO each time a button GPIO reads 1
 * after an edge event.
 *
 * Setup is best effort, as in the original: the return values of the
 * export / direction / edge helpers are not checked (exporting a line
 * that is already exported is therefore not fatal).
 *
 * @return 0 (never reached; the main loop runs until the process is killed)
 */
int main(int argc, char** argv)
{
    (void)argc;
    (void)argv;

    // GPIO numbers used by the demo
    unsigned int led_gpio = 901;    // e.g. EMIO GPIO 901
    unsigned int button_gpio = 902; // e.g. EMIO GPIO 902

    printf("Linux GPIO控制示例\n");

    // Export both lines
    gpio_export(led_gpio);
    gpio_export(button_gpio);

    // LED is an output, button is an input
    gpio_set_dir(led_gpio, "out");
    gpio_set_dir(button_gpio, "in");

    // Report both edges of the button
    gpio_set_edge(button_gpio, "both");

    /* %u matches the unsigned GPIO numbers (the old "%d" was a type mismatch). */
    printf("监听按钮(GPIO %u),按下时LED(GPIO %u)将切换状态\n", button_gpio, led_gpio);
    printf("按Ctrl+C退出\n");

    // Start with the LED off
    int led_state = 0;
    gpio_set_value(led_gpio, led_state);

    while (1) {
        // Wait up to 5000 ms for a button edge
        int rc = gpio_wait_for_edge(button_gpio, 5000);

        if (rc > 0) {
            int button_state = gpio_get_value(button_gpio);

            if (button_state == 1) {
                // Button reads 1: toggle the LED
                led_state = !led_state;
                gpio_set_value(led_gpio, led_state);
                printf("LED状态切换为 %s\n", led_state ? "开" : "关");
            }
        }
    }

    // Unreachable because of the endless loop above; kept to show the cleanup calls.
    gpio_unexport(led_gpio);
    gpio_unexport(button_gpio);

    return 0;
}

5.3 基于Linux的MMAP内存访问

c

/**
 * mmap_example.c - Linux下使用mmap访问物理内存/设备示例
 * 
 * 演示如何在Linux中使用mmap直接访问PL中的硬件
 * 编译: gcc -o mmap_example mmap_example.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <errno.h>
#include <string.h>

// Size of the mapped window and the mask that selects the offset inside it
#define MAP_SIZE 4096UL
#define MAP_MASK (MAP_SIZE - 1)

// Base address of the custom IP (defined in xparameters.h; hard-coded here for the example)
#define CUSTOM_IP_BASEADDR 0x43C00000

// Register offsets
#define CONTROL_REG_OFFSET 0x00
#define STATUS_REG_OFFSET  0x04
#define DATA_REG_OFFSET    0x08
#define RESULT_REG_OFFSET  0x0C

// Control register bits
#define CTRL_START  0x01
#define CTRL_RESET  0x02
#define CTRL_ENABLE 0x04

// Status register bits
#define STATUS_DONE  0x01
#define STATUS_READY 0x02
#define STATUS_ERROR 0x04

// Mapping state shared by the functions below:
// map_base is the address returned by mmap(), fd is the /dev/mem descriptor.
void *map_base = NULL;
int fd = -1;

/**
 * Opens /dev/mem and maps the MAP_SIZE window that contains
 * CUSTOM_IP_BASEADDR into the process.
 *
 * On failure the globals are put back to their idle values (map_base
 * NULL, fd -1). Previously map_base stayed MAP_FAILED and fd kept the
 * number of an already closed descriptor.
 *
 * @return 0 on success, -1 on failure (the cause is reported via perror)
 */
int init_mem_map(void) {
    // Open the physical-memory device
    fd = open("/dev/mem", O_RDWR | O_SYNC);
    if (fd == -1) {
        perror("打开/dev/mem失败");
        return -1;
    }

    // Map the window that contains the IP; the file offset is the base
    // address rounded down to a MAP_SIZE boundary.
    map_base = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
                    (off_t)(CUSTOM_IP_BASEADDR & ~MAP_MASK));
    if (map_base == MAP_FAILED) {
        perror("内存映射失败");
        map_base = NULL;
        close(fd);
        fd = -1;
        return -1;
    }

    printf("内存映射成功,基地址: 0x%08X\n", (unsigned int)CUSTOM_IP_BASEADDR);
    return 0;
}

/**
 * Releases the register mapping and closes /dev/mem.
 *
 * Safe to call when nothing is mapped: a NULL or MAP_FAILED map_base is
 * not passed to munmap() (MAP_FAILED is what a failed mmap() leaves
 * behind), and an fd of -1 is not closed.
 */
void close_mem_map() {
    if (map_base != NULL && map_base != MAP_FAILED) {
        if (munmap(map_base, MAP_SIZE) == -1) {
            perror("取消内存映射失败");
        }
    }
    map_base = NULL;

    if (fd != -1) {
        close(fd);
        fd = -1;
    }
}

/**
 * Reads a 32-bit register of the mapped IP.
 *
 * The access goes through a volatile pointer so the compiler can neither
 * cache nor drop the hardware read, and the address is computed on a
 * uint8_t pointer (arithmetic on void * is a compiler extension).
 *
 * @param offset Byte offset of the register from CUSTOM_IP_BASEADDR
 * @return Value read from the register
 */
uint32_t read_register(uint32_t offset) {
    volatile uint32_t *reg_addr = (volatile uint32_t *)
        ((uint8_t *)map_base + (CUSTOM_IP_BASEADDR & MAP_MASK) + offset);
    return *reg_addr;
}

/**
 * Writes a 32-bit register of the mapped IP.
 *
 * The access goes through a volatile pointer so the compiler can neither
 * reorder nor drop the hardware write, and the address is computed on a
 * uint8_t pointer (arithmetic on void * is a compiler extension).
 *
 * @param offset Byte offset of the register from CUSTOM_IP_BASEADDR
 * @param value  Value to store
 */
void write_register(uint32_t offset, uint32_t value) {
    volatile uint32_t *reg_addr = (volatile uint32_t *)
        ((uint8_t *)map_base + (CUSTOM_IP_BASEADDR & MAP_MASK) + offset);
    *reg_addr = value;
}

/**
 * Polls the status register until the operation finishes.
 *
 * The register is sampled once per iteration with a 1000 us sleep in
 * between, for at most timeout_ms iterations. STATUS_DONE is tested
 * before STATUS_ERROR.
 *
 * @param timeout_ms Maximum number of polls (roughly milliseconds)
 * @return 0 when STATUS_DONE is seen; -1 on STATUS_ERROR or timeout
 */
int wait_for_completion(int timeout_ms) {
    const int delay_us = 1000; // interval between polls

    for (int polls = 0; polls < timeout_ms; polls++) {
        uint32_t status = read_register(STATUS_REG_OFFSET);

        if (status & STATUS_DONE) {
            return 0; // finished
        }

        if (status & STATUS_ERROR) {
            printf("操作出错,状态寄存器: 0x%08X\n", (unsigned int)status);
            return -1; // device reported an error
        }

        usleep(delay_us);
    }

    printf("操作超时\n");
    return -1; // timed out
}

// 执行简单测试
int run_test() {
    // 复位设备
    write_register(CONTROL_REG_OFFSET, CTRL_RESET);
    usleep(1000); // 等待复位完成
    write_register(CONTROL_REG_OFFSET, 0);
    
    // 检查设备状态
    uint32_t status = read_register(STATUS_REG_OFFSET);
    printf("设备状态: 0x%08X
", status);
    
    if (!(status & STATUS_READY)) {
        printf("设备未就绪
");
        return -1;
    }
    
    // 写入测试数据
    uint32_t test_data = 0x12345678;
    write_register(DATA_REG_OFFSET, test_data);
    printf("写入数据: 0x%08X
", test_data);
    
    // 启动设备
    write_register(CONTROL_REG_OFFSET, CTRL_START | CTRL_ENABLE);
    printf("启动设备
");
    
    // 等待操作完成
    if (wait_for_completion(1000) != 0) {
        return -1;
    }
    
    // 读取结果
    uint32_t result = read_register(RESULT_REG_OFFSET);
    printf("读取结果: 0x%08X
", result);
    
    return 0;
}

/**
 * Entry point of the mmap example: map the device, run one test, unmap.
 *
 * @return Result of run_test() (0 or -1), or -1 if the mapping fails
 */
int main() {
    printf("Linux MMAP物理内存访问示例\n");

    // Map the device registers
    if (init_mem_map() != 0) {
        return -1;
    }

    // Run the test
    int result = run_test();

    // Release the mapping whatever the test result was
    close_mem_map();

    return result;
}

5.4 基于Linux的用户空间UIO驱动

c

/**
 * uio_example.c - Linux用户空间I/O(UIO)驱动示例
 * 
 * 演示如何使用Linux UIO框架访问PL设备
 * 编译: gcc -o uio_example uio_example.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <errno.h>
#include <string.h>
#include <signal.h>
#include <poll.h>

// Path of the UIO device node
#define UIO_DEVICE "/dev/uio0"

// Default size of the register mapping
#define MAP_SIZE 4096UL

// Register offsets
#define CONTROL_REG_OFFSET 0x00
#define STATUS_REG_OFFSET  0x04
#define DATA_REG_OFFSET    0x08
#define RESULT_REG_OFFSET  0x0C

// Control register bits
#define CTRL_START       0x01
#define CTRL_RESET       0x02
#define CTRL_INTR_ENABLE 0x04

// Status register bits
#define STATUS_DONE  0x01
#define STATUS_BUSY  0x02
#define STATUS_ERROR 0x04

// File-scope state
static int uio_fd = -1;                 // descriptor of the opened UIO device
static void *mapped_base = NULL;        // start of the mapped register window
static size_t mapped_size = MAP_SIZE;   // length of that window in bytes
// Cleared by the SIGINT handler. volatile sig_atomic_t is the type the C
// standard guarantees can be written safely from a signal handler.
static volatile sig_atomic_t keep_running = 1;

// 信号处理函数
void signal_handler(int sig) {
    if (sig == SIGINT) {
        printf("
收到Ctrl+C,程序即将退出...
");
        keep_running = 0;
    }
}

/**
 * Opens the UIO device and maps its first memory region.
 *
 * On failure both globals are returned to their idle values (uio_fd -1,
 * mapped_base NULL) so later code never sees MAP_FAILED.
 *
 * @return 0 on success, -1 on failure (the cause is reported via perror)
 */
int init_uio_device() {
    // Open the UIO device node
    uio_fd = open(UIO_DEVICE, O_RDWR);
    if (uio_fd < 0) {
        perror("打开UIO设备失败");
        return -1;
    }

    // Map the device register space (offset 0 of the UIO device)
    mapped_base = mmap(NULL, mapped_size, PROT_READ | PROT_WRITE, MAP_SHARED, uio_fd, 0);
    if (mapped_base == MAP_FAILED) {
        perror("映射设备寄存器失败");
        mapped_base = NULL;
        close(uio_fd);
        uio_fd = -1;
        return -1;
    }

    printf("UIO设备初始化成功\n");
    return 0;
}

/**
 * Unmaps the register window and closes the UIO descriptor.
 *
 * Each resource is released only if it is currently held, and the
 * matching global is reset afterwards.
 */
void cleanup_uio_device() {
    const int have_mapping = !(mapped_base == NULL || mapped_base == MAP_FAILED);

    if (have_mapping) {
        int unmap_rc = munmap(mapped_base, mapped_size);
        if (unmap_rc < 0) {
            perror("取消内存映射失败");
        }
        mapped_base = NULL;
    }

    if (!(uio_fd < 0)) {
        close(uio_fd);
        uio_fd = -1;
    }
}

/**
 * Reads a 32-bit device register through the UIO mapping.
 *
 * @param offset Byte offset of the register from mapped_base
 * @return Value read from the register
 */
uint32_t read_register(uint32_t offset) {
    uint8_t *window = (uint8_t *)mapped_base;
    volatile uint32_t *reg = (volatile uint32_t *)(window + offset);
    return *reg;
}

/**
 * Writes a 32-bit device register through the UIO mapping.
 *
 * @param offset Byte offset of the register from mapped_base
 * @param value  Value to store
 */
void write_register(uint32_t offset, uint32_t value) {
    uint8_t *window = (uint8_t *)mapped_base;
    volatile uint32_t *reg = (volatile uint32_t *)(window + offset);
    *reg = value;
}

/**
 * Asks the UIO driver to enable the device interrupt by writing the
 * 32-bit value 1 to the UIO descriptor. A failure is reported with
 * perror() and otherwise ignored.
 */
void enable_interrupt() {
    const uint32_t irq_on = 1;
    ssize_t written = write(uio_fd, &irq_on, sizeof(irq_on));

    if (written < 0) {
        perror("使能中断失败");
    }
}

/**
 * Waits for an interrupt on the UIO device.
 *
 * poll()s the UIO descriptor for POLLIN and then reads the 32-bit
 * interrupt counter, which acknowledges the event to the UIO core.
 * NOTE(review): with drivers such as uio_pdrv_genirq the interrupt must
 * be re-armed by writing 1 to the descriptor (see enable_interrupt());
 * the read alone does not do that. Confirm against the driver in use.
 *
 * @param timeout_ms Timeout handed to poll(), in milliseconds
 * @return 1 if an interrupt was received, 0 on timeout, -1 on error
 *         (poll failure, an event without POLLIN, or a failed/short read)
 */
int wait_for_interrupt(int timeout_ms) {
    struct pollfd fds = {
        .fd = uio_fd,
        .events = POLLIN,
    };

    int ret = poll(&fds, 1, timeout_ms);
    if (ret < 0) {
        perror("poll失败");
        return -1;
    }
    if (ret == 0) {
        return 0; // timeout
    }

    // An event without POLLIN (POLLERR/POLLHUP/POLLNVAL) is a failure; it
    // used to be reported as a timeout.
    if (!(fds.revents & POLLIN)) {
        return -1;
    }

    // The counter is exactly 4 bytes; anything else is an error.
    uint32_t interrupt_count;
    ssize_t got = read(uio_fd, &interrupt_count, sizeof(interrupt_count));
    if (got != (ssize_t)sizeof(interrupt_count)) {
        if (got < 0) {
            perror("读取中断计数失败");
        }
        return -1;
    }

    return 1; // interrupt received
}

// 执行设备操作
int perform_operation(uint32_t input_data) {
    // 复位设备
    write_register(CONTROL_REG_OFFSET, CTRL_RESET);
    usleep(1000); // 等待复位完成
    write_register(CONTROL_REG_OFFSET, 0);
    
    // 检查设备状态
    uint32_t status = read_register(STATUS_REG_OFFSET);
    printf("设备状态: 0x%08X
", status);
    
    // 写入输入数据
    write_register(DATA_REG_OFFSET, input_data);
    printf("发送数据: 0x%08X
", input_data);
    
    // 使能中断
    enable_interrupt();
    
    // 启动设备操作,并使能中断
    write_register(CONTROL_REG_OFFSET, CTRL_START | CTRL_INTR_ENABLE);
    printf("操作已启动,等待中断...
");
    
    // 等待中断或超时(5秒)
    if (wait_for_interrupt(5000) <= 0) {
        printf("等待中断超时或发生错误
");
        return -1;
    }
    
    // 检查状态寄存器
    status = read_register(STATUS_REG_OFFSET);
    if (status & STATUS_ERROR) {
        printf("操作出错,状态: 0x%08X
", status);
        return -1;
    }
    
    if (!(status & STATUS_DONE)) {
        printf("操作未完成,状态: 0x%08X
", status);
        return -1;
    }
    
    // 读取结果
    uint32_t result = read_register(RESULT_REG_OFFSET);
    printf("操作完成,结果: 0x%08X
", result);
    
    return 0;
}

// 交互式测试
void interactive_test() {
    char input[32];
    uint32_t data;
    
    printf("
交互式测试模式 (输入'q'退出)
");
    
    while (keep_running) {
        printf("
请输入要发送的十六进制数据 (例如: 0x12345678): ");
        if (fgets(input, sizeof(input), stdin) == NULL) {
            break;
        }
        
        // 检查退出命令
        if (input[0] == 'q' || input[0] == 'Q') {
            break;
        }
        
        // 解析十六进制输入
        if (sscanf(input, "0x%x", &data) != 1 && sscanf(input, "%x", &data) != 1) {
            printf("无效的输入格式,请使用十六进制 (例如: 0x12345678)
");
            continue;
        }
        
        // 执行操作
        perform_operation(data);
    }
}

int main() {
    // 设置信号处理,捕获Ctrl+C
    struct sigaction sa;
    memset(&sa, 0, sizeof(sa));
    sa.sa_handler = signal_handler;
    sigaction(SIGINT, &sa, NULL);
    
    printf("Linux UIO驱动示例
");
    printf("=================
");
    
    // 初始化UIO设备
    if (init_uio_device() != 0) {
        return -1;
    }
    
    // 显示设备信息
    printf("设备寄存器已映射, 大小: %zu 字节
", mapped_size);
    printf("控制寄存器: 0x%08X
", read_register(CONTROL_REG_OFFSET));
    printf("状态寄存器: 0x%08X
", read_register(STATUS_REG_OFFSET));
    
    // 执行自动测试
    printf("
执行自动测试...
");
    perform_operation(0xABCD1234);
    
    // 执行交互式测试
    interactive_test();
    
    // 清理资源
    cleanup_uio_device();
    
    printf("程序结束
");
    return 0;
}

5.5 Linux下的DMA应用

c

/**
 * dma_demo.c - Linux DMA用户空间应用示例
 * 
 * 演示如何在Linux用户空间中使用Xilinx AXI DMA驱动进行数据传输
 * 编译: gcc -o dma_demo dma_demo.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <errno.h>
#include <string.h>
#include <time.h>

// DMA device nodes
#define TX_DEVICE "/dev/xilinx_dma_tx"
#define RX_DEVICE "/dev/xilinx_dma_rx"

// DMA transfer parameters
#define MAX_PKT_SIZE       (4 * 1024 * 1024) // 4 MB upper limit
#define TEST_SIZE          (64 * 1024)      // 64 KB default test size

/**
 * Allocates a zero-filled buffer with the requested alignment.
 *
 * @param size      Number of bytes to allocate
 * @param alignment Alignment handed to posix_memalign()
 * @return Pointer to the buffer (release with free()), or NULL if
 *         posix_memalign() fails; the failure is reported on stderr
 */
void *alloc_aligned_buffer(size_t size, unsigned alignment)
{
    void *ptr = NULL;
    int ret = posix_memalign(&ptr, alignment, size);
    if (ret != 0) {
        /* posix_memalign returns the error code instead of setting errno. */
        fprintf(stderr, "内存分配失败: %s\n", strerror(ret));
        return NULL;
    }

    /* Zero the buffer; a zero-size request may legitimately yield NULL. */
    if (ptr != NULL) {
        memset(ptr, 0, size);
    }
    return ptr;
}

/**
 * Fills a buffer with one of three test patterns.
 *
 * @param buffer       Destination buffer
 * @param size         Number of bytes to write
 * @param pattern_type 0: incrementing bytes (index & 0xFF);
 *                     1: bytes from rand(), reseeded with time(NULL) on
 *                        every call;
 *                     anything else: every byte is pattern_type & 0xFF
 */
void fill_buffer(uint8_t *buffer, size_t size, int pattern_type)
{
    size_t pos;

    switch (pattern_type) {
    case 0:
        /* Ramp that wraps every 256 bytes */
        for (pos = 0; pos < size; pos++) {
            buffer[pos] = pos & 0xFF;
        }
        break;

    case 1:
        /* Pseudo-random bytes */
        srand(time(NULL));
        for (pos = 0; pos < size; pos++) {
            buffer[pos] = rand() & 0xFF;
        }
        break;

    default:
        /* Constant byte taken from the low 8 bits of the selector */
        for (pos = 0; pos < size; pos++) {
            buffer[pos] = pattern_type & 0xFF;
        }
        break;
    }
}

/**
 * Checks received data against transmitted data.
 *
 * Every received byte must equal the transmitted byte plus
 * expected_change, computed modulo 256. The first 10 mismatches are
 * printed, followed by a total.
 *
 * @param tx_buf          Transmitted data
 * @param rx_buf          Received data
 * @param size            Number of bytes to compare
 * @param expected_change Value added to each transmitted byte
 *                        (0 means the data must come back unchanged)
 * @return 0 if all bytes match, -1 if at least one differs
 */
int verify_buffer(const uint8_t *tx_buf, const uint8_t *rx_buf, size_t size, int expected_change)
{
    size_t error_count = 0;

    for (size_t i = 0; i < size; i++) {
        /* With expected_change == 0 this is simply tx_buf[i]. */
        uint8_t expected = (uint8_t)(tx_buf[i] + expected_change);

        if (rx_buf[i] != expected) {
            if (error_count < 10) { /* cap the number of reported errors */
                printf("数据不匹配 @ %zu: 发送 = 0x%02X, 接收 = 0x%02X, 期望 = 0x%02X\n",
                       i, tx_buf[i], rx_buf[i], expected);
            }
            error_count++;
        }
    }

    if (error_count > 0) {
        printf("总共发现 %zu 个错误 (共 %zu 字节)\n", error_count, size);
        return -1;
    }

    return 0;
}

/**
 * Prints part of a buffer as hexadecimal bytes, 16 per line.
 *
 * The caller must make sure offset + length stays inside the buffer;
 * this function cannot check it.
 *
 * @param buffer Data to print
 * @param offset Index of the first byte to print
 * @param length Number of bytes to print
 */
void print_buffer(const uint8_t *buffer, size_t offset, size_t length)
{
    printf("缓冲区内容 @ 偏移 %zu:\n", offset);

    for (size_t i = 0; i < length; i++) {
        printf("%02X ", buffer[offset + i]);
        if ((i + 1) % 16 == 0) {
            printf("\n");
        }
    }

    /* Final line break (a blank line follows when length is a multiple of 16). */
    printf("\n");
}

// 执行DMA传输
int perform_dma_transfer(size_t transfer_size, int pattern_type, int expected_change)
{
    int tx_fd, rx_fd;
    uint8_t *tx_buf, *rx_buf;
    ssize_t tx_bytes, rx_bytes;
    
    // 分配对齐的DMA缓冲区
    tx_buf = alloc_aligned_buffer(transfer_size, 4096);
    rx_buf = alloc_aligned_buffer(transfer_size, 4096);
    
    if (!tx_buf || !rx_buf) {
        printf("无法分配DMA缓冲区
");
        goto error_out;
    }
    
    // 填充发送缓冲区
    fill_buffer(tx_buf, transfer_size, pattern_type);
    
    // 打印部分发送数据
    printf("发送数据样本:
");
    print_buffer(tx_buf, 0, 32);
    
    // 打开DMA设备
    tx_fd = open(TX_DEVICE, O_WRONLY);
    if (tx_fd < 0) {
        perror("无法打开TX DMA设备");
        goto error_out;
    }
    
    rx_fd = open(RX_DEVICE, O_RDONLY);
    if (rx_fd < 0) {
        perror("无法打开RX DMA设备");
        close(tx_fd);
        goto error_out;
    }
    
    // 开始计时
    struct timespec start_time, end_time;
    clock_gettime(CLOCK_MONOTONIC, &start_time);
    
    // 开始接收 (非阻塞)
    rx_bytes = read(rx_fd, rx_buf, transfer_size);
    if (rx_bytes < 0) {
        perror("接收失败");
        goto close_files;
    }
    
    // 开始发送
    tx_bytes = write(tx_fd, tx_buf, transfer_size);
    if (tx_bytes < 0) {
        perror("发送失败");
        goto close_files;
    }
    
    // 等待接收完成 (如果之前的read是非阻塞的)
    // 在某些DMA驱动实现中可能不需要这一步
    
    // 停止计时
    clock_gettime(CLOCK_MONOTONIC, &end_time);
    
    // 计算带宽
    double elapsed_ns = (end_time.tv_sec - start_time.tv_sec) * 1e9 + 
                        (end_time.tv_nsec - start_time.tv_nsec);
    double elapsed_ms = elapsed_ns / 1e6;
    double bandwidth_mbps = (transfer_size * 8) / (elapsed_ns); // Mbps
    
    printf("传输完成: %zu 字节 在 %.2f ms (%.2f Mbps)
", 
           transfer_size, elapsed_ms, bandwidth_mbps);
    
    // 打印接收数据样本
    printf("接收数据样本:
");
    print_buffer(rx_buf, 0, 32);
    
    // 验证数据
    if (verify_buffer(tx_buf, rx_buf, transfer_size, expected_change) != 0) {
        printf("数据验证失败!
");
        goto close_files;
    }
    
    printf("数据验证成功!
");
    
    // 关闭设备文件
    close(rx_fd);
    close(tx_fd);
    
    // 释放缓冲区
    free(tx_buf);
    free(rx_buf);
    
    return 0;
    
close_files:
    if (rx_fd >= 0) close(rx_fd);
    if (tx_fd >= 0) close(tx_fd);
    
error_out:
    if (tx_buf) free(tx_buf);
    if (rx_buf) free(rx_buf);
    
    return -1;
}

/* Prints the command-line help for the DMA demo. */
static void print_usage(const char *prog)
{
    printf("用法: %s [-s 大小] [-p 模式] [-c 变化]\n", prog);
    printf("  -s 大小: 传输大小 (默认: %d)\n", TEST_SIZE);
    printf("  -p 模式: 0=递增, 1=随机, 其他=固定值\n");
    printf("  -c 变化: 期望的数据变化值 (默认: 0, 表示数据不变)\n");
}

/**
 * Entry point of the DMA demo.
 *
 * Options: -s transfer size in bytes, -p data pattern, -c expected
 * per-byte change. The size is parsed with strtol() and must be a
 * positive decimal number; atoi() used to accept garbage, zero and
 * negative values silently. Sizes above MAX_PKT_SIZE are clamped.
 *
 * @return Result of perform_dma_transfer() (0 or -1), or 1 for an
 *         unknown option or an invalid size
 */
int main(int argc, char **argv)
{
    size_t transfer_size = TEST_SIZE;
    int pattern_type = 0;    /* incrementing pattern by default */
    int expected_change = 0; /* data expected back unchanged by default */

    int opt;
    while ((opt = getopt(argc, argv, "s:p:c:")) != -1) {
        switch (opt) {
            case 's': { /* transfer size */
                char *end = NULL;
                errno = 0;
                long requested = strtol(optarg, &end, 10);
                if (errno != 0 || end == optarg || *end != '\0' || requested <= 0) {
                    print_usage(argv[0]);
                    return 1;
                }
                if (requested > MAX_PKT_SIZE) {
                    printf("警告: 传输大小超过最大值,将使用最大值 %d\n", MAX_PKT_SIZE);
                    requested = MAX_PKT_SIZE;
                }
                transfer_size = (size_t)requested;
                break;
            }
            case 'p': /* data pattern */
                pattern_type = atoi(optarg);
                break;
            case 'c': /* expected data change */
                expected_change = atoi(optarg);
                break;
            default:
                print_usage(argv[0]);
                return 1;
        }
    }

    printf("Linux AXI DMA 演示\n");
    printf("==================\n");
    printf("传输大小: %zu 字节\n", transfer_size);
    printf("数据模式: %d (%s)\n", pattern_type,
           pattern_type == 0 ? "递增" : (pattern_type == 1 ? "随机" : "固定值"));
    printf("期望变化: %d\n", expected_change);

    /* Run the transfer and use its result as the exit status */
    return perform_dma_transfer(transfer_size, pattern_type, expected_change);
}

6. 高级应用实例

6.1 硬件加速器控制与数据处理

cpp

/**
 * hw_accelerator.hpp - 硬件加速器C++接口定义
 * 
 * 提供对Zynq PL中实现的硬件加速器的访问
 */
#ifndef HW_ACCELERATOR_HPP
#define HW_ACCELERATOR_HPP

#include <cstdint>
#include <vector>
#include <string>
#include <stdexcept>
#include <memory>

// 前向声明
class HardwareAcceleratorImpl;

/**
 * Exception type for hardware accelerator errors.
 *
 * Derives from std::runtime_error and forwards the message to it.
 */
class HardwareAcceleratorException : public std::runtime_error {
public:
    explicit HardwareAcceleratorException(const std::string& message) 
        : std::runtime_error(message) {}
};

/**
 * Hardware accelerator class - high-level access to the accelerator in the PL.
 *
 * Only the interface is declared here; the work is delegated to
 * HardwareAcceleratorImpl, held through the impl member.
 */
class HardwareAccelerator {
public:
    // Operation types supported by the accelerator
    enum OperationType {
        MATRIX_MULTIPLY = 0,
        FFT_TRANSFORM = 1,
        CONVOLUTION = 2,
        FILTER = 3
    };
    
    /**
     * Constructor.
     *
     * @param base_addr       Base address of the accelerator
     * @param dma_device_path Path of the DMA device (defaults to an empty string)
     * @param use_interrupts  Whether to use interrupts (defaults to true)
     */
    HardwareAccelerator(uint32_t base_addr, 
                       const std::string& dma_device_path = "",
                       bool use_interrupts = true);
    
    /**
     * Destructor.
     */
    ~HardwareAccelerator();
    
    /**
     * Initializes the accelerator.
     *
     * @return Whether initialization succeeded
     */
    bool initialize();
    
    /**
     * Checks the accelerator state.
     *
     * @return Whether the accelerator is available
     */
    bool isReady() const;
    
    /**
     * Sets the operation type.
     *
     * @param op_type Operation type
     */
    void setOperationType(OperationType op_type);
    
    /**
     * Configures an accelerator parameter.
     *
     * @param param_id Parameter ID
     * @param value    Parameter value
     */
    void setParameter(uint32_t param_id, uint32_t value);
    
    /**
     * Runs an operation on a vector of floats.
     *
     * @param input_data  Input data
     * @param output_data Output data
     * @param size        Data size
     * @return Whether the operation succeeded
     */
    bool processFloatData(const std::vector<float>& input_data, 
                         std::vector<float>& output_data,
                         size_t size);
    
    /**
     * Runs an operation on a vector of integers.
     *
     * @param input_data  Input data
     * @param output_data Output data
     * @param size        Data size
     * @return Whether the operation succeeded
     */
    bool processIntData(const std::vector<int32_t>& input_data, 
                       std::vector<int32_t>& output_data,
                       size_t size);
    
    /**
     * Performs a matrix multiplication.
     *
     * @param matrix_a Matrix A
     * @param matrix_b Matrix B
     * @param matrix_c Result matrix C
     * @param rows_a   Number of rows of A
     * @param cols_a   Number of columns of A
     * @param cols_b   Number of columns of B
     * @return Whether the operation succeeded
     */
    bool matrixMultiply(const std::vector<float>& matrix_a,
                       const std::vector<float>& matrix_b,
                       std::vector<float>& matrix_c,
                       uint32_t rows_a, uint32_t cols_a, uint32_t cols_b);
    
    /**
     * Performs an FFT.
     *
     * @param real_in  Real part of the input
     * @param imag_in  Imaginary part of the input
     * @param real_out Real part of the output
     * @param imag_out Imaginary part of the output
     * @param fft_size FFT size
     * @param inverse  Whether to perform the inverse FFT (defaults to false)
     * @return Whether the operation succeeded
     */
    bool performFFT(const std::vector<float>& real_in,
                   const std::vector<float>& imag_in,
                   std::vector<float>& real_out,
                   std::vector<float>& imag_out,
                   uint32_t fft_size, bool inverse = false);
    
    /**
     * Returns the processing time of the last operation, in milliseconds.
     *
     * @return Processing time
     */
    double getLastProcessingTime() const;
    
    /**
     * Returns the hardware version information.
     *
     * @return Version information string
     */
    std::string getVersionInfo() const;
    
    /**
     * Resets the hardware accelerator.
     */
    void reset();
    
private:
    // PIMPL: the implementation object is owned through this pointer
    std::unique_ptr<HardwareAcceleratorImpl> impl;
    
    // Copy construction and copy assignment are disabled
    HardwareAccelerator(const HardwareAccelerator&) = delete;
    HardwareAccelerator& operator=(const HardwareAccelerator&) = delete;
};

#endif // HW_ACCELERATOR_HPP

cpp

/**
 * hw_accelerator.cpp - 硬件加速器C++接口实现
 * 
 * 基于Zynq平台实现硬件加速器接口
 */

#include "hw_accelerator.hpp"
#include <chrono>
#include <iostream>
#include <fstream>
#include <cstring>
#include <cerrno>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <poll.h>

// Register offsets
#define REG_CONTROL        0x00
#define REG_STATUS         0x04
#define REG_VERSION        0x08
#define REG_OP_TYPE        0x0C
#define REG_PARAM_0        0x10
#define REG_PARAM_1        0x14
#define REG_PARAM_2        0x18
#define REG_PARAM_3        0x1C
#define REG_SRC_ADDR_LO    0x20
#define REG_SRC_ADDR_HI    0x24
#define REG_DST_ADDR_LO    0x28
#define REG_DST_ADDR_HI    0x2C
#define REG_SRC_SIZE       0x30
#define REG_DST_SIZE       0x34
#define REG_EXEC_TIME      0x38
#define REG_INTR_ENABLE    0x3C

// Control register bits
#define CTRL_START         0x00000001
#define CTRL_RESET         0x00000002
#define CTRL_INTR_ENABLE   0x00000004
#define CTRL_ABORT         0x00000008

// Status register bits
#define STATUS_BUSY        0x00000001
#define STATUS_DONE        0x00000002
#define STATUS_ERROR       0x00000004
#define STATUS_READY       0x00000008

// DMA buffer configuration
#define DMA_BUFFER_SIZE    (16 * 1024 * 1024)  // 16 MB
#define PAGE_SIZE          4096
#define DMA_ALIGNMENT      4096

// 硬件加速器实现类
class HardwareAcceleratorImpl {
public:
    /**
     * Record the configuration and put every handle into its "not opened"
     * state. No device is touched here; initialize() performs the real setup.
     *
     * @param base_addr        value later passed to mmap() as the /dev/mem offset
     * @param dma_device_path  DMA device path; an empty string skips DMA setup
     * @param use_interrupts   whether completion is awaited through the UIO device
     */
    HardwareAcceleratorImpl(uint32_t base_addr, 
                           const std::string& dma_device_path,
                           bool use_interrupts) 
        : base_addr_(base_addr),
          dma_device_path_(dma_device_path),
          use_interrupts_(use_interrupts),
          mem_fd_(-1),
          dma_fd_(-1),
          uio_fd_(-1),
          reg_map_(nullptr),
          dma_buffer_(nullptr),
          dma_buffer_size_(0),
          dma_buffer_phys_addr_(0),
          last_processing_time_(0.0),
          initialized_(false) {
    }
    
    /** Release every mapping and file descriptor still held by this object. */
    ~HardwareAcceleratorImpl() {
        cleanup();
    }
    
    /**
     * Open the devices, map the register page, optionally set up DMA and the
     * UIO interrupt device, pulse the reset bit and verify STATUS_READY.
     *
     * Every failure path releases whatever was acquired so far, and a repeated
     * call first tears down the previous session, so no descriptor or mapping
     * can leak. A missing UIO device is not fatal: the object silently falls
     * back to polling.
     *
     * @return true when the accelerator reports ready; false otherwise, with
     *         the reason stored in last_error_.
     */
    bool initialize() {
        // Start from a clean slate so calling initialize() twice (or after a
        // failed attempt) cannot leak the earlier fd/mapping.
        cleanup();

        // /dev/mem gives access to physical memory, including the PL registers.
        mem_fd_ = open("/dev/mem", O_RDWR | O_SYNC);
        if (mem_fd_ < 0) {
            last_error_ = "无法打开/dev/mem: " + std::string(strerror(errno));
            return false;
        }

        // Map one page of accelerator registers.
        reg_map_ = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
                       mem_fd_, base_addr_);
        if (reg_map_ == MAP_FAILED) {
            // Capture errno before cleanup() can overwrite it.
            last_error_ = "内存映射失败: " + std::string(strerror(errno));
            reg_map_ = nullptr;
            cleanup();
            return false;
        }

        // Interrupt mode needs the UIO device; without it, poll instead.
        if (use_interrupts_) {
            uio_fd_ = open("/dev/uio0", O_RDWR);
            if (uio_fd_ < 0) {
                std::cerr << "警告:无法打开UIO设备,将使用轮询模式" << std::endl;
                use_interrupts_ = false;
            }
        }

        // DMA is optional and only attempted when a device path was supplied.
        if (!dma_device_path_.empty() && !initializeDma()) {
            // initializeDma() already filled last_error_; undo the register
            // mapping and descriptors just like the other failure paths do.
            cleanup();
            return false;
        }

        // Pulse the reset bit and give the hardware 10 ms to settle.
        writeRegister(REG_CONTROL, CTRL_RESET);
        usleep(10000);
        writeRegister(REG_CONTROL, 0);

        const uint32_t version = readRegister(REG_VERSION);
        const uint32_t status = readRegister(REG_STATUS);

        std::cout << "硬件加速器版本: 0x" << std::hex << version << std::dec << std::endl;
        std::cout << "初始状态: 0x" << std::hex << status << std::dec << std::endl;

        if (!(status & STATUS_READY)) {
            last_error_ = "加速器未就绪";
            cleanup();
            return false;
        }

        initialized_ = true;
        return true;
    }
    
    void cleanup() {
        if (reg_map_ && reg_map_ != MAP_FAILED) {
            munmap(reg_map_, PAGE_SIZE);
            reg_map_ = nullptr;
        }
        
        if (dma_buffer_) {
            munmap(dma_buffer_, dma_buffer_size_);
            dma_buffer_ = nullptr;
        }
        
        if (mem_fd_ >= 0) {
            close(mem_fd_);
            mem_fd_ = -1;
        }
        
        if (dma_fd_ >= 0) {
            close(dma_fd_);
            dma_fd_ = -1;
        }
        
        if (uio_fd_ >= 0) {
            close(uio_fd_);
            uio_fd_ = -1;
        }
        
        initialized_ = false;
    }
    
    /**
     * Report whether the accelerator can accept work.
     *
     * @return false when the object is not initialized; otherwise whether the
     *         STATUS_READY bit is currently set in the status register.
     */
    bool isReady() const {
        // Short-circuit keeps the register untouched while uninitialized.
        return initialized_ && (readRegister(REG_STATUS) & STATUS_READY) != 0;
    }
    
    /**
     * Write the requested operation code into REG_OP_TYPE.
     *
     * @param op_type operation selector; its integral value is written as-is
     * @throws HardwareAcceleratorException when the object is not initialized
     */
    void setOperationType(HardwareAccelerator::OperationType op_type) {
        if (initialized_) {
            const uint32_t encoded = static_cast<uint32_t>(op_type);
            writeRegister(REG_OP_TYPE, encoded);
            return;
        }

        throw HardwareAcceleratorException("加速器未初始化");
    }
    
    /**
     * Write one of the four 32-bit parameter registers.
     *
     * @param param_id index 0..3 selecting REG_PARAM_0..REG_PARAM_3
     * @param value    raw value to store
     * @throws HardwareAcceleratorException when the object is not initialized
     *         or param_id is out of range
     */
    void setParameter(uint32_t param_id, uint32_t value) {
        if (!initialized_) {
            throw HardwareAcceleratorException("加速器未初始化");
        }
        if (param_id >= 4) {
            throw HardwareAcceleratorException("无效的参数ID");
        }

        // Parameter registers are consecutive 32-bit words starting at REG_PARAM_0.
        const uint32_t target = REG_PARAM_0 + (param_id << 2);
        writeRegister(target, value);
    }
    
    /**
     * Run the currently configured operation over `size` floats.
     *
     * Uses the DMA path when a DMA buffer with a known physical address is
     * available; otherwise feeds the values one at a time through
     * REG_PARAM_0 / REG_PARAM_1.
     *
     * @param input_data  source values; must hold at least `size` elements
     * @param output_data resized to `size` and filled with the results
     * @param size        number of elements to process
     * @return true on success; false with last_error_ set otherwise
     */
    bool processFloatData(const std::vector<float>& input_data, 
                         std::vector<float>& output_data,
                         size_t size) {
        static_assert(sizeof(float) == sizeof(uint32_t),
                      "float must be 32 bits wide to travel through a 32-bit register");

        if (!initialized_) {
            last_error_ = "加速器未初始化";
            return false;
        }

        if (input_data.size() < size) {
            last_error_ = "输入数据大小不足";
            return false;
        }

        output_data.resize(size);

        if (dma_buffer_ && dma_buffer_phys_addr_ != 0) {
            return processDMA(input_data.data(), output_data.data(),
                             size * sizeof(float));
        }

        // Register path: one element per start/wait cycle.
        for (size_t i = 0; i < size; i++) {
            // Copy the bit pattern with memcpy; dereferencing a float through a
            // uint32_t pointer (or vice versa) violates strict aliasing.
            uint32_t raw_in = 0;
            std::memcpy(&raw_in, &input_data[i], sizeof(raw_in));
            writeRegister(REG_PARAM_0, raw_in);
            writeRegister(REG_CONTROL, CTRL_START);

            if (!waitForCompletion()) {
                return false;
            }

            const uint32_t raw_out = readRegister(REG_PARAM_1);
            float result = 0.0f;
            std::memcpy(&result, &raw_out, sizeof(result));
            output_data[i] = result;
        }
        return true;
    }
    
    /**
     * Run the currently configured operation over `size` 32-bit integers.
     *
     * Uses the DMA path when a DMA buffer with a known physical address is
     * available; otherwise pushes each value through REG_PARAM_0 and reads the
     * result back from REG_PARAM_1.
     *
     * @param input_data  source values; must hold at least `size` elements
     * @param output_data resized to `size` and filled with the results
     * @param size        number of elements to process
     * @return true on success; false with last_error_ set otherwise
     */
    bool processIntData(const std::vector<int32_t>& input_data, 
                       std::vector<int32_t>& output_data,
                       size_t size) {
        if (!initialized_) {
            last_error_ = "加速器未初始化";
            return false;
        }
        if (input_data.size() < size) {
            last_error_ = "输入数据大小不足";
            return false;
        }

        output_data.resize(size);

        const bool dma_available = (dma_buffer_ != nullptr) && (dma_buffer_phys_addr_ != 0);
        if (dma_available) {
            const size_t byte_count = size * sizeof(int32_t);
            return processDMA(input_data.data(), output_data.data(), byte_count);
        }

        // Register path: one element per start/wait cycle.
        for (size_t idx = 0; idx != size; ++idx) {
            writeRegister(REG_PARAM_0, static_cast<uint32_t>(input_data[idx]));
            writeRegister(REG_CONTROL, CTRL_START);

            if (!waitForCompletion()) {
                return false;
            }

            output_data[idx] = static_cast<int32_t>(readRegister(REG_PARAM_1));
        }
        return true;
    }
    
    /**
     * Multiply A (rows_a x cols_a) by B (cols_a x cols_b) on the accelerator.
     *
     * Only the DMA path is supported. The DMA buffer is laid out as
     * [A | B | C]: exactly rows_a*cols_a floats of A, immediately followed by
     * cols_a*cols_b floats of B, with the result region right after the inputs.
     * All feasibility checks run before any register is written, so a rejected
     * request leaves both the hardware configuration and matrix_c untouched.
     *
     * @param matrix_a row-major A; at least rows_a*cols_a elements
     * @param matrix_b row-major B; at least cols_a*cols_b elements
     * @param matrix_c resized to rows_a*cols_b and filled with the result
     * @return true on success; false with last_error_ set otherwise
     */
    bool matrixMultiply(const std::vector<float>& matrix_a,
                       const std::vector<float>& matrix_b,
                       std::vector<float>& matrix_c,
                       uint32_t rows_a, uint32_t cols_a, uint32_t cols_b) {
        if (!initialized_) {
            last_error_ = "加速器未初始化";
            return false;
        }

        // Element counts in 64 bits: a 32-bit product could wrap around and
        // let undersized inputs slip through the validation below.
        const uint64_t a_elems = static_cast<uint64_t>(rows_a) * cols_a;
        const uint64_t b_elems = static_cast<uint64_t>(cols_a) * cols_b;
        const uint64_t c_elems = static_cast<uint64_t>(rows_a) * cols_b;

        if (matrix_a.size() < a_elems || matrix_b.size() < b_elems) {
            last_error_ = "矩阵维度与数据大小不一致";
            return false;
        }

        if (!(dma_buffer_ && dma_buffer_phys_addr_ != 0)) {
            last_error_ = "矩阵乘法需要DMA支持";
            return false;
        }

        // Compare in elements so the byte sizes computed afterwards cannot overflow.
        const uint64_t capacity_elems = dma_buffer_size_ / sizeof(float);
        if (a_elems > capacity_elems || b_elems > capacity_elems ||
            c_elems > capacity_elems ||
            a_elems + b_elems + c_elems > capacity_elems) {
            last_error_ = "DMA缓冲区不足";
            return false;
        }

        setOperationType(HardwareAccelerator::MATRIX_MULTIPLY);
        setParameter(0, rows_a);
        setParameter(1, cols_a);
        setParameter(2, cols_b);

        matrix_c.resize(static_cast<size_t>(c_elems));

        const size_t a_bytes = static_cast<size_t>(a_elems) * sizeof(float);
        const size_t b_bytes = static_cast<size_t>(b_elems) * sizeof(float);
        const size_t total_input_size = a_bytes + b_bytes;
        const size_t output_size = static_cast<size_t>(c_elems) * sizeof(float);

        // Byte-wise view of the buffer; arithmetic on void* is not valid C++.
        char* const dma_bytes = static_cast<char*>(dma_buffer_);

        // Copy exactly the element counts implied by the programmed dimensions
        // so B starts right after A even when the caller's vectors are oversized.
        if (a_bytes != 0) {
            std::memcpy(dma_bytes, matrix_a.data(), a_bytes);
        }
        if (b_bytes != 0) {
            std::memcpy(dma_bytes + a_bytes, matrix_b.data(), b_bytes);
        }

        const uint64_t src_addr = dma_buffer_phys_addr_;
        const uint64_t dst_addr = dma_buffer_phys_addr_ + total_input_size;

        writeRegister(REG_SRC_ADDR_LO, static_cast<uint32_t>(src_addr & 0xFFFFFFFF));
        writeRegister(REG_SRC_ADDR_HI, static_cast<uint32_t>((src_addr >> 32) & 0xFFFFFFFF));
        writeRegister(REG_DST_ADDR_LO, static_cast<uint32_t>(dst_addr & 0xFFFFFFFF));
        writeRegister(REG_DST_ADDR_HI, static_cast<uint32_t>((dst_addr >> 32) & 0xFFFFFFFF));
        writeRegister(REG_SRC_SIZE, static_cast<uint32_t>(total_input_size));
        writeRegister(REG_DST_SIZE, static_cast<uint32_t>(output_size));

        uint32_t ctrl_val = CTRL_START;
        if (use_interrupts_) {
            ctrl_val |= CTRL_INTR_ENABLE;
        }

        // Wall-clock time from the start write until completion is observed.
        auto start_time = std::chrono::high_resolution_clock::now();
        writeRegister(REG_CONTROL, ctrl_val);

        if (!waitForCompletion()) {
            return false;
        }

        auto end_time = std::chrono::high_resolution_clock::now();
        last_processing_time_ = std::chrono::duration<double, std::milli>(
            end_time - start_time).count();

        if (output_size != 0) {
            std::memcpy(matrix_c.data(), dma_bytes + total_input_size, output_size);
        }

        return true;
    }
    
    bool performFFT(const std::vector<float>& real_in,
                   const std::vector<float>& imag_in,
                   std::vector<float>& real_out,
                   std::vector<float>& imag_out,
                   uint32_t fft_size, bool inverse) {
        if (!initialized_) {
            last_error_ = "加速器未初始化";
            return false;
        }
        
        // 验证输入大小
        if (real_in.size() < fft_size || imag_in.size() < fft_size) {
            last_error_ = "输入数据大小不足";
            return false;
        }
        
        // 设置操作类型为FFT
        setOperationType(HardwareAcc

cpp

        // 设置操作类型为FFT
        setOperationType(HardwareAccelerator::FFT_TRANSFORM);
        
        // 配置FFT参数
        setParameter(0, fft_size);
        setParameter(1, inverse ? 1 : 0);
        
        // 调整输出向量大小
        real_out.resize(fft_size);
        imag_out.resize(fft_size);
        
        // 如果有DMA,使用DMA传输
        if (dma_buffer_ && dma_buffer_phys_addr_ != 0) {
            // 准备DMA缓冲区 - 实部和虚部交替排列
            size_t input_size = fft_size * 2 * sizeof(float);  // 实部+虚部
            size_t output_size = fft_size * 2 * sizeof(float);
            
            if (input_size + output_size > dma_buffer_size_) {
                last_error_ = "DMA缓冲区不足";
                return false;
            }
            
            // 复制输入数据到DMA缓冲区,交替实部和虚部
            float* dma_src = reinterpret_cast<float*>(dma_buffer_);
            for (uint32_t i = 0; i < fft_size; i++) {
                dma_src[i*2] = real_in[i];     // 实部
                dma_src[i*2+1] = imag_in[i];   // 虚部
            }
            
            // 配置DMA传输参数
            uint64_t src_addr = dma_buffer_phys_addr_;
            uint64_t dst_addr = dma_buffer_phys_addr_ + input_size;
            
            writeRegister(REG_SRC_ADDR_LO, src_addr & 0xFFFFFFFF);
            writeRegister(REG_SRC_ADDR_HI, (src_addr >> 32) & 0xFFFFFFFF);
            writeRegister(REG_DST_ADDR_LO, dst_addr & 0xFFFFFFFF);
            writeRegister(REG_DST_ADDR_HI, (dst_addr >> 32) & 0xFFFFFFFF);
            writeRegister(REG_SRC_SIZE, input_size);
            writeRegister(REG_DST_SIZE, output_size);
            
            // 启动加速器
            uint32_t ctrl_val = CTRL_START;
            if (use_interrupts_) {
                ctrl_val |= CTRL_INTR_ENABLE;
            }
            
            auto start_time = std::chrono::high_resolution_clock::now();
            writeRegister(REG_CONTROL, ctrl_val);
            
            // 等待完成
            if (!waitForCompletion()) {
                return false;
            }
            
            auto end_time = std::chrono::high_resolution_clock::now();
            last_processing_time_ = std::chrono::duration<double, std::milli>(
                end_time - start_time).count();
            
            // 复制结果到输出向量,分离实部和虚部
            float* dma_dst = reinterpret_cast<float*>(dma_buffer_ + input_size);
            for (uint32_t i = 0; i < fft_size; i++) {
                real_out[i] = dma_dst[i*2];     // 实部
                imag_out[i] = dma_dst[i*2+1];   // 虚部
            }
            
            return true;
            
        } else {
            // 不支持大FFT的非DMA操作
            last_error_ = "FFT变换需要DMA支持";
            return false;
        }
    }
    
    /**
     * @return the wall-clock duration in milliseconds recorded by the most
     *         recent DMA-based operation (0.0 until one has completed).
     */
    double getLastProcessingTime() const {
        return last_processing_time_;
    }
    
    /**
     * Format the version register as "major.minor.patch".
     *
     * The three fields are taken from bits 23..16, 15..8 and 7..0 of
     * REG_VERSION respectively.
     *
     * @return the formatted version, or "未初始化" when not initialized
     */
    std::string getVersionInfo() const {
        if (!initialized_) {
            return "未初始化";
        }

        const uint32_t version = readRegister(REG_VERSION);
        char version_str[32];
        // The fields are unsigned: use %u with explicit unsigned arguments
        // instead of passing uint32_t values to %d.
        snprintf(version_str, sizeof(version_str), "%u.%u.%u",
                 static_cast<unsigned>((version >> 16) & 0xFF),
                 static_cast<unsigned>((version >> 8) & 0xFF),
                 static_cast<unsigned>(version & 0xFF));

        return version_str;
    }
    
    /**
     * Pulse the reset bit of the control register.
     *
     * @throws HardwareAcceleratorException when the object is not initialized
     */
    void reset() {
        if (!initialized_) {
            throw HardwareAcceleratorException("加速器未初始化");
        }
        
        writeRegister(REG_CONTROL, CTRL_RESET);
        usleep(10000);  // keep reset asserted for 10 ms before clearing it
        writeRegister(REG_CONTROL, 0);
    }
    
    /** @return the message stored by the most recent failing operation. */
    const std::string& getLastError() const {
        return last_error_;
    }
    
private:
    /**
     * Read one 32-bit register from the mapped register page.
     *
     * @param offset byte offset of the register; converted to a word index
     * @return the value currently held by that register
     */
    uint32_t readRegister(uint32_t offset) const {
        // volatile: every call must perform a real device access.
        volatile uint32_t* const regs = static_cast<volatile uint32_t*>(reg_map_);
        const uint32_t word_index = offset / 4u;
        return regs[word_index];
    }
    
    /**
     * Write one 32-bit register in the mapped register page.
     *
     * @param offset byte offset of the register; converted to a word index
     * @param value  value to store
     */
    void writeRegister(uint32_t offset, uint32_t value) {
        // volatile: the store must reach the device and must not be elided.
        volatile uint32_t* const regs = static_cast<volatile uint32_t*>(reg_map_);
        const uint32_t word_index = offset / 4u;
        regs[word_index] = value;
    }
    
    /**
     * Open the DMA device, map a DMA_BUFFER_SIZE buffer from it and ask the
     * driver for the buffer's physical address.
     *
     * When the device cannot be opened or mapped, the call fails and leaves no
     * descriptor or mapping behind. When only the physical-address query fails,
     * DMA is switched off (buffer released, address left at 0) and the call
     * still succeeds: a guessed physical address must never be handed to the
     * hardware, because a DMA engine writing to a wrong address corrupts
     * unrelated memory. Callers then use their non-DMA path or report that DMA
     * is required.
     *
     * NOTE(review): the ioctl request ('D', 0) and its payload layout are an
     * illustrative convention; confirm them against the actual DMA driver.
     *
     * @return false if the device could not be opened or mapped (last_error_
     *         set); true otherwise
     */
    bool initializeDma() {
        dma_fd_ = open(dma_device_path_.c_str(), O_RDWR);
        if (dma_fd_ < 0) {
            last_error_ = "无法打开DMA设备: " + std::string(strerror(errno));
            return false;
        }

        dma_buffer_size_ = DMA_BUFFER_SIZE;
        dma_buffer_ = mmap(NULL, dma_buffer_size_, PROT_READ | PROT_WRITE,
                          MAP_SHARED, dma_fd_, 0);

        if (dma_buffer_ == MAP_FAILED) {
            // Record errno first; close() below may overwrite it.
            last_error_ = "DMA缓冲区分配失败: " + std::string(strerror(errno));
            dma_buffer_ = nullptr;
            dma_buffer_size_ = 0;
            close(dma_fd_);
            dma_fd_ = -1;
            return false;
        }

        // Named type instead of typeof(): typeof is a GNU keyword that is not
        // available under -std=c++11. Value-initialized so the driver never
        // sees indeterminate bytes.
        struct DmaInfo {
            void* virtual_addr;
            uint64_t physical_addr;
            size_t size;
        };
        DmaInfo dma_info = DmaInfo();
        dma_info.virtual_addr = dma_buffer_;
        dma_info.size = dma_buffer_size_;

        if (ioctl(dma_fd_, _IOWR('D', 0, DmaInfo), &dma_info) < 0) {
            // No trustworthy physical address: disable DMA rather than derive
            // one from the virtual address.
            std::cerr << "警告:无法通过ioctl获取DMA物理地址,已禁用DMA" << std::endl;
            munmap(dma_buffer_, dma_buffer_size_);
            dma_buffer_ = nullptr;
            dma_buffer_size_ = 0;
            dma_buffer_phys_addr_ = 0;
            close(dma_fd_);
            dma_fd_ = -1;
            return true;
        }

        dma_buffer_phys_addr_ = dma_info.physical_addr;

        std::cout << "DMA缓冲区已分配:" << std::endl;
        // Pointers already print with a 0x prefix, so none is added here.
        std::cout << "  虚拟地址: " << dma_buffer_ << std::endl;
        std::cout << "  物理地址: 0x" << std::hex << dma_buffer_phys_addr_ << std::dec << std::endl;
        std::cout << "  大小: " << dma_buffer_size_ << " 字节" << std::endl;

        return true;
    }
    
    /**
     * Wait until the accelerator signals STATUS_DONE or STATUS_ERROR.
     *
     * In interrupt mode the UIO descriptor is polled with a 5 s timeout and
     * the interrupt counter is consumed before the status register is checked.
     * Otherwise the status register is polled about once per millisecond for
     * up to 5000 iterations.
     *
     * @return true only when STATUS_DONE was observed; on every other outcome
     *         false is returned and last_error_ describes the reason
     */
    bool waitForCompletion() {
        bool completion_success = false;

        if (use_interrupts_ && uio_fd_ >= 0) {
            // Member-wise setup: designated initializers are not C++11.
            struct pollfd fds;
            std::memset(&fds, 0, sizeof(fds));
            fds.fd = uio_fd_;
            fds.events = POLLIN;

            // A signal may interrupt poll(); that is not a hardware failure,
            // so wait again (the timeout restarts on each retry).
            int ret;
            do {
                ret = poll(&fds, 1, 5000);  // 5 s timeout
            } while (ret < 0 && errno == EINTR);

            if (ret > 0) {
                // Consume the interrupt counter so the next wait blocks again.
                uint32_t interrupt_count = 0;
                const ssize_t got = read(uio_fd_, &interrupt_count, sizeof(interrupt_count));
                if (got < 0) {
                    last_error_ = "读取中断计数失败: " + std::string(strerror(errno));
                    return false;
                }
                if (static_cast<size_t>(got) != sizeof(interrupt_count)) {
                    last_error_ = "读取中断计数失败: 读取长度不足";
                    return false;
                }

                // The interrupt alone proves nothing; confirm via the status register.
                const uint32_t status = readRegister(REG_STATUS);
                if (status & STATUS_DONE) {
                    completion_success = true;
                } else if (status & STATUS_ERROR) {
                    last_error_ = "硬件加速器报告错误";
                } else {
                    last_error_ = "中断触发但操作未完成";
                }
            } else if (ret == 0) {
                last_error_ = "等待中断超时";
            } else {
                last_error_ = "等待中断失败: " + std::string(strerror(errno));
            }
        } else {
            int timeout_count = 0;
            const int max_timeout = 5000;  // iterations of ~1 ms each

            while (timeout_count < max_timeout) {
                const uint32_t status = readRegister(REG_STATUS);

                if (status & STATUS_DONE) {
                    completion_success = true;
                    break;
                }

                if (status & STATUS_ERROR) {
                    last_error_ = "硬件加速器报告错误";
                    break;
                }

                // Sleep briefly between reads to keep CPU load down.
                usleep(1000);
                timeout_count++;
            }

            // Only reached with the full count when neither DONE nor ERROR showed up.
            if (timeout_count >= max_timeout) {
                last_error_ = "等待操作完成超时";
            }
        }

        return completion_success;
    }
    
    /**
     * Process `bytes` bytes through the accelerator using the DMA buffer.
     *
     * The input is copied to the start of the DMA buffer and the result is
     * read back from the region starting `bytes` bytes into it, so the buffer
     * must hold at least 2 * bytes. The elapsed wall-clock time is stored in
     * last_processing_time_ and the hardware cycle counter is printed.
     *
     * @param input_data  source of `bytes` bytes
     * @param output_data destination for `bytes` bytes
     * @param bytes       transfer size in bytes
     * @return true on success; false with last_error_ set otherwise
     */
    template<typename T>
    bool processDMA(const T* input_data, T* output_data, size_t bytes) {
        // Written as a division so a huge `bytes` cannot wrap `bytes * 2`
        // around and slip past the check.
        if (bytes > dma_buffer_size_ / 2) {
            last_error_ = "DMA缓冲区不足";
            return false;
        }

        memcpy(dma_buffer_, input_data, bytes);

        // Source at the start of the buffer, destination right behind it.
        const uint64_t src_addr = dma_buffer_phys_addr_;
        const uint64_t dst_addr = dma_buffer_phys_addr_ + bytes;

        writeRegister(REG_SRC_ADDR_LO, static_cast<uint32_t>(src_addr & 0xFFFFFFFF));
        writeRegister(REG_SRC_ADDR_HI, static_cast<uint32_t>((src_addr >> 32) & 0xFFFFFFFF));
        writeRegister(REG_DST_ADDR_LO, static_cast<uint32_t>(dst_addr & 0xFFFFFFFF));
        writeRegister(REG_DST_ADDR_HI, static_cast<uint32_t>((dst_addr >> 32) & 0xFFFFFFFF));
        writeRegister(REG_SRC_SIZE, static_cast<uint32_t>(bytes));
        writeRegister(REG_DST_SIZE, static_cast<uint32_t>(bytes));

        uint32_t ctrl_val = CTRL_START;
        if (use_interrupts_) {
            ctrl_val |= CTRL_INTR_ENABLE;
        }

        auto start_time = std::chrono::high_resolution_clock::now();
        writeRegister(REG_CONTROL, ctrl_val);

        if (!waitForCompletion()) {
            return false;
        }

        auto end_time = std::chrono::high_resolution_clock::now();
        last_processing_time_ = std::chrono::duration<double, std::milli>(
            end_time - start_time).count();

        // Execution-time counter, if the hardware provides one.
        const uint32_t hw_exec_time = readRegister(REG_EXEC_TIME);
        std::cout << "硬件计时器: " << hw_exec_time << " 周期" << std::endl;

        memcpy(output_data, static_cast<char*>(dma_buffer_) + bytes, bytes);

        return true;
    }
    
private:
    // Device parameters supplied at construction
    uint32_t base_addr_;            // passed to mmap() as the /dev/mem offset
    std::string dma_device_path_;   // empty means DMA is not set up
    bool use_interrupts_;           // cleared when the UIO device cannot be opened
    
    // File descriptors (-1 while closed) and the register mapping
    int mem_fd_;
    int dma_fd_;
    int uio_fd_;
    void* reg_map_;
    
    // DMA buffer: user-space mapping, its size, and the address programmed
    // into the accelerator (0 means unknown; DMA paths are then skipped)
    void* dma_buffer_;
    size_t dma_buffer_size_;
    uint64_t dma_buffer_phys_addr_;
    
    // State
    double last_processing_time_;   // milliseconds, set by DMA-based operations
    std::string last_error_;        // message of the most recent failure
    bool initialized_;              // true only after initialize() succeeded
};

// HardwareAccelerator类方法的实现 - 委托给HardwareAcceleratorImpl

// Build the facade by allocating the private implementation. The Impl
// constructor only stores its arguments, so no device is opened here.
HardwareAccelerator::HardwareAccelerator(uint32_t base_addr, 
                                       const std::string& dma_device_path,
                                       bool use_interrupts)
    : impl(new HardwareAcceleratorImpl(base_addr, dma_device_path, use_interrupts)) {
}

// Defaulted in this translation unit, where HardwareAcceleratorImpl is a
// complete type (presumably it is only forward-declared in the header, which
// std::unique_ptr cannot destroy — confirm against hw_accelerator.hpp).
HardwareAccelerator::~HardwareAccelerator() = default;

// Forwards to HardwareAcceleratorImpl::initialize().
bool HardwareAccelerator::initialize() {
    return impl->initialize();
}

// Forwards to HardwareAcceleratorImpl::isReady().
bool HardwareAccelerator::isReady() const {
    return impl->isReady();
}

// Forwards to HardwareAcceleratorImpl::setOperationType(), which throws
// HardwareAcceleratorException when the accelerator is not initialized.
void HardwareAccelerator::setOperationType(OperationType op_type) {
    impl->setOperationType(op_type);
}

// Forwards to HardwareAcceleratorImpl::setParameter(), which throws
// HardwareAcceleratorException when uninitialized or when param_id > 3.
void HardwareAccelerator::setParameter(uint32_t param_id, uint32_t value) {
    impl->setParameter(param_id, value);
}

// Forwards to HardwareAcceleratorImpl::processFloatData().
bool HardwareAccelerator::processFloatData(const std::vector<float>& input_data, 
                                         std::vector<float>& output_data,
                                         size_t size) {
    return impl->processFloatData(input_data, output_data, size);
}

// Forwards to HardwareAcceleratorImpl::processIntData().
bool HardwareAccelerator::processIntData(const std::vector<int32_t>& input_data, 
                                       std::vector<int32_t>& output_data,
                                       size_t size) {
    return impl->processIntData(input_data, output_data, size);
}

// Forwards to HardwareAcceleratorImpl::matrixMultiply().
bool HardwareAccelerator::matrixMultiply(const std::vector<float>& matrix_a,
                                       const std::vector<float>& matrix_b,
                                       std::vector<float>& matrix_c,
                                       uint32_t rows_a, uint32_t cols_a, uint32_t cols_b) {
    return impl->matrixMultiply(matrix_a, matrix_b, matrix_c, rows_a, cols_a, cols_b);
}

// Forwards to HardwareAcceleratorImpl::performFFT().
bool HardwareAccelerator::performFFT(const std::vector<float>& real_in,
                                   const std::vector<float>& imag_in,
                                   std::vector<float>& real_out,
                                   std::vector<float>& imag_out,
                                   uint32_t fft_size, bool inverse) {
    return impl->performFFT(real_in, imag_in, real_out, imag_out, fft_size, inverse);
}

// Forwards to HardwareAcceleratorImpl::getLastProcessingTime() (milliseconds).
double HardwareAccelerator::getLastProcessingTime() const {
    return impl->getLastProcessingTime();
}

// Forwards to HardwareAcceleratorImpl::getVersionInfo().
std::string HardwareAccelerator::getVersionInfo() const {
    return impl->getVersionInfo();
}

// Forwards to HardwareAcceleratorImpl::reset(), which throws
// HardwareAcceleratorException when the accelerator is not initialized.
void HardwareAccelerator::reset() {
    impl->reset();
}

cpp

/**
 * hw_accelerator_demo.cpp - 硬件加速器演示应用
 * 
 * 演示如何使用Zynq平台上的硬件加速器
 * 编译: g++ -std=c++11 -o hw_accelerator_demo hw_accelerator_demo.cpp hw_accelerator.cpp
 */

#include <iostream>
#include <vector>
#include <iomanip>
#include <chrono>
#include <random>
#include <cmath>
#include "hw_accelerator.hpp"

/**
 * Print a preview (at most the top-left 10x10 corner) of a row-major matrix
 * to std::cout, preceded by "<name> (<rows>x<cols>):".
 *
 * Rows or columns that are cut off are marked with "...". The stream's
 * formatting flags and precision are restored before returning, so the fixed
 * 4-digit format used for the elements does not leak into later output.
 *
 * @param matrix row-major data; must hold at least rows * cols elements
 * @param rows   number of rows
 * @param cols   number of columns
 * @param name   label printed in the header line
 */
void printMatrix(const std::vector<float>& matrix, int rows, int cols, const std::string& name) {
    // std::fixed and std::setprecision are sticky; remember the caller's settings.
    const std::ios_base::fmtflags saved_flags = std::cout.flags();
    const std::streamsize saved_precision = std::cout.precision();

    std::cout << name << " (" << rows << "x" << cols << "):" << std::endl;

    // Preview limits (plain comparisons keep this free of <algorithm>).
    const int print_rows = rows < 10 ? rows : 10;
    const int print_cols = cols < 10 ? cols : 10;

    for (int i = 0; i < print_rows; i++) {
        for (int j = 0; j < print_cols; j++) {
            std::cout << std::setw(9) << std::fixed << std::setprecision(4)
                      << matrix[i * cols + j] << " ";
        }

        if (print_cols < cols) {
            std::cout << "...";
        }

        std::cout << std::endl;
    }

    if (print_rows < rows) {
        std::cout << "..." << std::endl;
    }

    std::cout << std::endl;

    std::cout.flags(saved_flags);
    std::cout.precision(saved_precision);
}

/**
 * Reference CPU implementation of C = A * B for row-major float matrices,
 * used to cross-check the accelerator result.
 *
 * @param matrix_a A, rows_a x cols_a
 * @param matrix_b B, cols_a x cols_b
 * @return C, rows_a x cols_b, row-major
 */
std::vector<float> matrixMultiplySoftware(const std::vector<float>& matrix_a,
                                         const std::vector<float>& matrix_b,
                                         int rows_a, int cols_a, int cols_b) {
    std::vector<float> product(rows_a * cols_b, 0.0f);

    for (int row = 0; row < rows_a; ++row) {
        const int a_row_start = row * cols_a;
        const int c_row_start = row * cols_b;

        for (int col = 0; col < cols_b; ++col) {
            // Dot product of row `row` of A with column `col` of B.
            float acc = 0.0f;
            for (int inner = 0; inner < cols_a; ++inner) {
                acc += matrix_a[a_row_start + inner] * matrix_b[inner * cols_b + col];
            }
            product[c_row_start + col] = acc;
        }
    }

    return product;
}

/**
 * Reference recursive radix-2 FFT (simplified), used to cross-check the
 * accelerator result.
 *
 * Only power-of-two sizes are supported. For n <= 0 or a size that is not a
 * power of two, both outputs are cleared and the function returns, instead of
 * recursing without end (n == 0) or producing a partially written result.
 * The inverse transform halves the result at every recursion level, which
 * amounts to an overall 1/n scaling.
 *
 * @param real_in  real parts; must hold at least n elements
 * @param imag_in  imaginary parts; must hold at least n elements
 * @param real_out resized to n and filled with the real results
 * @param imag_out resized to n and filled with the imaginary results
 * @param n        transform size (power of two)
 * @param inverse  true computes the inverse transform
 */
void fftSoftware(const std::vector<float>& real_in,
                const std::vector<float>& imag_in,
                std::vector<float>& real_out,
                std::vector<float>& imag_out,
                int n, bool inverse) {

    // Reject sizes the radix-2 split cannot handle; n/2 == 0 for n == 0 would
    // otherwise recurse forever.
    if (n <= 0 || (n & (n - 1)) != 0) {
        real_out.clear();
        imag_out.clear();
        return;
    }

    real_out.resize(n);
    imag_out.resize(n);

    // Base case: a single point is its own transform.
    if (n == 1) {
        real_out[0] = real_in[0];
        imag_out[0] = imag_in[0];
        return;
    }

    // Divide: split into even- and odd-indexed samples.
    std::vector<float> real_even(n/2), imag_even(n/2);
    std::vector<float> real_odd(n/2), imag_odd(n/2);

    for (int i = 0; i < n/2; i++) {
        real_even[i] = real_in[2*i];
        imag_even[i] = imag_in[2*i];
        real_odd[i] = real_in[2*i + 1];
        imag_odd[i] = imag_in[2*i + 1];
    }

    // Conquer: transform both halves.
    std::vector<float> real_even_out(n/2), imag_even_out(n/2);
    std::vector<float> real_odd_out(n/2), imag_odd_out(n/2);

    fftSoftware(real_even, imag_even, real_even_out, imag_even_out, n/2, inverse);
    fftSoftware(real_odd, imag_odd, real_odd_out, imag_odd_out, n/2, inverse);

    // Combine: the twiddle angle is +2*pi*k/n for the inverse, -2*pi*k/n otherwise.
    double angle_dir = inverse ? 2.0 * M_PI / n : -2.0 * M_PI / n;

    for (int k = 0; k < n/2; k++) {
        double angle = angle_dir * k;
        double cos_val = cos(angle);
        double sin_val = sin(angle);

        // Twiddle factor times the odd half (complex multiplication).
        double real_twiddle = real_odd_out[k] * cos_val - imag_odd_out[k] * sin_val;
        double imag_twiddle = real_odd_out[k] * sin_val + imag_odd_out[k] * cos_val;

        // Butterfly.
        real_out[k] = real_even_out[k] + real_twiddle;
        imag_out[k] = imag_even_out[k] + imag_twiddle;

        real_out[k + n/2] = real_even_out[k] - real_twiddle;
        imag_out[k + n/2] = imag_even_out[k] - imag_twiddle;
    }

    // Inverse scaling: 1/2 per level, i.e. 1/n over all log2(n) levels.
    if (inverse) {
        for (int i = 0; i < n; i++) {
            real_out[i] /= 2.0;
            imag_out[i] /= 2.0;
        }
    }
}

/**
 * Compare two equally sized float sequences element by element.
 *
 * An element pair matches when the values are exactly equal (this covers
 * matching infinities) or when their absolute difference is within
 * `tolerance`. A NaN on either side never matches: a plain `diff > tolerance`
 * test is false for NaN and would let a corrupted result pass. A summary of
 * the mismatches is printed to std::cout.
 *
 * @param matrix1   first sequence
 * @param matrix2   second sequence
 * @param tolerance maximum accepted absolute difference
 * @return true when the sizes are equal and every element pair matches
 */
bool compareMatrices(const std::vector<float>& matrix1,
                    const std::vector<float>& matrix2,
                    float tolerance = 1e-3) {
    if (matrix1.size() != matrix2.size()) {
        std::cout << "矩阵大小不同: " << matrix1.size() << " vs " << matrix2.size() << std::endl;
        return false;
    }

    int mismatch_count = 0;
    float max_diff = 0.0f;
    size_t max_diff_idx = 0;

    for (size_t i = 0; i < matrix1.size(); i++) {
        // Exact equality first: inf - inf is NaN and must not count as a mismatch.
        if (matrix1[i] == matrix2[i]) {
            continue;
        }

        float diff = std::abs(matrix1[i] - matrix2[i]);
        // Negated <= so that a NaN difference is treated as a mismatch.
        if (!(diff <= tolerance)) {
            mismatch_count++;
            // Always record the first mismatch so the report points at a real
            // offending element even when its difference is NaN.
            if (mismatch_count == 1 || diff > max_diff) {
                max_diff = diff;
                max_diff_idx = i;
            }
        }
    }

    if (mismatch_count > 0) {
        std::cout << "发现 " << mismatch_count << " 个差异 (共 " << matrix1.size() << " 个元素)" << std::endl;
        std::cout << "最大差异: " << max_diff << " 在索引 " << max_diff_idx 
                 << " (值: " << matrix1[max_diff_idx] << " vs " << matrix2[max_diff_idx] << ")" << std::endl;
        return false;
    }

    return true;
}

// Demo driver for the hardware accelerator.
// Runs three sections in order: (1) a 128x64 by 64x128 matrix multiply,
// (2) a 1024-point FFT, (3) a matrix-multiply scaling sweep over square sizes.
// Each section times the software reference and, when accelerator.isReady(),
// the hardware path, then compares the two results with compareMatrices().
// Returns 0 on normal completion and 1 if an exception reaches the handlers.
int main() {
    try {
        std::cout << "Zynq硬件加速器演示" << std::endl;
        std::cout << "=====================" << std::endl << std::endl;
        
        // Random source shared by every test below (values drawn from [-10, 10))
        std::random_device rd;
        std::mt19937 gen(rd());
        std::uniform_real_distribution<float> dist(-10.0f, 10.0f);
        
        // Create the hardware accelerator instance.
        // Note: adjust the base address and DMA device path to the actual hardware design.
        HardwareAccelerator accelerator(0x43C00000, "/dev/xilinx_dma", true);
        
        // Initialise the accelerator; on failure the demo carries on and the
        // hardware sections are gated by accelerator.isReady() below.
        if (!accelerator.initialize()) {
            std::cerr << "硬件加速器初始化失败,尝试仅使用软件模式" << std::endl;
        } else {
            std::cout << "硬件加速器已初始化" << std::endl;
            std::cout << "版本: " << accelerator.getVersionInfo() << std::endl << std::endl;
        }
        
        // Demo 1: matrix multiplication
        std::cout << "1. 矩阵乘法测试" << std::endl;
        std::cout << "------------------" << std::endl;
        
        // Test dimensions: A is rows_a x cols_a, B is cols_a x cols_b
        int rows_a = 128;
        int cols_a = 64;
        int cols_b = 128;
        
        // Allocate the flattened input matrices
        std::vector<float> matrix_a(rows_a * cols_a);
        std::vector<float> matrix_b(cols_a * cols_b);
        
        // Fill with random data
        for (auto& val : matrix_a) val = dist(gen);
        for (auto& val : matrix_b) val = dist(gen);
        
        // Software reference, timed
        auto start_sw = std::chrono::high_resolution_clock::now();
        std::vector<float> result_sw = matrixMultiplySoftware(matrix_a, matrix_b, rows_a, cols_a, cols_b);
        auto end_sw = std::chrono::high_resolution_clock::now();
        
        std::chrono::duration<double, std::milli> sw_time = end_sw - start_sw;
        std::cout << "软件实现耗时: " << sw_time.count() << " ms" << std::endl;
        
        // Hardware implementation, timed around the whole call
        std::vector<float> result_hw;
        if (accelerator.isReady()) {
            auto start_hw = std::chrono::high_resolution_clock::now();
            bool success = accelerator.matrixMultiply(matrix_a, matrix_b, result_hw, rows_a, cols_a, cols_b);
            auto end_hw = std::chrono::high_resolution_clock::now();
            
            std::chrono::duration<double, std::milli> hw_time = end_hw - start_hw;
            std::cout << "硬件实现耗时: " << hw_time.count() << " ms" << std::endl;
            std::cout << "硬件加速器报告耗时: " << accelerator.getLastProcessingTime() << " ms" << std::endl;
            
            if (success) {
                // Speed-up uses the wall-clock time measured here, not the accelerator-reported time
                std::cout << "加速比: " << sw_time.count() / hw_time.count() << "x" << std::endl;
                
                // Compare against the software reference
                std::cout << "验证计算结果..." << std::endl;
                bool match = compareMatrices(result_sw, result_hw);
                if (match) {
                    std::cout << "计算结果匹配!" << std::endl;
                }
            } else {
                std::cout << "硬件加速失败,使用软件结果" << std::endl;
            }
        }
        
        // Print a preview of the inputs and of the software result
        printMatrix(matrix_a, rows_a, cols_a, "矩阵A");
        printMatrix(matrix_b, cols_a, cols_b, "矩阵B");
        printMatrix(result_sw, rows_a, cols_b, "结果");
        
        // Demo 2: FFT
        std::cout << std::endl << "2. FFT变换测试" << std::endl;
        std::cout << "------------------" << std::endl;
        
        // FFT test parameters (forward transform)
        int fft_size = 1024;
        bool inverse = false;
        
        // Allocate the input signal
        std::vector<float> real_in(fft_size);
        std::vector<float> imag_in(fft_size);
        
        // Build the test signal: a sum of three sine waves
        for (int i = 0; i < fft_size; i++) {
            // t runs over [0, 1) across the window
            double t = static_cast<double>(i) / fft_size;
            // Three frequency components: 100Hz, 200Hz, 350Hz
            real_in[i] = 0.5 * sin(2.0 * M_PI * 100.0 * t) + 
                         0.25 * sin(2.0 * M_PI * 200.0 * t) + 
                         0.125 * sin(2.0 * M_PI * 350.0 * t);
            imag_in[i] = 0.0f;  // purely real signal
        }
        
        // Add Gaussian noise (mean 0, standard deviation 0.05)
        std::normal_distribution<float> noise_dist(0.0f, 0.05f);
        for (auto& val : real_in) val += noise_dist(gen);
        
        // Software FFT, timed
        std::vector<float> real_out_sw(fft_size);
        std::vector<float> imag_out_sw(fft_size);
        
        auto start_fft_sw = std::chrono::high_resolution_clock::now();
        fftSoftware(real_in, imag_in, real_out_sw, imag_out_sw, fft_size, inverse);
        auto end_fft_sw = std::chrono::high_resolution_clock::now();
        
        std::chrono::duration<double, std::milli> fft_sw_time = end_fft_sw - start_fft_sw;
        std::cout << "软件FFT耗时: " << fft_sw_time.count() << " ms" << std::endl;
        
        // Hardware FFT, timed around the whole call
        std::vector<float> real_out_hw;
        std::vector<float> imag_out_hw;
        
        if (accelerator.isReady()) {
            auto start_fft_hw = std::chrono::high_resolution_clock::now();
            bool success = accelerator.performFFT(real_in, imag_in, real_out_hw, imag_out_hw, 
                                               fft_size, inverse);
            auto end_fft_hw = std::chrono::high_resolution_clock::now();
            
            std::chrono::duration<double, std::milli> fft_hw_time = end_fft_hw - start_fft_hw;
            std::cout << "硬件FFT耗时: " << fft_hw_time.count() << " ms" << std::endl;
            std::cout << "硬件加速器报告耗时: " << accelerator.getLastProcessingTime() << " ms" << std::endl;
            
            if (success) {
                std::cout << "加速比: " << fft_sw_time.count() / fft_hw_time.count() << "x" << std::endl;
                
                // Compare real and imaginary parts separately against the software result
                std::cout << "验证计算结果..." << std::endl;
                bool real_match = compareMatrices(real_out_sw, real_out_hw);
                bool imag_match = compareMatrices(imag_out_sw, imag_out_hw);
                if (real_match && imag_match) {
                    std::cout << "计算结果匹配!" << std::endl;
                }
            } else {
                std::cout << "硬件加速失败,使用软件结果" << std::endl;
            }
        }
        
        // Preview of the software FFT result: first 16 bins with their magnitude.
        // std::fixed / std::setprecision set here stay in effect on std::cout afterwards.
        std::cout << "FFT结果预览 (前16个点):" << std::endl;
        std::cout << "索引	实部		虚部		幅度" << std::endl;
        for (int i = 0; i < 16; i++) {
            float magnitude = sqrt(real_out_sw[i] * real_out_sw[i] + 
                                  imag_out_sw[i] * imag_out_sw[i]);
            std::cout << i << "	" << std::fixed << std::setprecision(4)
                      << real_out_sw[i] << "		" << imag_out_sw[i] << "		"
                      << magnitude << std::endl;
        }
        
        // Demo 3: run the matrix multiply at several problem sizes
        std::cout << std::endl << "3. 性能缩放测试" << std::endl;
        std::cout << "------------------" << std::endl;
        
        // The sweep only runs when the accelerator is ready
        if (accelerator.isReady()) {
            std::cout << "矩阵乘法性能缩放:" << std::endl;
            std::cout << "大小	软件时间(ms)	硬件时间(ms)	加速比" << std::endl;
            
            int sizes[] = {32, 64, 128, 256, 512};
            
            for (int size : sizes) {
                // Square size x size matrices
                std::vector<float> mat_a(size * size);
                std::vector<float> mat_b(size * size);
                std::vector<float> result_sw_scaled, result_hw_scaled;
                
                // Fill with random data
                for (auto& val : mat_a) val = dist(gen);
                for (auto& val : mat_b) val = dist(gen);
                
                // Software computation (these locals shadow the section-1 timing variables)
                auto start_sw = std::chrono::high_resolution_clock::now();
                result_sw_scaled = matrixMultiplySoftware(mat_a, mat_b, size, size, size);
                auto end_sw = std::chrono::high_resolution_clock::now();
                std::chrono::duration<double, std::milli> sw_time = end_sw - start_sw;
                
                // Hardware computation
                auto start_hw = std::chrono::high_resolution_clock::now();
                bool success = accelerator.matrixMultiply(mat_a, mat_b, result_hw_scaled, 
                                                       size, size, size);
                auto end_hw = std::chrono::high_resolution_clock::now();
                std::chrono::duration<double, std::milli> hw_time = end_hw - start_hw;
                
                // Emit one table row per size
                if (success) {
                    double speedup = sw_time.count() / hw_time.count();
                    std::cout << size << "	" << std::fixed << std::setprecision(2)
                              << sw_time.count() << "		" << hw_time.count() 
                              << "		" << speedup << "x" << std::endl;
                    
                    // Verify against the software result
                    bool match = compareMatrices(result_sw_scaled, result_hw_scaled);
                    if (!match) {
                        std::cout << "  警告:大小 " << size << " 的结果不匹配" << std::endl;
                    }
                } else {
                    std::cout << size << "	" << sw_time.count() << "		失败		N/A" << std::endl;
                }
            }
        }
        
        std::cout << std::endl << "测试完成!" << std::endl;
        
    } catch (const HardwareAcceleratorException& e) {
        // Accelerator-specific failures are reported separately from generic ones
        std::cerr << "硬件加速器错误: " << e.what() << std::endl;
        return 1;
    } catch (const std::exception& e) {
        std::cerr << "程序错误: " << e.what() << std::endl;
        return 1;
    }
    
    return 0;
}

6.2 实时视频处理系统

cpp

/**
 * video_processor.hpp - 基于Zynq的实时视频处理系统
 * 
 * 提供视频捕获、处理和显示功能,利用PL硬件加速器
 */

#ifndef VIDEO_PROCESSOR_HPP
#define VIDEO_PROCESSOR_HPP

#include <atomic>
#include <condition_variable>
#include <cstdint>
#include <functional>
#include <memory>
#include <mutex>
#include <queue>
#include <string>
#include <thread>
#include <vector>

// Forward declarations of the implementation classes. The public classes in
// this header only hold them through std::unique_ptr, so their definitions
// are not needed here.
class VideoCaptureImpl;
class VideoDisplayImpl;
class VideoProcessorImpl;

// Pixel formats a video frame can be stored in.
// The V4L2 names below are the formats the capture implementation requests
// from the driver for each enumerator.
enum class PixelFormat {
    RGB24,   // requested as V4L2_PIX_FMT_RGB24
    RGBA32,  // requested as V4L2_PIX_FMT_RGB32
    YUV422,  // requested as V4L2_PIX_FMT_YUYV
    YUV420,  // requested as V4L2_PIX_FMT_YUV420
    GRAY8    // requested as V4L2_PIX_FMT_GREY
};

// A single video frame: the raw pixel bytes plus the metadata needed to
// interpret them.
// Every scalar member has a default so that a default-constructed frame
// (`VideoFrame f;`) holds well-defined values instead of indeterminate ones.
struct VideoFrame {
    std::vector<std::uint8_t> data;          // raw frame bytes, layout given by `format`
    int width = 0;                           // frame width in pixels
    int height = 0;                          // frame height in pixels
    PixelFormat format = PixelFormat::RGB24; // pixel layout of `data`
    std::int64_t timestamp = 0;              // capture time; the V4L2 capture path stores microseconds
};

// Callable that receives a frame by mutable reference and returns a bool.
// NOTE(review): presumably it modifies the frame in place and returns true on
// success; confirm against VideoProcessor's implementation.
using FrameProcessor = std::function<bool(VideoFrame&)>;

/**
 * Video capture class.
 *
 * Public facade; all state lives in VideoCaptureImpl, held through impl_.
 */
class VideoCapture {
public:
    /**
     * Constructor.
     * 
     * @param device_path device path (for example "/dev/video0")
     * @param width video width
     * @param height video height
     * @param format pixel format
     * @param fps frame rate
     */
    VideoCapture(const std::string& device_path, 
                int width, 
                int height, 
                PixelFormat format = PixelFormat::YUV422, 
                int fps = 30);
    
    /**
     * Destructor.
     */
    ~VideoCapture();
    
    /**
     * Start video capture.
     * 
     * @return whether the operation succeeded
     */
    bool start();
    
    /**
     * Stop video capture.
     */
    void stop();
    
    /**
     * Read one frame.
     * 
     * @param frame output frame
     * @param timeout_ms timeout in milliseconds
     * @return whether the operation succeeded
     */
    bool read(VideoFrame& frame, int timeout_ms = 1000);
    
    /**
     * Get device information.
     * 
     * @return device information string
     */
    std::string getDeviceInfo() const;
    
    /**
     * Get the current FPS.
     * 
     * @return current frame rate
     */
    float getCurrentFPS() const;
    
private:
    // Owning pointer to the implementation object
    std::unique_ptr<VideoCaptureImpl> impl_;
};

/**
 * Video display class.
 *
 * Public facade; all state lives in VideoDisplayImpl, held through impl_.
 */
class VideoDisplay {
public:
    /**
     * Constructor.
     * 
     * @param display_name display name (for example "Video Display")
     * @param width video width
     * @param height video height
     * @param format pixel format
     */
    VideoDisplay(const std::string& display_name, 
                int width, 
                int height, 
                PixelFormat format = PixelFormat::RGB24);
    
    /**
     * Destructor.
     */
    ~VideoDisplay();
    
    /**
     * Initialise the display.
     * 
     * @return whether the operation succeeded
     */
    bool initialize();
    
    /**
     * Display one frame.
     * 
     * @param frame frame to display
     * @return whether the operation succeeded
     */
    bool display(const VideoFrame& frame);
    
    /**
     * Close the display.
     */
    void close();
    
    /**
     * Set the window position.
     * 
     * @param x X coordinate
     * @param y Y coordinate
     */
    void setPosition(int x, int y);
    
    /**
     * Set full-screen mode.
     * 
     * @param fullscreen whether to use full screen
     */
    void setFullscreen(bool fullscreen);
    
private:
    // Owning pointer to the implementation object
    std::unique_ptr<VideoDisplayImpl> impl_;
};

/**
 * Video processor class.
 *
 * Public facade; all state lives in VideoProcessorImpl, held through impl_.
 */
class VideoProcessor {
public:
    /**
     * Constructor.
     * 
     * @param use_hardware_accel whether to use hardware acceleration
     */
    VideoProcessor(bool use_hardware_accel = true);
    
    /**
     * Destructor.
     */
    ~VideoProcessor();
    
    /**
     * Initialise the processor.
     * 
     * @return whether the operation succeeded
     */
    bool initialize();
    
    /**
     * Add a frame processor.
     * 
     * @param processor processor function
     * @param name processor name (optional)
     * @return processor ID
     */
    int addProcessor(FrameProcessor processor, const std::string& name = "");
    
    /**
     * Remove a frame processor.
     * 
     * @param processor_id processor ID
     * @return whether the operation succeeded
     */
    bool removeProcessor(int processor_id);
    
    /**
     * Process a frame.
     * 
     * @param input_frame input frame
     * @param output_frame output frame
     * @return whether the operation succeeded
     */
    bool processFrame(const VideoFrame& input_frame, VideoFrame& output_frame);
    
    /**
     * Start the processing thread.
     * 
     * @param capture capture object
     * @param display display object
     * @return whether the operation succeeded
     */
    bool startProcessingThread(VideoCapture& capture, VideoDisplay& display);
    
    /**
     * Stop the processing thread.
     */
    void stopProcessingThread();
    
    /**
     * Get the processing FPS.
     * 
     * @return current processing frame rate
     */
    float getProcessingFPS() const;
    
    /**
     * Get the last error.
     * 
     * @return error message
     */
    std::string getLastError() const;
    
private:
    // Owning pointer to the implementation object
    std::unique_ptr<VideoProcessorImpl> impl_;
};

/**
 * Hardware-accelerated video effects.
 * Factory functions that each return a FrameProcessor for one effect.
 */
namespace VideoEffects {
    /**
     * Adjust brightness and contrast.
     * 
     * @param brightness brightness adjustment (-1.0 to 1.0)
     * @param contrast contrast adjustment (0.0 to 2.0)
     * @return processing function
     */
    FrameProcessor adjustBrightnessContrast(float brightness, float contrast);
    
    /**
     * Apply a colour filter.
     * 
     * @param red_scale red scale factor
     * @param green_scale green scale factor
     * @param blue_scale blue scale factor
     * @return processing function
     */
    FrameProcessor colorFilter(float red_scale, float green_scale, float blue_scale);
    
    /**
     * Edge detection.
     * 
     * @param threshold threshold (0-255)
     * @return processing function
     */
    FrameProcessor edgeDetection(int threshold = 50);
    
    /**
     * Gaussian blur.
     * 
     * @param kernel_size kernel size (3, 5, 7)
     * @param sigma standard deviation
     * @return processing function
     */
    FrameProcessor gaussianBlur(int kernel_size = 3, float sigma = 1.0f);
    
    /**
     * Resize.
     * 
     * @param scale_factor scale factor
     * @return processing function
     */
    FrameProcessor resize(float scale_factor);
    
    /**
     * Rotate.
     * 
     * @param angle rotation angle (degrees)
     * @return processing function
     */
    FrameProcessor rotate(float angle);
    
    /**
     * Image binarisation.
     * 
     * @param threshold threshold (0-255)
     * @return processing function
     */
    FrameProcessor threshold(int threshold = 128);
    
    /**
     * Custom hardware-accelerated effect.
     * 
     * @param effect_id effect ID
     * @param params array of effect parameters
     * @return processing function
     */
    FrameProcessor customHardwareEffect(int effect_id, const std::vector<float>& params);
}

#endif // VIDEO_PROCESSOR_HPP

cpp

/**
 * video_processor.cpp - 基于Zynq的实时视频处理系统实现
 */

#include "video_processor.hpp"
#include <iostream>
#include <chrono>
#include <cstring>
#include <cmath>
#include <algorithm>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/videodev2.h>
#include <linux/fb.h>

// X11相关头文件
#include <X11/Xlib.h>
#include <X11/Xutil.h>

// 硬件加速器接口
#include "hw_accelerator.hpp"

// Debug output: writes "[DEBUG] " followed by msg and a newline to std::cerr.
// msg is pasted into the stream expression unparenthesised, so it may itself
// be a chain such as `"x=" << x`.
#define DEBUG_PRINT(msg) std::cerr << "[DEBUG] " << msg << std::endl

// Error output: same as DEBUG_PRINT but with an "[ERROR] " prefix.
#define ERROR_PRINT(msg) std::cerr << "[ERROR] " << msg << std::endl

// 视频捕获实现类
class VideoCaptureImpl {
public:
    VideoCaptureImpl(const std::string& device_path, 
                   int width, 
                   int height, 
                   PixelFormat format, 
                   int fps)
        : device_path_(device_path),
          width_(width),
          height_(height),
          format_(format),
          fps_(fps),
          fd_(-1),
          buffers_(nullptr),
          buffer_count_(0),
          is_capturing_(false),
          frame_count_(0),
          last_fps_time_(std::chrono::steady_clock::now()),
          current_fps_(0.0f) {
    }
    
    ~VideoCaptureImpl() {
        if (is_capturing_) {
            stop();
        }
        
        if (fd_ >= 0) {
            close(fd_);
            fd_ = -1;
        }
    }
    
    bool start() {
        // 打开设备
        fd_ = open(device_path_.c_str(), O_RDWR);
        if (fd_ < 0) {
            last_error_ = "无法打开视频设备: " + device_path_ + " - " + strerror(errno);
            ERROR_PRINT(last_error_);
            return false;
        }
        
        // 查询设备能力
        struct v4l2_capability cap;
        if (xioctl(fd_, VIDIOC_QUERYCAP, &cap) < 0) {
            last_error_ = "查询设备能力失败: " + std::string(strerror(errno));
            ERROR_PRINT(last_error_);
            close(fd_);
            fd_ = -1;
            return false;
        }
        
        // 检查是否是视频捕获设备
        if (!(cap.capabilities & V4L2_CAP_VIDEO_CAPTURE)) {
            last_error_ = device_path_ + " 不是视频捕获设备";
            ERROR_PRINT(last_error_);
            close(fd_);
            fd_ = -1;
            return false;
        }
        
        // 检查是否支持流IO
        if (!(cap.capabilities & V4L2_CAP_STREAMING)) {
            last_error_ = device_path_ + " 不支持流式IO";
            ERROR_PRINT(last_error_);
            close(fd_);
            fd_ = -1;
            return false;
        }
        
        // 设置视频格式
        struct v4l2_format fmt;
        memset(&fmt, 0, sizeof(fmt));
        fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
        fmt.fmt.pix.width = width_;
        fmt.fmt.pix.height = height_;
        
        // 根据指定的像素格式设置
        switch (format_) {
            case PixelFormat::RGB24:
                fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_RGB24;
                break;
            case PixelFormat::RGBA32:
                fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_RGB32;
                break;
            case PixelFormat::YUV422:
                fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_YUYV;
                break;
            case PixelFormat::YUV420:
                fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_YUV420;
                break;
            case PixelFormat::GRAY8:
                fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_GREY;
                break;
            default:
                fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_YUYV;
                break;
        }
        
        fmt.fmt.pix.field = V4L2_FIELD_ANY;
        
        if (xioctl(fd_, VIDIOC_S_FMT, &fmt) < 0) {
            last_error_ = "设置视频格式失败: " + std::string(strerror(errno));
            ERROR_PRINT(last_error_);
            close(fd_);
            fd_ = -1;
            return false;
        }
        
        // 检查实际得到的格式
        if (fmt.fmt.pix.width != width_ || fmt.fmt.pix.height != height_) {
            DEBUG_PRINT("警告:请求的分辨率 " << width_ << "x" << height_ 
                       << " 与实际得到的 " << fmt.fmt.pix.width << "x" 
                       << fmt.fmt.pix.height << " 不一致");
            width_ = fmt.fmt.pix.width;
            height_ = fmt.fmt.pix.height;
        }
        
        // 设置帧率
        struct v4l2_streamparm parm;
        memset(&parm, 0, sizeof(parm));
        parm.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
        
        if (xioctl(fd_, VIDIOC_G_PARM, &parm) >= 0) {
            if (parm.parm.capture.capability & V4L2_CAP_TIMEPERFRAME) {
                parm.parm.capture.timeperframe.numerator = 1;
                parm.parm.capture.timeperframe.denominator = fps_;
                
                if (xioctl(fd_, VIDIOC_S_PARM, &parm) < 0) {
                    DEBUG_PRINT("警告:设置帧率失败: " << strerror(errno));
                }
                
                // 检查实际得到的帧率
                fps_ = parm.parm.capture.timeperframe.denominator / 
                       parm.parm.capture.timeperframe.numerator;
                
                DEBUG_PRINT("实际帧率: " << fps_ << " fps");
            }
        }
        
        // 请求缓冲区
        struct v4l2_requestbuffers req;
        memset(&req, 0, sizeof(req));
        req.count = 4;  // 请求4个缓冲区
        req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
        req.memory = V4L2_MEMORY_MMAP;
        
        if (xioctl(fd_, VIDIOC_REQBUFS, &req) < 0) {
            last_error_ = "请求缓冲区失败: " + std::string(strerror(errno));
            ERROR_PRINT(last_error_);
            close(fd_);
            fd_ = -1;
            return false;
        }
        
        // 检查我们至少得到了一个缓冲区
        if (req.count < 1) {
            last_error_ = "没有足够的视频缓冲区";
            ERROR_PRINT(last_error_);
            close(fd_);
            fd_ = -1;
            return false;
        }
        
        // 分配缓冲区结构
        buffers_ = new buffer[req.count];
        buffer_count_ = req.count;
        
        // 内存映射缓冲区
        for (unsigned int i = 0; i < buffer_count_; i++) {
            struct v4l2_buffer buf;
            memset(&buf, 0, sizeof(buf));
            buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
            buf.memory = V4L2_MEMORY_MMAP;
            buf.index = i;
            
            if (xioctl(fd_, VIDIOC_QUERYBUF, &buf) < 0) {
                last_error_ = "查询缓冲区失败: " + std::string(strerror(errno));
                ERROR_PRINT(last_error_);
                cleanup_buffers();
                close(fd_);
                fd_ = -1;
                return false;
            }
            
            buffers_[i].length = buf.length;
            buffers_[i].start = mmap(NULL, buf.length, PROT_READ | PROT_WRITE,
                                   MAP_SHARED, fd_, buf.m.offset);
            
            if (buffers_[i].start == MAP_FAILED) {
                last_error_ = "内存映射失败: " + std::string(strerror(errno));
                ERROR_PRINT(last_error_);
                cleanup_buffers();
                close(fd_);
                fd_ = -1;
                return false;
            }
        }
        
        // 将所有缓冲区排队
        for (unsigned int i = 0; i < buffer_count_; i++) {
            struct v4l2_buffer buf;
            memset(&buf, 0, sizeof(buf));
            buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
            buf.memory = V4L2_MEMORY_MMAP;
            buf.index = i;
            
            if (xioctl(fd_, VIDIOC_QBUF, &buf) < 0) {
                last_error_ = "排队缓冲区失败: " + std::string(strerror(errno));
                ERROR_PRINT(last_error_);
                cleanup_buffers();
                close(fd_);
                fd_ = -1;
                return false;
            }
        }
        
        // 开始视频流
        enum v4l2_buf_type type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
        if (xioctl(fd_, VIDIOC_STREAMON, &type) < 0) {
            last_error_ = "启动视频流失败: " + std::string(strerror(errno));
            ERROR_PRINT(last_error_);
            cleanup_buffers();
            close(fd_);
            fd_ = -1;
            return false;
        }
        
        is_capturing_ = true;
        frame_count_ = 0;
        last_fps_time_ = std::chrono::steady_clock::now();
        current_fps_ = 0.0f;
        
        DEBUG_PRINT("视频捕获已启动: " << width_ << "x" << height_ << " @ " << fps_ << "fps");
        return true;
    }
    
    void stop() {
        if (!is_capturing_) return;
        
        // 停止视频流
        enum v4l2_buf_type type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
        if (xioctl(fd_, VIDIOC_STREAMOFF, &type) < 0) {
            ERROR_PRINT("停止视频流失败: " << strerror(errno));
        }
        
        // 清理缓冲区
        cleanup_buffers();
        
        is_capturing_ = false;
        DEBUG_PRINT("视频捕获已停止");
    }
    
    bool read(VideoFrame& frame, int timeout_ms) {
        if (!is_capturing_ || fd_ < 0) {
            last_error_ = "视频捕获未启动";
            return false;
        }
        
        // 设置select超时
        fd_set fds;
        struct timeval tv;
        
        FD_ZERO(&fds);
        FD_SET(fd_, &fds);
        
        tv.tv_sec = timeout_ms / 1000;
        tv.tv_usec = (timeout_ms % 1000) * 1000;
        
        // 等待数据可读
        int ret = select(fd_ + 1, &fds, NULL, NULL, &tv);
        
        if (ret == -1) {
            last_error_ = "select失败: " + std::string(strerror(errno));
            ERROR_PRINT(last_error_);
            return false;
        }
        
        if (ret == 0) {
            last_error_ = "读取超时";
            return false;
        }
        
        // 获取帧
        struct v4l2_buffer buf;
        memset(&buf, 0, sizeof(buf));
        buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
        buf.memory = V4L2_MEMORY_MMAP;
        
        if (xioctl(fd_, VIDIOC_DQBUF, &buf) < 0) {
            if (errno == EAGAIN) {
                return false;  // 尝试再次读取
            } else {
                last_error_ = "出队缓冲区失败: " + std::string(strerror(errno));
                ERROR_PRINT(last_error_);
                return false;
            }
        }
        
        // 填充帧信息
        frame.width = width_;
        frame.height = height_;
        frame.format = format_;
        frame.timestamp = buf.timestamp.tv_sec * 1000000LL + buf.timestamp.tv_usec;
        
        // 复制数据
        frame.data.resize(buf.bytesused);
        memcpy(frame.data.data(), buffers_[buf.index].start, buf.bytesused);
        
        // 将缓冲区重新入队
        if (xioctl(fd_, VIDIOC_QBUF, &buf) < 0) {
            last_error_ = "重新排队缓冲区失败: " + std::string(strerror(errno));
            ERROR_PRINT(last_error_);
            return false;
        }
        
        // 更新FPS计数
        frame_count_++;
        auto now = std::chrono::steady_clock::now();
        auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(
            now - last_fps_time_).count();
        
        if (elapsed >= 1000) {  // 每秒更新一次
            current_fps_ = 1000.0f * frame_count_ / elapsed;
            frame_count_ = 0;
            last_fps_time_ = now;
        }
        
        return true;
    }
    
    std::string getDeviceInfo() const {
        if (fd_ < 0) {
            return "设备未打开";
        }
        
        struct v4l2_capability cap;
        if (xioctl(fd_, VIDIOC_QUERYCAP, &cap) < 0) {
            return "无法查询设备信息";
        }
        
        std::string info;
        info += "设备: " + device_path_ + "
";
        info += "驱动: " + std::string(reinterpret_cast<const char*>(cap.driver)) + "
";
        info += "卡: " + std::string(reinterpret_cast<const char*>(cap.card)) + "
";
        info += "总线信息: " + std::string(reinterpret_cast<const char*>(cap.bus_info)) + "
";
        info += "分辨率: " + std::to_string(width_) + "x" + std::to_string(height_) + "
";
        info += "帧率: " + std::to_string(fps_) + " fps
";
        info += "当前帧率: " + std::to_string(current_fps_) + " fps
";
        
        return info;
    }
    
    float getCurrentFPS() const {
        return current_fps_;
    }
    
    const std::string& getLastError() const {
        return last_error_;
    }
    
private:
    // 内部缓冲区结构
    struct buffer {
        void* start;
        size_t length;
    };
    
    // 清理内存映射的缓冲区
    void cleanup_buffers() {
        if (buffers_) {
            for (unsigned int i = 0; i < buffer_count_; i++) {
                if (buffers_[i].start != MAP_FAILED && buffers_[i].start != nullptr) {
                    munmap(buffers_[i].start, buffers_[i].length);
                    buffers_[i].start = nullptr;
                }
            }
            
            delete[] buffers_;
            buffers_ = nullptr;
        }
        
        buffer_count_ = 0;
    }
    
    // 包装ioctl调用,处理EINTR
    int xioctl(int fd, int request, void* arg) const {
        int r;
        do {
            r = ioctl(fd, request, arg);
        } while (r == -1 && errno == EINTR);
        return r;
    }
    
private:
    std::string device_path_;
    int width_;
    int height_;
    PixelFormat format_;
    int fps_;
    int fd_;
    buffer* buffers_;
    unsigned int buffer_count_;
    bool is_capturing_;
    std::string last_error_;
    
    // FPS计算
    int frame_count_;
    std::chrono::steady_clock::time_point last_fps_time_;
    float current_fps_;
};

// 视频显示实现类
class VideoDisplayImpl {
public:
    VideoDisplayImpl(const std::string& display_name, 
                   int width, 
                   int height, 
                   PixelFormat format)
        : display_name_(display_name),
          width_(width),
          height_(height),
          format_(format),
          display_(nullptr),
          window_(0),
          gc_(0),
          image_(nullptr),
          x_(0),
          y_(0),
          fullscreen_(false),
          initialized_(false) {
    }
    
    ~VideoDisplayImpl() {
        close();
    }
    
    bool initialize() {
        // 打开X11显示连接
        display_ = XOpenDisplay(nullptr);
        if (!display_) {
            last_error_ = "无法连接到X11显示";
            ERROR_PRINT(last_error_);
            return false;
        }
        
        // 获取默认屏幕和根窗口
        int screen = DefaultScreen(display_);
        Window root = RootWindow(display_, screen);
        
        // 创建窗口
        window_ = XCreateSimpleWindow(display_, root, x_, y_, width_, height_, 
                                    1, BlackPixel(display_, screen), 
                                    WhitePixel(display_, screen));
        
        // 设置窗口标题
        XStoreName(display_, window_, display_name_.c_str());
        
        // 选择输入事件
        XSelectInput(display_, window_, StructureNotifyMask | ExposureMask | 
                                       KeyPressMask);
        
        // 创建GC
        gc_ = XCreateGC(display_, window_, 0, nullptr);
        
        // 创建XImage
        Visual* visual = DefaultVisual(display_, screen);
        int depth = DefaultDepth(display_, screen);
        
        // 创建共享内存XImage (根据像素格式确定每个像素的位大小)
        int bits_per_pixel;
        
        switch (format_) {
            case PixelFormat::RGB24:
                bits_per_pixel = 24;
                break;
            case PixelFormat::RGBA32:
                bits_per_pixel = 32;
                break;
            default:
                // 对于其他格式,将转换为RGB24
                bits_per_pixel = 24;
                break;
        }
        
        image_ = XCreateImage(display_, visual, depth, ZPixmap, 0, nullptr,
                            width_, height_, 32, 0);
        
        if (!image_) {
            last_error_ = "无法创建XImage";
            ERROR_PRINT(last_error_);
            XFreeGC(display_, gc_);
            XDestroyWindow(display_, window_);
            XCloseDisplay(display_);
            display_ = nullptr;
            return false;
        }
        
        // 分配图像数据
        image_->data = (char*)malloc(height_ * image_->bytes_per_line);
        if (!image_->data) {
            last_error_ = "无法分配图像内存";
            ERROR_PRINT(last_error_);
            XDestroyImage(image_);
            image_ = nullptr;
            XFreeGC(display_, gc_);
            XDestroyWindow(display_, window_);
            XCloseDisplay(display_);
            display_ = nullptr;
            return false;
        }
        
        // 映射窗口
        XMapWindow(display_, window_);
        
        // 等待窗口可见
        XEvent event;
        do {
            XNextEvent(display_, &event);
        } while (event.type != MapNotify || event.xmap.window != window_);
        
        // 设置全屏模式(如果需要)
        if (fullscreen_) {
            setFullscreen(true);
        }
        
        initialized_ = true;
        DEBUG_PRINT("视频显示已初始化: " << width_ << "x" << height_);
        return true;
    }
    
    bool display(const VideoFrame& frame) {
        if (!initialized_) {
            last_error_ = "显示未初始化";
            return false;
        }
        
        if (frame.width != width_ || frame.height != height_) {
            last_error_ = "帧尺寸与显示尺寸不匹配";
            return false;
        }
        
        // 将帧数据转换为RGB格式
        if (frame.format == PixelFormat::RGB24 || frame.format == PixelFormat::RGBA32) {
            // 直接复制数据
            memcpy(image_->data, frame.data.data(), frame.data.size());
        } else {
            // 需要格式转换
            convertToRGB(frame);
        }
        
        // 将图像绘制到窗口
        XPutImage(display_, window_, gc_, image_, 0, 0, 0, 0, width_, height_);
        XFlush(display_);
        
        // 处理X事件
        while (XPending(display_)) {
            XEvent event;
            XNextEvent(display_, &event);
            
            // 处理按键事件
            if (event.type == KeyPress) {
                KeySym key = XLookupKeysym(&event.xkey, 0);
                
                // 按下ESC键退出全屏
                if (key == XK_Escape && fullscreen_) {
                    setFullscreen(false);
                }
                
                // 按下F键切换全屏
                if (key == XK_f || key == XK_F) {
                    setFullscreen(!fullscreen_);
                }
            }
        }
        
        return true;
    }
    
    void close() {
        if (initialized_) {
            if (image_) {
                XDestroyImage(image_);
                image_ = nullptr;
            }
            
            if (display_) {
                XFreeGC(display_, gc_);
                XDestroyWindow(display_, window_);
                XCloseDisplay(display_);
                display_ = nullptr;
            }
            
            initialized_ = false;
        }
    }
    
    void setPosition(int x, int y) {
        x_ = x;
        y_ = y;
        
        if (initialized_) {
            XMoveWindow(display_, window_, x_, y_);
            XFlush(display_);
        }
    }
    
    void setFullscreen(bool fullscreen) {
        fullscreen_ = fullscreen;
        
        if (!initialized_) return;
        
        Atom wm_state = XInternAtom(display_, "_NET_WM_STATE", False);
        Atom fullscreen_atom = XInternAtom(display_, "_NET_WM_STATE_FULLSCREEN", False);
        
        XEvent event;
        memset(&event, 0, sizeof(event));
        
        event.type = ClientMessage;
        event.xclient.window = window_;
        event.xclient.message_type = wm_state;
        event.xclient.format = 32;
        event.xclient.data.l[0] = fullscreen_ ? 1 : 0;  // 1=add, 0=remove
        event.xclient.data.l[1] = fullscreen_atom;
        event.xclient.data.l[2] = 0;
        
        XSendEvent(display_, RootWindow(display_, DefaultScreen(display_)),
                  False, SubstructureNotifyMask | SubstructureRedirectMask, &event);
        
        XFlush(display_);
    }
    
    const std::string& getLastError() const {
        return last_error_;
    }
    
private:
    // 将不同格式的帧转换为RGB
    void convertToRGB(const VideoFrame& frame) {
        switch (frame.format) {
            case PixelFormat::YUV422:
                convertYUYVToRGB(frame);
                break;
            case PixelFormat::YUV420:
                convertYUV420ToRGB(frame);
                break;
            case PixelFormat::GRAY8:
                convertGrayToRGB(frame);
                break;
            default:
                // 未知格式,填充黑色
                memset(image_->data, 0, height_ * image_->bytes_per_line);
                break;
        }
    }
    
    // YUV422 (YUYV) 转 RGB
    void convertYUYVToRGB(const VideoFrame& frame) {
        const uint8_t* yuyv = frame.data.data();
        uint8_t* rgb = reinterpret_cast<uint8_t*>(image_->data);
        
        for (int i = 0; i < height_; i++) {
            for (int j = 0; j < width_; j += 2) {
                int index = (i * width_ + j) * 2;
                
                uint8_t y0 = yuyv[index + 0];
                uint8_t u  = yuyv[index + 1];
                uint8_t y1 = yuyv[index + 2];
                uint8_t v  = yuyv[index + 3];
                
                // 第一个像素
                int rgb_index = (i * width_ + j) * 3;
                yuv2rgb(y0, u, v, rgb + rgb_index);
                
                // 第二个像素
                rgb_index += 3;
                yuv2rgb(y1, u, v, rgb + rgb_index);
            }
        }
    }
    
    // YUV420 转 RGB
    void convertYUV420ToRGB(const VideoFrame& frame) {
        const uint8_t* yuv = frame.data.data();
        uint8_t* rgb = reinterpret_cast<uint8_t*>(image_->data);
        
        int uv_offset = width_ * height_;
        
        for (int i = 0; i < height_; i++) {
            for (int j = 0; j < width_; j++) {
                int y_index = i * width_ + j;
                int u_index = uv_offset + (i/2) * (width_/2) + (j/2);
                int v_index = u_index + (width_ * height_) / 4;
                
                uint8_t y = yuv[y_index];
                uint8_t u = yuv[u_index];
                uint8_t v = yuv[v_index];
                
                int rgb_index = (i * width_ + j) * 3;
                yuv2rgb(y, u, v, rgb + rgb_index);
            }
        }
    }
    
    // 灰度转RGB
    void convertGrayToRGB(const VideoFrame& frame) {
        const uint8_t* gray = frame.data.data();
        uint8_t* rgb = reinterpret_cast<uint8_t*>(image_->data);
        
        for (int i = 0; i < height_; i++) {
            for (int j = 0; j < width_; j++) {
                uint8_t g = gray[i * width_ + j];
                int rgb_index = (i * width_ + j) * 3;
                
                rgb[rgb_index + 0] = g;
                rgb[rgb_index + 1] = g;
                rgb[rgb_index + 2] = g;
            }
        }
    }
    
    // YUV转RGB辅助函数
    void yuv2rgb(uint8_t y, uint8_t u, uint8_t v, uint8_t* rgb) {
        // YUV到RGB的转换公式
        int c = y - 16;
        int d = u - 128;
        int e = v - 128;
        
        int r = (298 * c + 409 * e + 128) >> 8;
        int g = (298 * c - 100 * d - 208 * e + 128) >> 8;
        int b = (298 * c + 516 * d + 128) >> 8;
        
        // 限制在[0, 255]范围内
        rgb[0] = (uint8_t)(std::max(0, std::min(255, r)));
        rgb[1] = (uint8_t)(std::max(0, std::min(255, g)));
        rgb[2] = (uint8_t)(std::max(0, std::min(255, b)));
    }
    
private:
    std::string display_name_;
    int width_;
    int height_;
    PixelFormat format_;
    Display* display_;
    Window window_;
    GC gc_;
    XImage* image_;
    int x_;
    int y_;
    bool fullscreen_;
    bool initialized_;
    std::string last_error_;
};

// 视频处理器实现类
class VideoProcessorImpl {
public:
    VideoProcessorImpl(bool use_hardware_accel)
        : use_hardware_accel_(use_hardware_accel),
          next_processor_id_(0),
          processing_thread_running_(false),
          current_fps_(0.0f) {
        
        // 如果使用硬件加速,初始化加速器
        if (use_hardware_accel_) {
            try {
                // 注意:地址和DMA设备路径应根据实际硬件设置调整
                hw_accelerator_ = std::make_unique<HardwareAccelerator>(0x43C00000, "/dev/xilinx_dma", true);
                
                if (!hw_accelerator_->initialize()) {
                    use_hardware_accel_ = false;
                    hw_accelerator_.reset();
                    last_error_ = "硬件加速器初始化失败,回退到软件实现";
                    ERROR_PRINT(last_error_);
                } else {
                    DEBUG_PRINT("硬件加速器已初始化,版本: " + hw_accelerator_->getVersionInfo());
                }
            } catch (const std::exception& e) {
                use_hardware_accel_ = false;
                last_error_ = "硬件加速器错误: " + std::string(e.what()) + ",回退到软件实现";
                ERROR_PRINT(last_error_);
            }
        }
    }
    
    ~VideoProcessorImpl() {
        stopProcessingThread();
    }
    
    bool initialize() {
        // 如果已经有硬件加速器,则认为已初始化
        if (use_hardware_accel_ && hw_accelerator_) {
            return true;
        }
        
        // 否则尝试重新初始化
        if (use_hardware_accel_) {
            try {
                hw_accelerator_ = std::make_unique<HardwareAccelerator>(0x43C00000, "/dev/xilinx_dma", true);
                
                if (!hw_accelerator_->initialize()) {
                    use_hardware_accel_ = false;
                    hw_accelerator_.reset();
                    last_error_ = "硬件加速器初始化失败,回退到软件实现";
                    ERROR_PRINT(last_error_);
                    return false;
                }
                
                DEBUG_PRINT("硬件加速器已初始化,版本: " + hw_accelerator_->getVersionInfo());
                return true;
            } catch (const std::exception& e) {
                use_hardware_accel_ = false;
                last_error_ = "硬件加速器错误: " + std::string(e.what()) + ",回退到软件实现";
                ERROR_PRINT(last_error_);
                return false;
            }
        }
        
        // 软件模式不需要特殊初始化
        return true;
    }
    
    int addProcessor(FrameProcessor processor, const std::string& name) {
        if (!processor) {
            last_error_ = "无效的处理器函数";
            return -1;
        }
        
        std::lock_guard<std::mutex> lock(processors_mutex_);
        int id = next_processor_id_++;
        
        ProcessorInfo info;
        info.id = id;
        info.name = name.empty() ? "处理器_" + std::to_string(id) : name;
        info.processor = processor;
        
        processors_.push_back(info);
        return id;
    }
    
    bool removeProcessor(int processor_id) {
        std::lock_guard<std::mutex> lock(processors_mutex_);
        
        for (auto it = processors_.begin(); it != processors_.end(); ++it) {
            if (it->id == processor_id) {
                processors_.erase(it);
                return true;
            }
        }
        
        last_error_ = "未找到处理器ID: " + std::to_string(processor_id);
        return false;
    }
    
    bool processFrame(const VideoFrame& input_frame, VideoFrame& output_frame) {
        // 准备输出帧
        output_frame = input_frame;  // 初始复制输入帧
        
        // 计时
        auto start_time = std::chrono::high_resolution_clock::now();
        
        // 依次应用所有处理器
        std::lock_guard<std::mutex> lock(processors_mutex_);
        
        for (const auto& processor_info : processors_) {
            try {
                if (!processor_info.processor(output_frame)) {
                    last_error_ = "处理器 '" + processor_info.name + "' 失败";
                    return false;
                }
            } catch (const std::exception& e) {
                last_error_ = "处理器 '" + processor_info.name + "' 异常: " + e.what();
                return false;
            }
        }
        
        // 计算处理时间和FPS
        auto end_time = std::chrono::high_resolution_clock::now();
        auto elapsed = std::chrono::duration<double, std::milli>(end_time - start_time).count();
        
        // 更新平均处理时间
        processing_times_.push_back(elapsed);
        if (processing_times_.size() > 30) {  // 保持最近30帧的平均
            processing_times_.pop_front();
        }
        
        // 计算平均FPS
        double avg_time = std::accumulate(processing_times_.begin(), 
                                         processing_times_.end(), 0.0) / 
                                         processing_times_.size();
        
        if (avg_time > 0) {
            current_fps_ = 1000.0 / avg_time;
        }
        
        return true;
    }
    
    bool startProcessingThread(VideoCapture& capture, VideoDisplay& display) {
        if (processing_thread_running_) {
            last_error_ = "处理线程已在运行";
            return false;
        }
        
        // 启动处理线程
        processing_thread_running_ = true;
        processing_thread_ = std::thread(&VideoProcessorImpl::processingThread, 
                                       this, std::ref(capture), std::ref(display));
        
        DEBUG_PRINT("视频处理线程已启动");
        return true;
    }
    
    void stopProcessingThread() {
        if (processing_thread_running_) {
            processing_thread_running_ = false;
            
            // 通知等待条件的线程
            {
                std::lock_guard<std::mutex> lock(frames_mutex_);
                frames_condition_.notify_all();
            }
            
            // 等待线程完成
            if (processing_thread_.joinable()) {
                processing_thread_.join();
            }
            
            DEBUG_PRINT("视频处理线程已停止");
        }
    }
    
    float getProcessingFPS() const {
        return current_fps_;
    }
    
    const std::string& getLastError() const {
        return last_error_;
    }
    
private:
    // 处理线程函数
    void processingThread(VideoCapture& capture, VideoDisplay& display) {
        VideoFrame input_frame, output_frame;
        
        while (processing_thread_running_) {
            // 读取帧
            if (!capture.read(input_frame, 100)) {  // 100ms超时
                continue;  // 读取超时或出错,继续下一帧
            }
            
            // 处理帧
            if (!processFrame(input_frame, output_frame)) {
                ERROR_PRINT("处理帧失败: " << last_error_);
                continue;
            }
            
            // 显示帧
            if (!display.display(output_frame)) {
                ERROR_PRINT("显示帧失败");
                continue;
            }
        }
    }
    
private:
    // 处理器信息结构
    struct ProcessorInfo {
        int id;
        std::string name;
        FrameProcessor processor;
    };
    
    bool use_hardware_accel_;
    std::unique_ptr<HardwareAccelerator> hw_accelerator_;
    
    std::vector<ProcessorInfo> processors_;
    std::mutex processors_mutex_;
    int next_processor_id_;
    
    std::thread processing_thread_;
    std::atomic<bool> processing_thread_running_;
    
    std::mutex frames_mutex_;
    std::condition_variable frames_condition_;
    
    std::deque<double> processing_times_;
    float current_fps_;
    std::string last_error_;
};

// VideoCapture method definitions - every call is forwarded to VideoCaptureImpl

// Creates the implementation object with the given device path, frame size,
// pixel format and frame rate. No other work is done here.
VideoCapture::VideoCapture(const std::string& device_path, 
                         int width, 
                         int height, 
                         PixelFormat format, 
                         int fps)
    : impl_(new VideoCaptureImpl(device_path, width, height, format, fps)) {
}

// Defined here, where VideoCaptureImpl is a complete type.
VideoCapture::~VideoCapture() = default;

// Forwards to VideoCaptureImpl::start() and returns its result.
bool VideoCapture::start() {
    return impl_->start();
}

// Forwards to VideoCaptureImpl::stop().
void VideoCapture::stop() {
    impl_->stop();
}

// Forwards to VideoCaptureImpl::read() and returns its result.
bool VideoCapture::read(VideoFrame& frame, int timeout_ms) {
    return impl_->read(frame, timeout_ms);
}

// Returns the text produced by VideoCaptureImpl::getDeviceInfo().
std::string VideoCapture::getDeviceInfo() const {
    return impl_->getDeviceInfo();
}

// Returns the value of VideoCaptureImpl::getCurrentFPS().
float VideoCapture::getCurrentFPS() const {
    return impl_->getCurrentFPS();
}

// VideoDisplay method definitions - every call is forwarded to VideoDisplayImpl

// Creates the implementation object; the window itself is created later by
// initialize().
VideoDisplay::VideoDisplay(const std::string& display_name, 
                         int width, 
                         int height, 
                         PixelFormat format)
    : impl_(new VideoDisplayImpl(display_name, width, height, format)) {
}

// Defined here, where VideoDisplayImpl is a complete type.
VideoDisplay::~VideoDisplay() = default;

// Forwards to VideoDisplayImpl::initialize() and returns its result.
bool VideoDisplay::initialize() {
    return impl_->initialize();
}

// Forwards to VideoDisplayImpl::display() and returns its result.
bool VideoDisplay::display(const VideoFrame& frame) {
    return impl_->display(frame);
}

// Forwards to VideoDisplayImpl::close().
void VideoDisplay::close() {
    impl_->close();
}

// Forwards to VideoDisplayImpl::setPosition().
void VideoDisplay::setPosition(int x, int y) {
    impl_->setPosition(x, y);
}

// Forwards to VideoDisplayImpl::setFullscreen().
void VideoDisplay::setFullscreen(bool fullscreen) {
    impl_->setFullscreen(fullscreen);
}

// VideoProcessor method definitions - every call is forwarded to VideoProcessorImpl

// Creates the implementation object; the VideoProcessorImpl constructor
// attempts hardware-accelerator setup when use_hardware_accel is true.
VideoProcessor::VideoProcessor(bool use_hardware_accel)
    : impl_(new VideoProcessorImpl(use_hardware_accel)) {
}

// Defined here, where VideoProcessorImpl is a complete type.
VideoProcessor::~VideoProcessor() = default;

// Forwards to VideoProcessorImpl::initialize() and returns its result.
bool VideoProcessor::initialize() {
    return impl_->initialize();
}

// Forwards to VideoProcessorImpl::addProcessor(); returns the new processor
// id, or -1 if the processor was rejected.
int VideoProcessor::addProcessor(FrameProcessor processor, const std::string& name) {
    return impl_->addProcessor(processor, name);
}

// Forwards to VideoProcessorImpl::removeProcessor() and returns its result.
bool VideoProcessor::removeProcessor(int processor_id) {
    return impl_->removeProcessor(processor_id);
}

// Forwards to VideoProcessorImpl::processFrame() and returns its result.
bool VideoProcessor::processFrame(const VideoFrame& input_frame, VideoFrame& output_frame) {
    return impl_->processFrame(input_frame, output_frame);
}

// Forwards to VideoProcessorImpl::startProcessingThread(); capture and
// display are passed on by reference.
bool VideoProcessor::startProcessingThread(VideoCapture& capture, VideoDisplay& display) {
    return impl_->startProcessingThread(capture, display);
}

// Forwards to VideoProcessorImpl::stopProcessingThread().
void VideoProcessor::stopProcessingThread() {
    impl_->stopProcessingThread();
}

// Returns the value of VideoProcessorImpl::getProcessingFPS().
float VideoProcessor::getProcessingFPS() const {
    return impl_->getProcessingFPS();
}

// Returns a copy of the implementation's last error message.
std::string VideoProcessor::getLastError() const {
    return impl_->getLastError();
}

// VideoEffects命名空间实现
namespace VideoEffects {

// Brightness / contrast adjustment.
//
// brightness: clamped to [-1, 1]; added to every colour value as
//             brightness * 255.
// contrast:   clamped to [0, 2]; scales the distance of every colour value
//             from mid-gray (128).
//
// Returns a processor that rewrites the R, G and B bytes of an RGB24 or
// RGBA32 frame in place (alpha is left untouched). The processor returns
// false for any other format, for negative dimensions, or when frame.data is
// smaller than width * height * channels.
FrameProcessor adjustBrightnessContrast(float brightness, float contrast) {
    // Clamp the parameters once, when the processor is created
    const float b = std::max(-1.0f, std::min(1.0f, brightness));
    const float c = std::max(0.0f, std::min(2.0f, contrast));
    const int b_offset = static_cast<int>(b * 255);
    
    // The result depends only on the input byte, so precompute all 256
    // outcomes instead of repeating the float arithmetic for every sample.
    std::vector<uint8_t> lut(256);
    for (int v = 0; v < 256; v++) {
        int val = static_cast<int>((v - 128) * c + 128);  // contrast
        val += b_offset;                                  // brightness
        lut[v] = static_cast<uint8_t>(std::max(0, std::min(255, val)));
    }
    
    return [=](VideoFrame& frame) {
        // RGB formats only
        if (frame.format != PixelFormat::RGB24 && 
            frame.format != PixelFormat::RGBA32) {
            ERROR_PRINT("亮度/对比度调整只支持RGB格式");
            return false;
        }
        
        if (frame.width < 0 || frame.height < 0) {
            ERROR_PRINT("亮度/对比度调整: 无效的帧尺寸");
            return false;
        }
        
        const size_t channels = (frame.format == PixelFormat::RGB24) ? 3 : 4;
        const size_t pixels = static_cast<size_t>(frame.width) *
                              static_cast<size_t>(frame.height);
        
        // Refuse frames whose buffer is shorter than the dimensions claim;
        // indexing them would read and write out of bounds.
        if (frame.data.size() < pixels * channels) {
            ERROR_PRINT("亮度/对比度调整: 帧数据大小与尺寸不匹配");
            return false;
        }
        
        for (size_t i = 0; i < pixels; i++) {
            uint8_t* px = &frame.data[i * channels];
            px[0] = lut[px[0]];
            px[1] = lut[px[1]];
            px[2] = lut[px[2]];
        }
        
        return true;
    };
}

// Colour filter.
//
// Multiplies the red, green and blue value of every pixel by the given scale
// and saturates the result to [0, 255]. Negative (or NaN) products become 0.
//
// Returns a processor that works in place on RGB24 or RGBA32 frames (alpha
// is left untouched). The processor returns false for any other format, for
// negative dimensions, or when frame.data is smaller than
// width * height * channels.
FrameProcessor colorFilter(float red_scale, float green_scale, float blue_scale) {
    // Builds the 256-entry mapping for one channel. The product is clamped
    // while it is still a float: converting an out-of-range float to int is
    // undefined behaviour, which a very large scale factor would trigger.
    auto build_lut = [](float scale) {
        std::vector<uint8_t> lut(256);
        for (int v = 0; v < 256; v++) {
            const float scaled = v * scale;
            if (!(scaled > 0.0f)) {           // negative, zero or NaN
                lut[v] = 0;
            } else if (scaled >= 255.0f) {
                lut[v] = 255;
            } else {
                lut[v] = static_cast<uint8_t>(static_cast<int>(scaled));
            }
        }
        return lut;
    };
    
    const std::vector<uint8_t> red_lut = build_lut(red_scale);
    const std::vector<uint8_t> green_lut = build_lut(green_scale);
    const std::vector<uint8_t> blue_lut = build_lut(blue_scale);
    
    return [=](VideoFrame& frame) {
        // RGB formats only
        if (frame.format != PixelFormat::RGB24 && 
            frame.format != PixelFormat::RGBA32) {
            ERROR_PRINT("颜色滤镜只支持RGB格式");
            return false;
        }
        
        if (frame.width < 0 || frame.height < 0) {
            ERROR_PRINT("颜色滤镜: 无效的帧尺寸");
            return false;
        }
        
        const size_t channels = (frame.format == PixelFormat::RGB24) ? 3 : 4;
        const size_t pixels = static_cast<size_t>(frame.width) *
                              static_cast<size_t>(frame.height);
        
        // Refuse frames whose buffer is shorter than the dimensions claim.
        if (frame.data.size() < pixels * channels) {
            ERROR_PRINT("颜色滤镜: 帧数据大小与尺寸不匹配");
            return false;
        }
        
        for (size_t i = 0; i < pixels; i++) {
            uint8_t* px = &frame.data[i * channels];
            px[0] = red_lut[px[0]];
            px[1] = green_lut[px[1]];
            px[2] = blue_lut[px[2]];
        }
        
        return true;
    };
}

// Edge detection (Sobel operator with a binary threshold).
//
// threshold: clamped to [0, 255]; a pixel is marked as an edge (255) when the
//            integer part of its gradient magnitude is greater than the
//            threshold, otherwise it becomes 0. The one-pixel border of the
//            frame is always 0.
//
// Returns a processor that works in place on RGB24, RGBA32 and GRAY8 frames.
// For RGB formats the edge value is written to R, G and B; for RGBA32 the
// alpha byte is set to 255. The processor returns false for any other
// format, for negative dimensions, or when frame.data is smaller than
// width * height * channels.
FrameProcessor edgeDetection(int threshold) {
    // Clamp the threshold once
    const int t = std::max(0, std::min(255, threshold));
    
    // floor(sqrt(s)) > t  <=>  s >= (t + 1)^2 for non-negative integers, so
    // the squared magnitude can be compared directly and no sqrt is needed.
    const int min_edge_sq = (t + 1) * (t + 1);
    
    return [=](VideoFrame& frame) {
        // Validate the format before allocating anything
        size_t channels = 0;
        switch (frame.format) {
            case PixelFormat::RGB24:
                channels = 3;
                break;
            case PixelFormat::RGBA32:
                channels = 4;
                break;
            case PixelFormat::GRAY8:
                channels = 1;
                break;
            default:
                ERROR_PRINT("边缘检测不支持的格式");
                return false;
        }
        
        if (frame.width < 0 || frame.height < 0) {
            ERROR_PRINT("边缘检测: 无效的帧尺寸");
            return false;
        }
        
        const int w = frame.width;
        const int h = frame.height;
        const size_t pixels = static_cast<size_t>(w) * static_cast<size_t>(h);
        
        // Refuse frames whose buffer is shorter than the dimensions claim;
        // the loops below would otherwise read out of bounds.
        if (frame.data.size() < pixels * channels) {
            ERROR_PRINT("边缘检测: 帧数据大小与尺寸不匹配");
            return false;
        }
        
        // Step 1: grayscale copy of the frame
        std::vector<uint8_t> gray(pixels);
        if (channels == 1) {
            gray.assign(frame.data.data(), frame.data.data() + pixels);
        } else {
            for (size_t p = 0; p < pixels; p++) {
                const uint8_t* px = &frame.data[p * channels];
                // Standard luma weights
                gray[p] = static_cast<uint8_t>(
                    0.299f * px[0] + 
                    0.587f * px[1] + 
                    0.114f * px[2]);
            }
        }
        
        // Step 2: Sobel gradient + threshold (border pixels stay 0)
        std::vector<uint8_t> edges(pixels, 0);
        
        for (int i = 1; i < h - 1; i++) {
            const uint8_t* above = &gray[static_cast<size_t>(i - 1) * w];
            const uint8_t* row   = &gray[static_cast<size_t>(i) * w];
            const uint8_t* below = &gray[static_cast<size_t>(i + 1) * w];
            
            for (int j = 1; j < w - 1; j++) {
                // Horizontal gradient
                const int gx = 
                    -1 * above[j - 1] + 1 * above[j + 1] +
                    -2 * row[j - 1]   + 2 * row[j + 1] +
                    -1 * below[j - 1] + 1 * below[j + 1];
                
                // Vertical gradient
                const int gy = 
                    -1 * above[j - 1] - 2 * above[j] - 1 * above[j + 1] +
                     1 * below[j - 1] + 2 * below[j] + 1 * below[j + 1];
                
                // |gx|, |gy| <= 1020, so the sum of squares fits in an int
                const int magnitude_sq = gx * gx + gy * gy;
                
                edges[static_cast<size_t>(i) * w + j] =
                    (magnitude_sq >= min_edge_sq) ? 255 : 0;
            }
        }
        
        // Step 3: write the edge map back in the frame's own format
        if (channels == 1) {
            frame.data.swap(edges);
        } else {
            // Drop any trailing bytes beyond the pixel data
            frame.data.resize(pixels * channels);
            
            for (size_t p = 0; p < pixels; p++) {
                uint8_t* px = &frame.data[p * channels];
                px[0] = px[1] = px[2] = edges[p];
                
                // RGBA: the result is made fully opaque
                if (channels == 4) {
                    px[3] = 255;
                }
            }
        }
        
        return true;
    };
}

// Gaussian blur.
//
// kernel_size: made odd (even values are incremented) and then clamped to
//              [3, 7].
// sigma:       standard deviation of the Gaussian; values below 0.1 are
//              raised to 0.1.
//
// Returns a processor that blurs RGB24, RGBA32 (alpha untouched) and GRAY8
// frames in place. Pixels outside the frame are replaced by the nearest edge
// pixel. Results are rounded to the nearest integer so that a uniform image
// is left unchanged. The processor returns false for any other format, for
// negative dimensions, or when frame.data is smaller than
// width * height * channels.
FrameProcessor gaussianBlur(int kernel_size, float sigma) {
    // Make the kernel size odd and limit it to 3..7
    int ksize = (kernel_size % 2 == 0) ? kernel_size + 1 : kernel_size;
    ksize = std::max(3, std::min(7, ksize));
    
    const float s = std::max(0.1f, sigma);
    const int radius = ksize / 2;
    
    // The kernel depends only on the parameters, so build and normalize it
    // once here instead of on every frame.
    std::vector<float> kernel(ksize * ksize);
    float weight_sum = 0.0f;
    
    for (int i = -radius; i <= radius; i++) {
        for (int j = -radius; j <= radius; j++) {
            float value = std::exp(-(i*i + j*j) / (2.0f * s * s));
            kernel[(i + radius) * ksize + (j + radius)] = value;
            weight_sum += value;
        }
    }
    
    for (auto& k : kernel) {
        k /= weight_sum;
    }
    
    return [=](VideoFrame& frame) {
        // Bytes per pixel, and how many of them are blurred
        size_t channels = 0;
        size_t blurred = 0;
        switch (frame.format) {
            case PixelFormat::RGB24:
                channels = 3;
                blurred = 3;
                break;
            case PixelFormat::RGBA32:
                channels = 4;
                blurred = 3;  // alpha is left as is
                break;
            case PixelFormat::GRAY8:
                channels = 1;
                blurred = 1;
                break;
            default:
                ERROR_PRINT("高斯模糊不支持的格式");
                return false;
        }
        
        if (frame.width < 0 || frame.height < 0) {
            ERROR_PRINT("高斯模糊: 无效的帧尺寸");
            return false;
        }
        
        const size_t pixels = static_cast<size_t>(frame.width) *
                              static_cast<size_t>(frame.height);
        
        // Refuse frames whose buffer is shorter than the dimensions claim.
        if (frame.data.size() < pixels * channels) {
            ERROR_PRINT("高斯模糊: 帧数据大小与尺寸不匹配");
            return false;
        }
        
        // Unmodified copy to read from while frame.data is overwritten
        const std::vector<uint8_t> src = frame.data;
        
        for (int i = 0; i < frame.height; i++) {
            for (int j = 0; j < frame.width; j++) {
                float acc[3] = {0.0f, 0.0f, 0.0f};
                
                // Convolution; coordinates are clamped to the frame edges
                for (int ki = -radius; ki <= radius; ki++) {
                    for (int kj = -radius; kj <= radius; kj++) {
                        int ni = std::min(std::max(0, i + ki), frame.height - 1);
                        int nj = std::min(std::max(0, j + kj), frame.width - 1);
                        
                        const float k = kernel[(ki + radius) * ksize + (kj + radius)];
                        const size_t idx =
                            (static_cast<size_t>(ni) * frame.width + nj) * channels;
                        
                        for (size_t ch = 0; ch < blurred; ch++) {
                            acc[ch] += src[idx + ch] * k;
                        }
                    }
                }
                
                // Store the result, rounded to nearest and saturated
                const size_t dst =
                    (static_cast<size_t>(i) * frame.width + j) * channels;
                for (size_t ch = 0; ch < blurred; ch++) {
                    const int rounded = static_cast<int>(acc[ch] + 0.5f);
                    frame.data[dst + ch] =
                        static_cast<uint8_t>(std::min(255, std::max(0, rounded)));
                }
            }
        }
        
        return true;
    };
}

// Returns a FrameProcessor that rescales the image content in place.
//
// The frame dimensions are left unchanged: destination pixel (i, j) is
// sampled from source position (i / sf, j / sf) using bilinear interpolation,
// where sf is scale_factor clamped to [0.1, 2.0]. Destination pixels whose
// source position falls outside the image are set to 0 in every channel
// (including alpha for RGBA32).
//
// Supported formats: RGB24, RGBA32 and GRAY8. The processor returns false
// (after logging) for any other format and true otherwise.
FrameProcessor resize(float scale_factor) {
    return [=](VideoFrame& frame) {
        // Clamp the scale factor to the supported range.
        const float sf = std::max(0.1f, std::min(2.0f, scale_factor));

        // Bytes per pixel. GRAY8 is treated as a one-channel image so that
        // every format shares the same interpolation loop.
        int channels = 0;
        if (frame.format == PixelFormat::RGB24) {
            channels = 3;
        } else if (frame.format == PixelFormat::RGBA32) {
            channels = 4;
        } else if (frame.format == PixelFormat::GRAY8) {
            channels = 1;
        } else {
            ERROR_PRINT("缩放不支持的格式");
            return false;
        }

        const int width = frame.width;
        const int height = frame.height;

        // Sample from an untouched copy because the frame is overwritten in place.
        const std::vector<uint8_t> src = frame.data;

        for (int i = 0; i < height; i++) {
            for (int j = 0; j < width; j++) {
                // Corresponding position in the source image.
                const float src_i = i / sf;
                const float src_j = j / sf;
                const int dst_idx = (i * width + j) * channels;

                const bool inside = src_i >= 0 && src_i <= height - 1 &&
                                    src_j >= 0 && src_j <= width - 1;
                if (!inside) {
                    // Outside the source image: black (and transparent for RGBA).
                    for (int c = 0; c < channels; c++) {
                        frame.data[dst_idx + c] = 0;
                    }
                    continue;
                }

                // Four nearest source pixels. The second index is clamped so
                // that samples landing exactly on the last row/column stay in
                // bounds; its weight is 0 in that case.
                const int i1 = static_cast<int>(src_i);
                const int j1 = static_cast<int>(src_j);
                const int i2 = std::min(i1 + 1, height - 1);
                const int j2 = std::min(j1 + 1, width - 1);

                // Interpolation weights.
                const float wi = src_i - i1;
                const float wj = src_j - j1;

                for (int c = 0; c < channels; c++) {
                    const uint8_t v11 = src[(i1 * width + j1) * channels + c];
                    const uint8_t v12 = src[(i1 * width + j2) * channels + c];
                    const uint8_t v21 = src[(i2 * width + j1) * channels + c];
                    const uint8_t v22 = src[(i2 * width + j2) * channels + c];

                    // Bilinear interpolation.
                    const float value = (1 - wi) * (1 - wj) * v11 +
                                        wi * (1 - wj) * v21 +
                                        (1 - wi) * wj * v12 +
                                        wi * wj * v22;

                    frame.data[dst_idx + c] = static_cast<uint8_t>(value);
                }
            }
        }

        return true;
    };
}

// Returns a FrameProcessor that rotates the image content in place.
//
// The rotation is by `angle` degrees about the point (width / 2, height / 2).
// Every destination pixel is mapped back into the source image (inverse
// mapping) and sampled with bilinear interpolation. Destination pixels whose
// source position falls outside the image are set to 0 in every channel
// (including alpha for RGBA32). The frame dimensions are unchanged.
//
// Supported formats: RGB24, RGBA32 and GRAY8. The processor returns false
// (after logging) for any other format and true otherwise.
FrameProcessor rotate(float angle) {
    return [=](VideoFrame& frame) {
        // Bytes per pixel. GRAY8 is treated as a one-channel image so that
        // every format shares the same sampling loop.
        int channels = 0;
        if (frame.format == PixelFormat::RGB24) {
            channels = 3;
        } else if (frame.format == PixelFormat::RGBA32) {
            channels = 4;
        } else if (frame.format == PixelFormat::GRAY8) {
            channels = 1;
        } else {
            ERROR_PRINT("旋转不支持的格式");
            return false;
        }

        // Convert the angle to radians and precompute the rotation terms.
        const float a = angle * M_PI / 180.0f;
        const float sina = std::sin(a);
        const float cosa = std::cos(a);

        const int width = frame.width;
        const int height = frame.height;

        // Rotation centre.
        const float center_x = width / 2.0f;
        const float center_y = height / 2.0f;

        // Sample from an untouched copy because the frame is overwritten in place.
        const std::vector<uint8_t> src = frame.data;

        for (int i = 0; i < height; i++) {
            for (int j = 0; j < width; j++) {
                // Coordinates relative to the centre.
                const float dx = j - center_x;
                const float dy = i - center_y;

                // Source position for this destination pixel.
                const float src_x = dx * cosa - dy * sina + center_x;
                const float src_y = dx * sina + dy * cosa + center_y;

                const int dst_idx = (i * width + j) * channels;

                const bool inside = src_x >= 0 && src_x <= width - 1 &&
                                    src_y >= 0 && src_y <= height - 1;
                if (!inside) {
                    // Outside the source image: black (and transparent for RGBA).
                    for (int c = 0; c < channels; c++) {
                        frame.data[dst_idx + c] = 0;
                    }
                    continue;
                }

                // Four nearest source pixels. The second index is clamped so
                // that samples landing exactly on the last row/column stay in
                // bounds; its weight is 0 in that case.
                const int x1 = static_cast<int>(src_x);
                const int y1 = static_cast<int>(src_y);
                const int x2 = std::min(x1 + 1, width - 1);
                const int y2 = std::min(y1 + 1, height - 1);

                const float wx = src_x - x1;
                const float wy = src_y - y1;

                const int src_idx11 = (y1 * width + x1) * channels;
                const int src_idx12 = (y1 * width + x2) * channels;
                const int src_idx21 = (y2 * width + x1) * channels;
                const int src_idx22 = (y2 * width + x2) * channels;

                for (int c = 0; c < channels; c++) {
                    const uint8_t v11 = src[src_idx11 + c];
                    const uint8_t v12 = src[src_idx12 + c];
                    const uint8_t v21 = src[src_idx21 + c];
                    const uint8_t v22 = src[src_idx22 + c];

                    // Bilinear interpolation.
                    const float value = (1-wx)*(1-wy)*v11 + wx*(1-wy)*v12 +
                                        (1-wx)*wy*v21 + wx*wy*v22;

                    frame.data[dst_idx + c] = static_cast<uint8_t>(value);
                }
            }
        }

        return true;
    };
}

// Returns a FrameProcessor that binarizes the image.
//
// threshold_value is clamped to [0, 255]. For GRAY8 every byte of the frame
// becomes 255 when it is greater than the threshold and 0 otherwise. For
// RGB24/RGBA32 a luma value (0.299 R + 0.587 G + 0.114 B) is computed per
// pixel, compared against the threshold, and the result is written to the
// three colour channels; the alpha byte of RGBA32 is left untouched.
// Any other format is logged and the processor returns false.
FrameProcessor threshold(int threshold_value) {
    return [=](VideoFrame& frame) {
        const int cutoff = std::max(0, std::min(255, threshold_value));

        // Grayscale frames can be thresholded byte by byte.
        if (frame.format == PixelFormat::GRAY8) {
            for (auto& px : frame.data) {
                px = px > cutoff ? 255 : 0;
            }
            return true;
        }

        const bool is_rgb = (frame.format == PixelFormat::RGB24);
        if (!is_rgb && frame.format != PixelFormat::RGBA32) {
            ERROR_PRINT("二值化不支持的格式");
            return false;
        }

        // Colour frames: reduce each pixel to luma first, then threshold.
        const int stride = is_rgb ? 3 : 4;
        for (int row = 0; row < frame.height; ++row) {
            for (int col = 0; col < frame.width; ++col) {
                const int base = (row * frame.width + col) * stride;

                const uint8_t luma = static_cast<uint8_t>(
                    0.299f * frame.data[base] +
                    0.587f * frame.data[base + 1] +
                    0.114f * frame.data[base + 2]);

                const uint8_t bw = luma > cutoff ? 255 : 0;

                frame.data[base] = bw;
                frame.data[base + 1] = bw;
                frame.data[base + 2] = bw;
            }
        }

        return true;
    };
}

// Returns a FrameProcessor that delegates to the PL hardware accelerator.
//
// effect_id is cast to HardwareAccelerator::OperationType and up to the first
// four entries of params are passed to the accelerator as raw 32-bit patterns.
// The accelerator object is created lazily on first use and shared by every
// processor returned from this function (function-local static).
//
// The processor returns false when the accelerator cannot be created or
// initialized, or when processing fails; true otherwise.
//
// NOTE(review): the conversion between VideoFrame and the float buffers is
// still a placeholder, so the accelerator is currently invoked with empty
// input and the frame is not modified.
FrameProcessor customHardwareEffect(int effect_id, const std::vector<float>& params) {
    return [=](VideoFrame& frame) {
        // Shared accelerator instance.
        static std::unique_ptr<HardwareAccelerator> hw_acc;

        // Lazy initialization. The instance is published only after
        // initialize() succeeds; otherwise a failed first attempt would leave
        // a non-null but uninitialized accelerator that later calls would use.
        if (!hw_acc) {
            try {
                auto candidate = std::make_unique<HardwareAccelerator>(0x43C00000, "/dev/xilinx_dma", true);

                if (!candidate->initialize()) {
                    ERROR_PRINT("硬件加速器初始化失败,回退到软件实现");
                    return false;
                }

                hw_acc.reset(candidate.release());
            } catch (const std::exception& e) {
                ERROR_PRINT("硬件加速器错误: " << e.what());
                return false;
            }
        }

        // Select the operation.
        hw_acc->setOperationType(static_cast<HardwareAccelerator::OperationType>(effect_id));

        // Pass the parameters as their IEEE bit patterns; memcpy avoids the
        // aliasing violation a pointer cast would cause.
        static_assert(sizeof(float) == sizeof(uint32_t),
                      "float parameters must be exactly 32 bits wide");
        const size_t max_params = 4;
        for (size_t i = 0; i < params.size() && i < max_params; i++) {
            uint32_t param_bits;
            memcpy(&param_bits, &params[i], sizeof(param_bits));
            hw_acc->setParameter(i, param_bits);
        }

        // Input/output buffers for the accelerator.
        std::vector<float> input_data;
        std::vector<float> output_data;

        // Convert the image into the accelerator's input format here.
        // The actual conversion depends on the accelerator's requirements.

        // Run the accelerator.
        bool success = hw_acc->processFloatData(input_data, output_data, input_data.size());

        if (!success) {
            ERROR_PRINT("硬件处理失败");
            return false;
        }

        // Convert the accelerator output back into the image format here.
        // The actual conversion depends on the accelerator's output format.

        return true;
    };
}

} // namespace VideoEffects

cpp

/**
 * video_demo.cpp - 视频处理系统演示程序
 * 
 * 演示如何使用视频处理系统捕获、处理和显示视频
 * 
 * 编译: g++ -std=c++11 -o video_demo video_demo.cpp video_processor.cpp hw_accelerator.cpp -lX11
 */

#include <chrono>
#include <csignal>
#include <iomanip>
#include <iostream>
#include <string>
#include <thread>
#include <vector>

#include <unistd.h>

#include "video_processor.hpp"

// Global run flag; cleared by the SIGINT handler to stop the main loop.
volatile sig_atomic_t running = 1;

// SIGINT handler: prints a notice and requests shutdown.
//
// Only async-signal-safe operations are used here: the notice is emitted with
// write(2) rather than iostream, which must not be called from a handler.
void signal_handler(int signal) {
    if (signal == SIGINT) {
        static const char msg[] = "\n收到Ctrl+C,正在优雅地退出...\n";
        // The result is deliberately ignored: nothing useful can be done
        // about a failed write from inside a signal handler.
        const ssize_t ignored = write(STDOUT_FILENO, msg, sizeof(msg) - 1);
        (void)ignored;
        running = 0;
    }
}

// Returns a FrameProcessor that draws a small FPS indicator onto the frame.
//
// The frame rate is measured across calls using function-local statics and is
// refreshed once at least one second has elapsed. Only RGB24/RGBA32 frames
// are drawn on; other formats are passed through unchanged. The processor
// always returns true.
FrameProcessor createInfoOverlay() {
    return [](VideoFrame& frame) {
        using Clock = std::chrono::steady_clock;

        static int frames_in_window = 0;
        static auto window_start = Clock::now();
        static float fps = 0.0f;

        // Count this frame and refresh the FPS estimate once per second.
        ++frames_in_window;
        const auto now = Clock::now();
        const auto elapsed_ms =
            std::chrono::duration_cast<std::chrono::milliseconds>(now - window_start).count();

        if (elapsed_ms >= 1000) {
            fps = 1000.0f * frames_in_window / elapsed_ms;
            frames_in_window = 0;
            window_start = now;
        }

        // Nothing is drawn on non-RGB frames.
        const bool is_rgb = (frame.format == PixelFormat::RGB24);
        if (!is_rgb && frame.format != PixelFormat::RGBA32) {
            return true;
        }
        const int channels = is_rgb ? 3 : 4;

        const std::string info = "FPS: " + std::to_string(static_cast<int>(fps));

        // Placeholder "font rendering": a dimmed box plus one dot per character.
        const int box_left = 10;
        const int box_top = 10;
        const int box_width = 80;
        const int box_height = 20;

        // Halve the colour channels inside the box (clipped to the frame).
        for (int row = box_top; row < box_top + box_height && row < frame.height; ++row) {
            for (int col = box_left; col < box_left + box_width && col < frame.width; ++col) {
                const int base = (row * frame.width + col) * channels;
                for (int c = 0; c < 3; ++c) {
                    frame.data[base + c] /= 2;
                }
            }
        }

        // One white dot per character, spaced 5 pixels apart on the box's
        // vertical midline.
        const int dot_row = box_top + box_height / 2;
        const int first_dot_col = box_left + 5;
        const int char_count = static_cast<int>(info.length());

        for (int n = 0; n < char_count; ++n) {
            const int dot_col = first_dot_col + n * 5;
            if (dot_col >= frame.width || dot_row >= frame.height) {
                continue;
            }

            const int base = (dot_row * frame.width + dot_col) * channels;
            frame.data[base] = 255;
            frame.data[base + 1] = 255;
            frame.data[base + 2] = 255;
        }

        return true;
    };
}

// Demo entry point: captures video, runs it through the processing pipeline
// and shows it, cycling through five effects every three seconds until
// Ctrl+C is pressed.
//
// Options: -d/--device, -w/--width, -h/--height, --software, --help.
// Returns 0 on normal exit (or after --help) and -1 on invalid arguments,
// start-up failure or an exception.
int main(int argc, char* argv[]) {
    // Install the SIGINT handler so Ctrl+C clears `running`.
    signal(SIGINT, signal_handler);
    
    std::cout << "Zynq视频处理系统演示" << std::endl;
    std::cout << "==================" << std::endl;
    
    // Defaults, overridable from the command line.
    std::string device = "/dev/video0";
    int width = 640;
    int height = 480;
    bool use_hardware = true;
    
    // Parse command-line arguments. std::stoi throws on malformed or
    // out-of-range numbers, so parsing is guarded instead of letting the
    // exception terminate the program.
    try {
        for (int i = 1; i < argc; i++) {
            std::string arg = argv[i];
            
            if (arg == "-d" || arg == "--device") {
                if (i + 1 < argc) {
                    device = argv[++i];
                }
            } else if (arg == "-w" || arg == "--width") {
                if (i + 1 < argc) {
                    width = std::stoi(argv[++i]);
                }
            } else if (arg == "-h" || arg == "--height") {
                if (i + 1 < argc) {
                    height = std::stoi(argv[++i]);
                }
            } else if (arg == "--software") {
                use_hardware = false;
            } else if (arg == "--help") {
                std::cout << "用法:" << argv[0] << " [选项]" << std::endl;
                std::cout << "选项:" << std::endl;
                std::cout << "  -d, --device DEV    视频设备路径 (默认: /dev/video0)" << std::endl;
                std::cout << "  -w, --width WIDTH   视频宽度 (默认: 640)" << std::endl;
                std::cout << "  -h, --height HEIGHT 视频高度 (默认: 480)" << std::endl;
                std::cout << "  --software          使用软件处理 (不使用硬件加速)" << std::endl;
                std::cout << "  --help              显示此帮助消息" << std::endl;
                return 0;
            }
        }
    } catch (const std::exception& e) {
        std::cerr << "无效的命令行参数: " << e.what() << std::endl;
        return -1;
    }
    
    // A non-positive resolution cannot be captured or displayed.
    if (width <= 0 || height <= 0) {
        std::cerr << "无效的分辨率: " << width << "x" << height << std::endl;
        return -1;
    }
    
    // Report the effective configuration.
    std::cout << "正在初始化..." << std::endl;
    std::cout << "设备: " << device << std::endl;
    std::cout << "分辨率: " << width << "x" << height << std::endl;
    std::cout << "处理模式: " << (use_hardware ? "硬件加速" : "软件模式") << std::endl;
    
    try {
        // Create the capture source.
        VideoCapture capture(device, width, height, PixelFormat::YUV422, 30);
        
        // Start capturing.
        if (!capture.start()) {
            std::cerr << "无法启动视频捕获" << std::endl;
            return -1;
        }
        
        // Show device information.
        std::cout << "\n设备信息:" << std::endl;
        std::cout << capture.getDeviceInfo() << std::endl;
        
        // Create the display.
        VideoDisplay display("Zynq视频处理", width, height, PixelFormat::RGB24);
        
        // Initialize the display; stop the capture again on failure.
        if (!display.initialize()) {
            std::cerr << "无法初始化视频显示" << std::endl;
            capture.stop();
            return -1;
        }
        
        // Create the processing pipeline.
        VideoProcessor processor(use_hardware);
        
        // A failed initialization is only a warning; execution continues.
        if (!processor.initialize()) {
            std::cerr << "警告: 处理器初始化失败: " << processor.getLastError() << std::endl;
        }
        
        // Always-on stages: info overlay and brightness/contrast adjustment.
        processor.addProcessor(createInfoOverlay(), "InfoOverlay");
        processor.addProcessor(VideoEffects::adjustBrightnessContrast(0.1f, 1.2f), "BrightnessContrast");
        
        // State for the rotating demo effect.
        int current_effect = 0;
        const int num_effects = 5;
        
        // Processor IDs returned by addProcessor, kept so the active effect
        // can be removed again (-1 = not installed).
        int effect_processors[num_effects] = {-1, -1, -1, -1, -1};
        
        // Install the initial effect.
        effect_processors[current_effect] = processor.addProcessor(
            VideoEffects::colorFilter(1.2f, 1.0f, 1.0f), "RedBoost");
        
        // Start the processing thread.
        processor.startProcessingThread(capture, display);
        
        // Main loop: switch to the next effect every 3 seconds.
        while (running) {
            // Print status.
            std::cout << "\n当前FPS: " << std::fixed << std::setprecision(1) 
                     << capture.getCurrentFPS() << " 处理FPS: " 
                     << processor.getProcessingFPS() << "     " << std::flush;
            
            // Wait 3 seconds.
            std::this_thread::sleep_for(std::chrono::seconds(3));
            
            // Switch effects unless shutdown was requested during the sleep.
            if (running) {
                // Remove the active effect.
                if (effect_processors[current_effect] >= 0) {
                    processor.removeProcessor(effect_processors[current_effect]);
                }
                
                // Advance to the next effect.
                current_effect = (current_effect + 1) % num_effects;
                
                // Install the new effect.
                switch (current_effect) {
                    case 0: // Red boost
                        effect_processors[current_effect] = processor.addProcessor(
                            VideoEffects::colorFilter(1.2f, 1.0f, 1.0f), "RedBoost");
                        std::cout << "\n切换效果: 红色增强" << std::endl;
                        break;
                        
                    case 1: // Edge detection
                        effect_processors[current_effect] = processor.addProcessor(
                            VideoEffects::edgeDetection(50), "EdgeDetection");
                        std::cout << "\n切换效果: 边缘检测" << std::endl;
                        break;
                        
                    case 2: // Gaussian blur
                        effect_processors[current_effect] = processor.addProcessor(
                            VideoEffects::gaussianBlur(5, 1.5f), "GaussianBlur");
                        std::cout << "\n切换效果: 高斯模糊" << std::endl;
                        break;
                        
                    case 3: // Resize
                        effect_processors[current_effect] = processor.addProcessor(
                            VideoEffects::resize(0.8f), "Resize");
                        std::cout << "\n切换效果: 缩小" << std::endl;
                        break;
                        
                    case 4: // Rotate
                        effect_processors[current_effect] = processor.addProcessor(
                            VideoEffects::rotate(15.0f), "Rotate");
                        std::cout << "\n切换效果: 旋转" << std::endl;
                        break;
                        
                    default:
                        break;
                }
            }
        }
        
        // Stop the processing thread.
        processor.stopProcessingThread();
        
        // Stop capturing.
        capture.stop();
        
        // Close the display.
        display.close();
        
        std::cout << "\n程序正常退出" << std::endl;
        
    } catch (const std::exception& e) {
        std::cerr << "异常: " << e.what() << std::endl;
        return -1;
    }
    
    return 0;
}

7. Zynq Linux内核驱动开发

7.1 Linux设备驱动基础与PetaLinux集成

在Zynq平台上,Linux设备驱动开发主要涉及以下方面:

Linux设备模型:理解字符设备、块设备和网络设备
与PetaLinux集成:通过PetaLinux工具来编译和部署驱动模块
设备树:用于描述硬件配置的机制
IP核驱动:为Vivado IP核创建设备驱动

以下是一个简单的字符设备驱动示例,展示如何在Zynq上开发Linux驱动:

c

/**
 * zynq_example_driver.c - Zynq平台示例字符设备驱动
 * 
 * 演示如何创建一个基本的字符设备驱动并与Zynq PL逻辑交互
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/uaccess.h>
#include <linux/device.h>
#include <linux/platform_device.h>
#include <linux/of.h>
#include <linux/io.h>
#include <linux/interrupt.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>

// Driver identification: DRIVER_NAME is used for log messages, resource and
// IRQ registration, the char-device region and the device node;
// DRIVER_CLASS is the name passed to class_create().
#define DRIVER_NAME "zynq_example_driver"
#define DRIVER_CLASS "zynqexample"

// Register offsets, in bytes from the mapped base address
#define REG_CONTROL   0x00
#define REG_STATUS    0x04
#define REG_DATA      0x08
#define REG_IRQ_EN    0x0C

// Control register bits
#define CTRL_START    0x01
#define CTRL_RESET    0x02

// Status register bits
#define STATUS_DONE   0x01
#define STATUS_BUSY   0x02
#define STATUS_ERROR  0x04

// Per-device driver state
struct zynq_example_dev {
    struct cdev cdev;              // character device registered in probe
    dev_t devt;                    // device number from alloc_chrdev_region()
    struct class *class;           // device class created in probe
    struct device *device;         // device created by device_create()
    int major;                     // MAJOR(devt)
    int minor;                     // MINOR(devt)
    
    void __iomem *base_addr;       // ioremap()ed register base
    unsigned long mem_start;       // first address of the memory resource
    unsigned long mem_end;         // last address of the memory resource (inclusive)
    int irq;                       // value returned by platform_get_irq()
    
    bool has_irq;                  // true once request_irq() succeeded; false selects polling
    wait_queue_head_t wait_queue;  // woken by the ISR, waited on in write()
    
    // Driver-specific data
    uint32_t data_value;           // last result read from REG_DATA after completion
};

// Single global device instance, allocated in probe
static struct zynq_example_dev *example_dev;

// 中断处理函数
static irqreturn_t zynq_example_isr(int irq, void *dev_id)
{
    struct zynq_example_dev *dev = (struct zynq_example_dev *)dev_id;
    uint32_t status;
    
    // 读取状态寄存器
    status = ioread32(dev->base_addr + REG_STATUS);
    
    // 如果操作完成
    if (status & STATUS_DONE) {
        // 读取数据结果
        dev->data_value = ioread32(dev->base_addr + REG_DATA);
        
        // 唤醒等待进程
        wake_up_interruptible(&dev->wait_queue);
        
        // 禁用中断
        iowrite32(0, dev->base_addr + REG_IRQ_EN);
        
        return IRQ_HANDLED;
    }
    
    return IRQ_NONE;
}

// file_operations方法

// 打开设备
static int zynq_example_open(struct inode *inode, struct file *file)
{
    // 获取设备结构体
    struct zynq_example_dev *dev = container_of(inode->i_cdev, 
                                              struct zynq_example_dev, cdev);
    file->private_data = dev;
    
    printk(KERN_INFO "%s: device opened
", DRIVER_NAME);
    return 0;
}

// 关闭设备
static int zynq_example_release(struct inode *inode, struct file *file)
{
    printk(KERN_INFO "%s: device closed
", DRIVER_NAME);
    return 0;
}

/*
 * read(): copy the current 32-bit value of REG_DATA to user space.
 *
 * Returns sizeof(uint32_t) on success, -EINVAL when the user buffer is
 * smaller than 4 bytes, or -EFAULT when the copy to user space fails.
 * The file offset is not used or advanced.
 */
static ssize_t zynq_example_read(struct file *file, char __user *buf,
                               size_t count, loff_t *offset)
{
    struct zynq_example_dev *dev = file->private_data;
    uint32_t data;

    /* The buffer must be able to hold one register value */
    if (count < sizeof(data))
        return -EINVAL;

    /* Read the current data register value */
    data = ioread32(dev->base_addr + REG_DATA);

    /* Copy it to user space */
    if (copy_to_user(buf, &data, sizeof(data)))
        return -EFAULT;

    printk(KERN_INFO "%s: read data: 0x%08x\n", DRIVER_NAME, data);

    return sizeof(data);
}

/*
 * write(): submit one 32-bit word to the device and wait for completion.
 *
 * The first 4 bytes of the user buffer are written to REG_DATA and the
 * operation is started through REG_CONTROL. With a registered interrupt the
 * caller sleeps (interruptibly) until STATUS_DONE is observed; otherwise the
 * status register is polled, yielding the CPU between reads.
 *
 * Returns sizeof(uint32_t) on success, -EINVAL when fewer than 4 bytes were
 * supplied, -EFAULT when the copy from user space fails, -EBUSY when the
 * device reports STATUS_BUSY, -ERESTARTSYS when the wait was interrupted by
 * a signal, or -EIO when STATUS_ERROR is seen while polling.
 */
static ssize_t zynq_example_write(struct file *file, const char __user *buf,
                                size_t count, loff_t *offset)
{
    struct zynq_example_dev *dev = file->private_data;
    uint32_t data;
    uint32_t status;

    /* At least one full word is required */
    if (count < sizeof(data))
        return -EINVAL;

    /* Fetch the word from user space */
    if (copy_from_user(&data, buf, sizeof(data)))
        return -EFAULT;

    /*
     * Check for a busy device BEFORE touching REG_DATA, so that a rejected
     * request cannot overwrite the data of an operation still in flight.
     */
    status = ioread32(dev->base_addr + REG_STATUS);
    if (status & STATUS_BUSY) {
        printk(KERN_WARNING "%s: device busy\n", DRIVER_NAME);
        return -EBUSY;
    }

    /* Load the data register */
    iowrite32(data, dev->base_addr + REG_DATA);

    if (dev->has_irq) {
        /* Interrupt mode: enable the interrupt, then start the operation */
        iowrite32(1, dev->base_addr + REG_IRQ_EN);
        iowrite32(CTRL_START, dev->base_addr + REG_CONTROL);

        /*
         * Sleep until the ISR wakes us and STATUS_DONE is set. The ISR
         * stores the result in dev->data_value.
         * NOTE(review): STATUS_ERROR is not part of the wake-up condition,
         * so a failed operation is only left via a signal - confirm
         * against the hardware behaviour.
         */
        if (wait_event_interruptible(dev->wait_queue,
                                   (ioread32(dev->base_addr + REG_STATUS) & STATUS_DONE))) {
            /* Interrupted by a signal */
            return -ERESTARTSYS;
        }
    } else {
        /* Polling mode: start the operation */
        iowrite32(CTRL_START, dev->base_addr + REG_CONTROL);

        /* Poll until done, bailing out on a reported error */
        while (!((status = ioread32(dev->base_addr + REG_STATUS)) & STATUS_DONE)) {
            if (status & STATUS_ERROR) {
                printk(KERN_ERR "%s: operation error\n", DRIVER_NAME);
                return -EIO;
            }

            /* Yield the CPU between polls */
            schedule();
        }

        /* Fetch the result */
        dev->data_value = ioread32(dev->base_addr + REG_DATA);
    }

    printk(KERN_INFO "%s: wrote data: 0x%08x, result: 0x%08x\n",
           DRIVER_NAME, data, dev->data_value);

    return sizeof(data);
}

// ioctl处理
static long zynq_example_ioctl(struct file *file, unsigned int cmd, 
                             unsigned long arg)
{
    struct zynq_example_dev *dev = (struct zynq_example_dev *)file->private_data;
    
    switch (cmd) {
        case 0: // 复位设备
            iowrite32(CTRL_RESET, dev->base_addr + REG_CONTROL);
            printk(KERN_INFO "%s: device reset
", DRIVER_NAME);
            return 0;
            
        case 1: // 获取状态
            return ioread32(dev->base_addr + REG_STATUS);
            
        default:
            return -ENOTTY; // 不支持的命令
    }
}

// 定义file_operations结构
static struct file_operations zynq_example_fops = {
    .owner = THIS_MODULE,
    .open = zynq_example_open,
    .release = zynq_example_release,
    .read = zynq_example_read,
    .write = zynq_example_write,
    .unlocked_ioctl = zynq_example_ioctl,
};

// 平台驱动probe函数
static int zynq_example_probe(struct platform_device *pdev)
{
    struct resource *r_mem;
    struct device *dev = &pdev->dev;
    int rc = 0;
    
    // 分配设备结构体
    example_dev = devm_kzalloc(dev, sizeof(*example_dev), GFP_KERNEL);
    if (!example_dev)
        return -ENOMEM;
    
    // 初始化等待队列
    init_waitqueue_head(&example_dev->wait_queue);
    
    // 获取内存资源
    r_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
    if (!r_mem) {
        dev_err(dev, "No memory resource
");
        return -ENODEV;
    }
    
    example_dev->mem_start = r_mem->start;
    example_dev->mem_end = r_mem->end;
    
    // 检查内存区域是否可用
    if (!request_mem_region(example_dev->mem_start,
                           example_dev->mem_end - example_dev->mem_start + 1,
                           DRIVER_NAME)) {
        dev_err(dev, "Memory region already in use
");
        return -EBUSY;
    }
    
    // 映射寄存器空间
    example_dev->base_addr = ioremap(example_dev->mem_start,
                                   example_dev->mem_end - example_dev->mem_start + 1);
    if (!example_dev->base_addr) {
        dev_err(dev, "Cannot map registers
");
        rc = -ENOMEM;
        goto err_release_mem_region;
    }
    
    // 获取中断资源
    example_dev->irq = platform_get_irq(pdev, 0);
    if (example_dev->irq >= 0) {
        // 请求中断
        rc = request_irq(example_dev->irq, zynq_example_isr, 0, DRIVER_NAME, example_dev);
        if (rc) {
            dev_err(dev, "Cannot register interrupt: %d
", rc);
            example_dev->has_irq = false;
        } else {
            example_dev->has_irq = true;
            dev_info(dev, "Registered interrupt %d
", example_dev->irq);
        }
    } else {
        dev_info(dev, "No IRQ resource, using polling mode
");
        example_dev->has_irq = false;
    }
    
    // 分配设备号
    rc = alloc_chrdev_region(&example_dev->devt, 0, 1, DRIVER_NAME);
    if (rc) {
        dev_err(dev, "Failed to allocate char device region
");
        goto err_unmap;
    }
    
    example_dev->major = MAJOR(example_dev->devt);
    example_dev->minor = MINOR(example_dev->devt);
    
    // 创建设备类
    example_dev->class = class_create(THIS_MODULE, DRIVER_CLASS);
    if (IS_ERR(example_dev->class)) {
        rc = PTR_ERR(example_dev->class);
        dev_err(dev, "Failed to create class
");
        goto err_unregister_chrdev;
    }
    
    // 初始化字符设备
    cdev_init(&example_dev->cdev, &zynq_example_fops);
    example_dev->cdev.owner = THIS_MODULE;
    
    // 添加字符设备
    rc = cdev_add(&example_dev->cdev, example_dev->devt, 1);
    if (rc) {
        dev_err(dev, "Failed to add char device
");
        goto err_class_destroy;
    }
    
    // 创建设备节点
    example_dev->device = device_create(example_dev->class, NULL, 
                                      example_dev->devt, NULL, 
                                      DRIVER_NAME);
    if (IS_ERR(example_dev->device)) {
        rc = PTR_ERR(example_dev->device);
        dev_err(dev, "Failed to create device
");
        goto err_cdev_del;
    }
    
    // 初始化设备 - 复位
    iowrite32(CTRL_RESET, example_dev->base_addr + REG_CONTROL);
    
    dev_info(dev, "Initialized at 0x%lx, IRQ: %d
", 
             example_dev->mem_start, example_dev->irq);
    
    return 0;

err_cdev_del:
    cdev_del(&example_dev->cdev);
err_class_destroy:
    class_destroy(example_dev->class);
err_unregister_chrdev:
    unregister_chrdev_region(example_dev->devt, 1);
err_unmap:
    iounmap(example_dev->base_addr);
    if (example_dev->has_irq)
        free_irq(example_dev->irq, example_dev);
err_release_mem_region:
    release_mem_region(example_dev->mem_start, 
                      example_dev->mem_end - example_dev->mem_start + 1);
    
    return rc;
}

// Platform driver remove callback.
//
// Tears down, in the order written below, what this driver registered.
// It works on the file-scope example_dev rather than on pdev, so pdev is
// not used here. Always returns 0.
static int zynq_example_remove(struct platform_device *pdev)
{
    // Remove the device node
    device_destroy(example_dev->class, example_dev->devt);
    
    // Delete the character device
    cdev_del(&example_dev->cdev);
    
    // Destroy the device class
    class_destroy(example_dev->class);
    
    // Give back the device number
    unregister_chrdev_region(example_dev->devt, 1);
    
    // Release the interrupt only if one was in use (has_irq)
    if (example_dev->has_irq)
        free_irq(example_dev->irq, example_dev);
    
    // Drop the register mapping
    iounmap(example_dev->base_addr);
    
    // Release the memory region; mem_end is inclusive, hence the "+ 1"
    release_mem_region(example_dev->mem_start, 
                      example_dev->mem_end - example_dev->mem_start + 1);
    
    return 0;
}

// Device-tree match table: compatible strings this driver binds to.
// The empty entry terminates the list.
static const struct of_device_id zynq_example_of_match[] = {
    {
        .compatible = "vendor,zynq-example-1.00.a",
    },
    { /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, zynq_example_of_match);

// Platform driver descriptor: ties the probe/remove callbacks to the
// device-tree match table.
//
// .owner is intentionally not set here: platform_driver_register() fills it
// in with THIS_MODULE, so an explicit assignment is redundant.
static struct platform_driver zynq_example_driver = {
    .driver = {
        .name = DRIVER_NAME,
        .of_match_table = zynq_example_of_match,
    },
    .probe = zynq_example_probe,
    .remove = zynq_example_remove,
};

// Module entry point: logs a banner and registers the platform driver.
//
// Returns 0 on success, or the error code from platform_driver_register().
static int __init zynq_example_init(void)
{
    int rc;

    // The format strings end in an explicit "\n" escape; a raw line break
    // inside a string literal is not valid C.
    printk(KERN_INFO "%s: Init module\n", DRIVER_NAME);

    // Register the platform driver
    rc = platform_driver_register(&zynq_example_driver);
    if (rc)
        printk(KERN_ERR "%s: Failed to register driver\n", DRIVER_NAME);

    return rc;
}

// Module exit point: logs a banner and unregisters the platform driver.
static void __exit zynq_example_exit(void)
{
    // Explicit "\n" escape: a raw line break inside a string literal is
    // not valid C.
    printk(KERN_INFO "%s: Exit module\n", DRIVER_NAME);

    // Unregister the platform driver
    platform_driver_unregister(&zynq_example_driver);
}

// Module entry and exit points
module_init(zynq_example_init);
module_exit(zynq_example_exit);

// Module metadata
MODULE_AUTHOR("Your Name");
MODULE_DESCRIPTION("Zynq Example Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION("1.0");

7.2 UIO与用户空间驱动

c

/**
 * custom_uio_config.c - UIO驱动配置示例
 *
 * 演示如何配置UIO驱动来允许用户空间直接访问设备
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/io.h>
#include <linux/of.h>
#include <linux/uio_driver.h>

#define DRIVER_NAME "custom_uio"

// Per-device driver state, allocated in probe and stored as driver data
struct custom_uio_data {
    struct uio_info *info;      // UIO descriptor registered for this device
    void __iomem *base_addr;    // kernel mapping of the register window
    unsigned long mem_start;    // first physical address of the window
    unsigned long mem_end;      // last physical address of the window (inclusive)
    int irq;                    // IRQ number obtained from platform_get_irq()
};

// 中断处理函数
static irqreturn_t custom_uio_handler(int irq, struct uio_info *info)
{
    struct custom_uio_data *data = info->priv;
    
    // 在这里禁用设备中断,以避免中断风暴
    // 此处假设寄存器偏移0x04控制中断使能
    iowrite32(0, data->base_addr + 0x04);
    
    // 返回UIO_IRQ_HANDLED表示我们处理了中断
    // 这会通知UIO框架唤醒等待中断的用户空间进程
    return IRQ_HANDLED;
}

// 平台驱动probe函数
static int custom_uio_probe(struct platform_device *pdev)
{
    struct resource *r_mem;
    struct custom_uio_data *data;
    struct uio_info *info;
    int ret = 0;
    struct device *dev = &pdev->dev;
    
    dev_info(dev, "Probing custom UIO device
");
    
    // 分配私有数据结构
    data = devm_kzalloc(dev, sizeof(struct custom_uio_data), GFP_KERNEL);
    if (!data)
        return -ENOMEM;
    
    // 分配UIO信息结构
    info = devm_kzalloc(dev, sizeof(struct uio_info), GFP_KERNEL);
    if (!info)
        return -ENOMEM;
    
    // 获取内存资源
    r_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
    if (!r_mem) {
        dev_err(dev, "No memory resource
");
        return -ENODEV;
    }
    
    data->mem_start = r_mem->start;
    data->mem_end = r_mem->end;
    
    // 检查并请求内存区域
    if (!request_mem_region(data->mem_start, 
                          data->mem_end - data->mem_start + 1,
                          dev_name(dev))) {
        dev_err(dev, "Memory region already in use
");
        return -EBUSY;
    }
    
    // 映射寄存器空间
    data->base_addr = ioremap(data->mem_start, 
                             data->mem_end - data->mem_start + 1);
    if (!data->base_addr) {
        dev_err(dev, "Cannot map registers
");
        ret = -ENOMEM;
        goto err_release_mem_region;
    }
    
    // 获取中断资源
    data->irq = platform_get_irq(pdev, 0);
    if (data->irq < 0) {
        dev_err(dev, "No IRQ resource
");
        ret = data->irq;
        goto err_unmap;
    }
    
    // 设置UIO信息
    info->name = dev_name(dev);
    info->version = "1.0";
    info->mem[0].name = "registers";
    info->mem[0].addr = data->mem_start;
    info->mem[0].size = data->mem_end - data->mem_start + 1;
    info->mem[0].memtype = UIO_MEM_PHYS;
    
    info->irq = data->irq;
    info->irq_flags = IRQF_SHARED;
    info->handler = custom_uio_handler;
    info->priv = data;
    
    // 注册UIO设备
    ret = uio_register_device(dev, info);
    if (ret) {
        dev_err(dev, "Unable to register UIO device
");
        goto err_unmap;
    }
    
    // 保存私有数据
    data->info = info;
    platform_set_drvdata(pdev, data);
    
    dev_info(dev, "UIO device registered, IRQ: %d
", data->irq);
    
    return 0;
    
err_unmap:
    iounmap(data->base_addr);
err_release_mem_region:
    release_mem_region(data->mem_start, 
                      data->mem_end - data->mem_start + 1);
    
    return ret;
}

// Platform driver remove callback.
//
// Unregisters the UIO device, then drops the register mapping and releases
// the claimed memory region. Always returns 0.
static int custom_uio_remove(struct platform_device *pdev)
{
    struct custom_uio_data *priv = platform_get_drvdata(pdev);
    // mem_end is inclusive, hence the "+ 1"
    unsigned long region_len = priv->mem_end - priv->mem_start + 1;

    uio_unregister_device(priv->info);
    iounmap(priv->base_addr);
    release_mem_region(priv->mem_start, region_len);

    return 0;
}

// Device-tree match table: compatible strings this driver binds to.
// The empty entry terminates the list.
static const struct of_device_id custom_uio_of_match[] = {
    {
        .compatible = "vendor,custom-uio-1.0",
    },
    { /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, custom_uio_of_match);

// Platform driver descriptor: ties the probe/remove callbacks to the
// device-tree match table.
//
// .owner is intentionally not set here: platform_driver_register() fills it
// in with THIS_MODULE, so an explicit assignment is redundant.
static struct platform_driver custom_uio_driver = {
    .driver = {
        .name = DRIVER_NAME,
        .of_match_table = custom_uio_of_match,
    },
    .probe = custom_uio_probe,
    .remove = custom_uio_remove,
};

// Module entry point: registers the platform driver.
//
// Returns 0 on success, or the error code from platform_driver_register().
static int __init custom_uio_init(void)
{
    int ret;

    ret = platform_driver_register(&custom_uio_driver);
    if (ret) {
        // Explicit "\n" escape: a raw line break inside a string literal
        // is not valid C.
        pr_err("Failed to register driver\n");
    }

    return ret;
}

// Module exit point: unregisters the platform driver
static void __exit custom_uio_exit(void)
{
    platform_driver_unregister(&custom_uio_driver);
}

// Module entry and exit points
module_init(custom_uio_init);
module_exit(custom_uio_exit);

// Module metadata
MODULE_AUTHOR("Your Name");
MODULE_DESCRIPTION("Custom UIO Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION("1.0");

以下是使用上述UIO驱动的用户空间程序示例:

c

/**
 * uio_example.c - UIO用户空间程序示例
 *
 * 演示如何在用户空间中使用UIO驱动访问设备
 */

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <poll.h>
#include <stdint.h>

// Register offsets, added to the mapped base address by main()
#define REG_CONTROL   0x00
#define REG_INT_EN    0x04
#define REG_STATUS    0x08
#define REG_DATA      0x0C

// Control register bits
#define CTRL_START    0x01
#define CTRL_RESET    0x02

// Status register bits
#define STATUS_DONE   0x01
#define STATUS_BUSY   0x02
#define STATUS_ERROR  0x04

int main(int argc, char **argv)
{
    int fd;
    void *map_base;
    uint32_t value;
    int ret;
    struct pollfd pfd;
    
    // 打开UIO设备
    fd = open("/dev/uio0", O_RDWR);
    if (fd < 0) {
        perror("Failed to open /dev/uio0");
        return EXIT_FAILURE;
    }
    
    printf("UIO device opened successfully
");
    
    // 映射寄存器空间
    map_base = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (map_base == MAP_FAILED) {
        perror("Failed to map device memory");
        close(fd);
        return EXIT_FAILURE;
    }
    
    printf("Device memory mapped at %p
", map_base);
    
    // 读取初始状态
    value = *((volatile uint32_t *)(map_base + REG_STATUS));
    printf("Initial status: 0x%08x
", value);
    
    // 复位设备
    *((volatile uint32_t *)(map_base + REG_CONTROL)) = CTRL_RESET;
    usleep(10000);  // 等待10ms
    
    // 检查状态
    value = *((volatile uint32_t *)(map_base + REG_STATUS));
    printf("Status after reset: 0x%08x
", value);
    
    // 写入数据
    *((volatile uint32_t *)(map_base + REG_DATA)) = 0x12345678;
    printf("Data written: 0x12345678
");
    
    // 使能中断
    *((volatile uint32_t *)(map_base + REG_INT_EN)) = 1;
    
    // 启动操作
    *((volatile uint32_t *)(map_base + REG_CONTROL)) = CTRL_START;
    printf("Operation started
");
    
    // 等待中断
    pfd.fd = fd;
    pfd.events = POLLIN;
    
    printf("Waiting for interrupt...
");
    ret = poll(&pfd, 1, 5000);  // 5秒超时
    
    if (ret > 0) {
        // 读取中断计数,这会重置中断状态
        // 这是UIO框架的要求
        uint32_t interrupt_count;
        if (read(fd, &interrupt_count, sizeof(interrupt_count)) != sizeof(interrupt_count)) {
            perror("Error reading interrupt count");
        }
        
        printf("Interrupt received, count: %u
", interrupt_count);
        
        // 读取结果
        value = *((volatile uint32_t *)(map_base + REG_DATA));
        printf("Result: 0x%08x
", value);
        
        // 重新使能中断(这通常应该由硬件完成,但是为了安全起见)
        *((volatile uint32_t *)(map_base + REG_INT_EN)) = 1;
    } else if (ret == 0) {
        printf("Timeout waiting for interrupt
");
    } else {
        perror("Error in poll");
    }
    
    // 清理
    munmap(map_base, 4096);
    close(fd);
    
    printf("Test completed
");
    
    return EXIT_SUCCESS;
}

7.3 DMA驱动开发

c

/**
 * zynq_dma_driver.c - Zynq AXI DMA驱动示例
 *
 * 演示如何开发Zynq平台上的DMA驱动
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/of_platform.h>
#include <linux/of_irq.h>
#include <linux/of_dma.h>
#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/wait.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/platform_device.h>
#include <linux/uaccess.h>

// Driver identification: chrdev region name, class name and the suffixes
// used for the two device node names
#define DRIVER_NAME     "zynq_dma"
#define DRIVER_CLASS    "zynqdma"
#define TX_CHANNEL_NAME "dma_tx"
#define RX_CHANNEL_NAME "dma_rx"

// ioctl command codes
// NOTE(review): DMA_IOC_ALLOC is encoded with `unsigned long` as its
// argument type while the handler copies a struct dma_buf_req, and
// DMA_IOC_FREE is encoded as _IOW although the handler reads no argument.
// Confirm the encodings against the user-space header before changing them;
// the command numbers are part of the user-space interface.
#define DMA_IOC_MAGIC       'D'
#define DMA_IOC_RESET       _IO(DMA_IOC_MAGIC, 0)
#define DMA_IOC_START_TX    _IOW(DMA_IOC_MAGIC, 1, unsigned long)
#define DMA_IOC_START_RX    _IOW(DMA_IOC_MAGIC, 2, unsigned long)
#define DMA_IOC_SYNC        _IO(DMA_IOC_MAGIC, 3)
#define DMA_IOC_ALLOC       _IOWR(DMA_IOC_MAGIC, 4, unsigned long)
#define DMA_IOC_FREE        _IOW(DMA_IOC_MAGIC, 5, unsigned long)

// Device direction, stored in zynq_dma_dev.dev_direction
#define DMA_DEV_TX 0
#define DMA_DEV_RX 1

// Buffer allocation request exchanged with user space by DMA_IOC_ALLOC
struct dma_buf_req {
    size_t size;            // in: requested buffer size in bytes
    void *virt_addr;        // out: kernel virtual address of the buffer
                            // NOTE(review): this hands a kernel pointer to
                            // user space — confirm that is intended
    dma_addr_t phys_addr;   // out: DMA address of the buffer
};

// Per-node driver state. Probe creates one instance for the TX device node
// and one for the RX device node.
struct zynq_dma_dev {
    struct cdev cdev;           // character device; open() recovers this
                                // structure from it via container_of()
    struct class *class;        // device class, shared by both instances
    struct device *device;      // device created by device_create()
    dev_t devt;                 // device number of this node
    int major;
    int minor;
    
    struct dma_chan *tx_chan;  // TX DMA channel (set on the TX instance)
    struct dma_chan *rx_chan;  // RX DMA channel (set on the RX instance)
    
    struct completion tx_cmp;  // completed by the TX callback
    struct completion rx_cmp;  // completed by the RX callback
    
    dma_cookie_t tx_cookie;     // cookie of the last submitted TX transfer
    dma_cookie_t rx_cookie;     // cookie of the last submitted RX transfer
    
    int irq;                    // not referenced by the code shown in this file section
    int dev_direction;  // device direction (DMA_DEV_TX or DMA_DEV_RX)
    
    // DMA buffer, allocated on demand through DMA_IOC_ALLOC
    void *buffer;               // kernel virtual address, NULL when unallocated
    dma_addr_t buffer_dma;      // DMA address of the buffer
    size_t buffer_size;         // buffer size in bytes
    
    // DMA slave configuration applied before each transfer
    struct dma_slave_config tx_conf;
    struct dma_slave_config rx_conf;
};

// File-scope pointers to the TX and RX instances; set at the end of a
// successful probe and used by remove to find what to tear down
static struct zynq_dma_dev *tx_dev;
static struct zynq_dma_dev *rx_dev;

// DMA TX完成回调
static void dma_tx_callback(void *data)
{
    struct zynq_dma_dev *dev = (struct zynq_dma_dev *)data;
    
    pr_info("%s: TX transfer completed
", DRIVER_NAME);
    complete(&dev->tx_cmp);
}

// DMA RX完成回调
static void dma_rx_callback(void *data)
{
    struct zynq_dma_dev *dev = (struct zynq_dma_dev *)data;
    
    pr_info("%s: RX transfer completed
", DRIVER_NAME);
    complete(&dev->rx_cmp);
}

// 打开设备
static int zynq_dma_open(struct inode *inode, struct file *file)
{
    struct zynq_dma_dev *dev = container_of(inode->i_cdev, struct zynq_dma_dev, cdev);
    file->private_data = dev;
    
    pr_info("%s: device opened
", DRIVER_NAME);
    return 0;
}

// 关闭设备
static int zynq_dma_release(struct inode *inode, struct file *file)
{
    pr_info("%s: device closed
", DRIVER_NAME);
    return 0;
}

// read() handler: copies data out of the DMA buffer of an RX device.
//
// The copy always starts at the beginning of the buffer; *offset is neither
// read nor advanced. `count` is clamped to the buffer size.
//
// Returns the number of bytes copied, or
//   -EINVAL  the file refers to the TX device
//   -ENOMEM  no DMA buffer has been allocated
//   -EFAULT  the user buffer could not be written
//
// NOTE(review): nothing here serialises against DMA_IOC_ALLOC/DMA_IOC_FREE
// on another thread replacing or freeing the buffer.
static ssize_t zynq_dma_read(struct file *file, char __user *buf,
                           size_t count, loff_t *offset)
{
    struct zynq_dma_dev *dev = file->private_data;

    // Only the RX device can be read
    if (dev->dev_direction != DMA_DEV_RX) {
        pr_err("%s: Cannot read from TX device\n", DRIVER_NAME);
        return -EINVAL;
    }

    // A buffer must have been allocated first
    if (!dev->buffer) {
        pr_err("%s: DMA buffer not allocated\n", DRIVER_NAME);
        return -ENOMEM;
    }

    // Never copy past the end of the buffer
    if (count > dev->buffer_size)
        count = dev->buffer_size;

    // Copy from the DMA buffer to user space
    if (copy_to_user(buf, dev->buffer, count)) {
        pr_err("%s: Failed to copy data to user\n", DRIVER_NAME);
        return -EFAULT;
    }

    pr_info("%s: read %zu bytes\n", DRIVER_NAME, count);

    return count;
}

// write() handler: copies user data into the DMA buffer of a TX device.
//
// The copy always starts at the beginning of the buffer; *offset is neither
// read nor advanced. `count` is clamped to the buffer size. This only fills
// the buffer; the transfer itself is started through DMA_IOC_START_TX.
//
// Returns the number of bytes copied, or
//   -EINVAL  the file refers to the RX device
//   -ENOMEM  no DMA buffer has been allocated
//   -EFAULT  the user buffer could not be read
//
// NOTE(review): nothing here serialises against DMA_IOC_ALLOC/DMA_IOC_FREE
// on another thread replacing or freeing the buffer.
static ssize_t zynq_dma_write(struct file *file, const char __user *buf,
                            size_t count, loff_t *offset)
{
    struct zynq_dma_dev *dev = file->private_data;

    // Only the TX device can be written
    if (dev->dev_direction != DMA_DEV_TX) {
        pr_err("%s: Cannot write to RX device\n", DRIVER_NAME);
        return -EINVAL;
    }

    // A buffer must have been allocated first
    if (!dev->buffer) {
        pr_err("%s: DMA buffer not allocated\n", DRIVER_NAME);
        return -ENOMEM;
    }

    // Never copy past the end of the buffer
    if (count > dev->buffer_size)
        count = dev->buffer_size;

    // Copy from user space into the DMA buffer
    if (copy_from_user(dev->buffer, buf, count)) {
        pr_err("%s: Failed to copy data from user\n", DRIVER_NAME);
        return -EFAULT;
    }

    pr_info("%s: wrote %zu bytes\n", DRIVER_NAME, count);

    return count;
}

// Configure the channel, queue one transfer over the device's DMA buffer
// and kick the engine.
//
// dev        device instance whose buffer and channel are used
// size       number of bytes to transfer; clamped to the buffer size
// direction  DMA_DEV_TX for memory-to-device, anything else for
//            device-to-memory
//
// Returns 0 once the transfer has been issued, or
//   -ENOMEM  no buffer allocated, descriptor preparation failed, or the
//            submit failed
//   -ENODEV  the channel for this direction is not set
//   -EINVAL  zero-length request or channel configuration rejected
//
// Completion is reported through tx_cmp / rx_cmp by the callbacks.
static int start_dma_transfer(struct zynq_dma_dev *dev, size_t size, int direction)
{
    const bool is_tx = (direction == DMA_DEV_TX);
    const char *label = is_tx ? "TX" : "RX";
    struct dma_chan *chan = is_tx ? dev->tx_chan : dev->rx_chan;
    struct dma_slave_config *conf = is_tx ? &dev->tx_conf : &dev->rx_conf;
    struct completion *done = is_tx ? &dev->tx_cmp : &dev->rx_cmp;
    dma_cookie_t *cookie = is_tx ? &dev->tx_cookie : &dev->rx_cookie;
    struct dma_async_tx_descriptor *desc;

    if (!dev->buffer) {
        pr_err("%s: DMA buffer not allocated\n", DRIVER_NAME);
        return -ENOMEM;
    }

    if (!chan) {
        pr_err("%s: %s channel not available\n", DRIVER_NAME, label);
        return -ENODEV;
    }

    // A zero-length descriptor is meaningless; reject it up front
    if (size == 0) {
        pr_err("%s: Zero-length DMA transfer requested\n", DRIVER_NAME);
        return -EINVAL;
    }

    if (size > dev->buffer_size) {
        pr_warn("%s: Size too large, truncating to %zu\n", DRIVER_NAME,
                dev->buffer_size);
        size = dev->buffer_size;
    }

    // Apply the slave configuration for this direction
    if (dmaengine_slave_config(chan, conf)) {
        pr_err("%s: Failed to configure %s channel\n", DRIVER_NAME, label);
        return -EINVAL;
    }

    // Prepare a single-buffer slave transfer
    desc = dmaengine_prep_slave_single(chan, dev->buffer_dma, size,
                                       is_tx ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM,
                                       DMA_PREP_INTERRUPT);
    if (!desc) {
        pr_err("%s: Failed to prepare DMA transfer\n", DRIVER_NAME);
        return -ENOMEM;
    }

    // Install the completion callback and re-arm the completion before the
    // descriptor is submitted, so a fast completion cannot be lost
    desc->callback = is_tx ? dma_tx_callback : dma_rx_callback;
    desc->callback_param = dev;
    reinit_completion(done);

    // Queue the descriptor
    *cookie = dmaengine_submit(desc);
    if (dma_submit_error(*cookie)) {
        pr_err("%s: Failed to submit DMA transfer\n", DRIVER_NAME);
        return -ENOMEM;
    }

    // Start processing queued descriptors
    dma_async_issue_pending(chan);

    pr_info("%s: Started %s transfer of %zu bytes\n", DRIVER_NAME,
            label, size);

    return 0;
}

// 等待DMA传输完成
static int wait_dma_transfer(struct zynq_dma_dev *dev, int direction)
{
    struct dma_chan *chan;
    dma_cookie_t cookie;
    unsigned long timeout;
    enum dma_status status;
    
    // 根据方向选择通道和cookie
    if (direction == DMA_DEV_TX) {
        chan = dev->tx_chan;
        cookie = dev->tx_cookie;
        timeout = wait_for_completion_timeout(&dev->tx_cmp, 
                                            msecs_to_jiffies(5000));  // 5秒超时
    } else {
        chan = dev->rx_chan;
        cookie = dev->rx_cookie;
        timeout = wait_for_completion_timeout(&dev->rx_cmp, 
                                            msecs_to_jiffies(5000));  // 5秒超时
    }
    
    if (timeout == 0) {
        pr_err("%s: Timeout waiting for DMA transfer
", DRIVER_NAME);
        return -ETIMEDOUT;
    }
    
    // 检查DMA状态
    status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
    if (status != DMA_COMPLETE) {
        pr_err("%s: DMA transfer failed or incomplete
", DRIVER_NAME);
        return -EIO;
    }
    
    pr_info("%s: DMA transfer completed successfully
", DRIVER_NAME);
    
    return 0;
}

// ioctl设备控制
static long zynq_dma_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
    struct zynq_dma_dev *dev = (struct zynq_dma_dev *)file->private_data;
    int ret = 0;
    size_t size;
    struct dma_buf_req buf_req;
    
    switch (cmd) {
        case DMA_IOC_RESET:
            pr_info("%s: Resetting DMA channel
", DRIVER_NAME);
            
            if (dev->dev_direction == DMA_DEV_TX && dev->tx_chan) {
                dmaengine_terminate_all(dev->tx_chan);
            } else if (dev->dev_direction == DMA_DEV_RX && dev->rx_chan) {
                dmaengine_terminate_all(dev->rx_chan);
            }
            break;
            
        case DMA_IOC_START_TX:
            pr_info("%s: Starting TX transfer
", DRIVER_NAME);
            
            // 获取传输大小
            if (copy_from_user(&size, (void __user *)arg, sizeof(size))) {
                pr_err("%s: Failed to copy size from user
", DRIVER_NAME);
                return -EFAULT;
            }
            
            // 只在TX设备上允许TX传输
            if (dev->dev_direction != DMA_DEV_TX) {
                pr_err("%s: Cannot start TX on RX device
", DRIVER_NAME);
                return -EINVAL;
            }
            
            return start_dma_transfer(dev, size, DMA_DEV_TX);
            
        case DMA_IOC_START_RX:
            pr_info("%s: Starting RX transfer
", DRIVER_NAME);
            
            // 获取传输大小
            if (copy_from_user(&size, (void __user *)arg, sizeof(size))) {
                pr_err("%s: Failed to copy size from user
", DRIVER_NAME);
                return -EFAULT;
            }
            
            // 只在RX设备上允许RX传输
            if (dev->dev_direction != DMA_DEV_RX) {
                pr_err("%s: Cannot start RX on TX device
", DRIVER_NAME);
                return -EINVAL;
            }
            
            return start_dma_transfer(dev, size, DMA_DEV_RX);
            
        case DMA_IOC_SYNC:
            pr_info("%s: Waiting for DMA completion
", DRIVER_NAME);
            
            if (dev->dev_direction == DMA_DEV_TX) {
                return wait_dma_transfer(dev, DMA_DEV_TX);
            } else {
                return wait_dma_transfer(dev, DMA_DEV_RX);
            }
            
        case DMA_IOC_ALLOC:
            pr_info("%s: Allocating DMA buffer
", DRIVER_NAME);
            
            // 获取用户请求
            if (copy_from_user(&buf_req, (void __user *)arg, sizeof(buf_req))) {
                pr_err("%s: Failed to copy buffer request from user
", DRIVER_NAME);
                return -EFAULT;
            }
            
            // 释放之前的缓冲区(如果存在)
            if (dev->buffer) {
                dma_free_coherent(&dev->device->dev, dev->buffer_size, 
                                 dev->buffer, dev->buffer_dma);
                dev->buffer = NULL;
            }
            
            // 分配DMA一致性缓冲区
            dev->buffer = dma_alloc_coherent(&dev->device->dev, buf_req.size, 
                                           &dev->buffer_dma, GFP_KERNEL);
            if (!dev->buffer) {
                pr_err("%s: Failed to allocate DMA buffer
", DRIVER_NAME);
                return -ENOMEM;
            }
            
            dev->buffer_size = buf_req.size;
            
            // 返回虚拟地址和物理地址给用户
            buf_req.virt_addr = dev->buffer;
            buf_req.phys_addr = dev->buffer_dma;
            
            if (copy_to_user((void __user *)arg, &buf_req, sizeof(buf_req))) {
                pr_err("%s: Failed to copy buffer info to user
", DRIVER_NAME);
                dma_free_coherent(&dev->device->dev, dev->buffer_size, 
                                 dev->buffer, dev->buffer_dma);
                dev->buffer = NULL;
                return -EFAULT;
            }
            break;
            
        case DMA_IOC_FREE:
            pr_info("%s: Freeing DMA buffer
", DRIVER_NAME);
            
            if (dev->buffer) {
                dma_free_coherent(&dev->device->dev, dev->buffer_size, 
                                 dev->buffer, dev->buffer_dma);
                dev->buffer = NULL;
                dev->buffer_size = 0;
            }
            break;
            
        default:
            return -ENOTTY;  // 不支持的命令
    }
    
    return ret;
}

// 映射DMA缓冲区到用户空间
static int zynq_dma_mmap(struct file *file, struct vm_area_struct *vma)
{
    struct zynq_dma_dev *dev = (struct zynq_dma_dev *)file->private_data;
    unsigned long size = vma->vm_end - vma->vm_start;
    
    // 检查缓冲区是否已分配
    if (!dev->buffer) {
        pr_err("%s: DMA buffer not allocated
", DRIVER_NAME);
        return -ENOMEM;
    }
    
    // 检查大小
    if (size > dev->buffer_size) {
        pr_err("%s: Requested mapping size too large
", DRIVER_NAME);
        return -EINVAL;
    }
    
    // 设置VM标志
    vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
    
    // 执行重映射
    if (remap_pfn_range(vma, vma->vm_start, 
                       __phys_to_pfn(dev->buffer_dma),
                       size, vma->vm_page_prot)) {
        pr_err("%s: Failed to map buffer to userspace
", DRIVER_NAME);
        return -EAGAIN;
    }
    
    pr_info("%s: mmap succeeded
", DRIVER_NAME);
    
    return 0;
}

// File operations shared by the TX and RX character devices
static const struct file_operations zynq_dma_fops = {
    .owner          = THIS_MODULE,
    .open           = zynq_dma_open,
    .release        = zynq_dma_release,
    .read           = zynq_dma_read,
    .write          = zynq_dma_write,
    .unlocked_ioctl = zynq_dma_ioctl,
    .mmap           = zynq_dma_mmap,
};

// 平台驱动probe函数
static int zynq_dma_probe(struct platform_device *pdev)
{
    struct zynq_dma_dev *tx_dev_local;
    struct zynq_dma_dev *rx_dev_local;
    struct device *dev = &pdev->dev;
    struct device_node *node = pdev->dev.of_node;
    struct resource *res;
    struct dma_chan *tx_chan, *rx_chan;
    dma_cap_mask_t mask;
    int rc = 0;
    dev_t devt;
    
    pr_info("%s: Probing device
", DRIVER_NAME);
    
    // 验证设备树是否存在
    if (!node) {
        dev_err(dev, "No device tree node found
");
        return -ENODEV;
    }
    
    // 分配TX设备结构体
    tx_dev_local = devm_kzalloc(dev, sizeof(*tx_dev_local), GFP_KERNEL);
    if (!tx_dev_local) {
        return -ENOMEM;
    }
    
    // 分配RX设备结构体
    rx_dev_local = devm_kzalloc(dev, sizeof(*rx_dev_local), GFP_KERNEL);
    if (!rx_dev_local) {
        return -ENOMEM;
    }
    
    // 为TX和RX设备分配两个次设备号
    rc = alloc_chrdev_region(&devt, 0, 2, DRIVER_NAME);
    if (rc) {
        dev_err(dev, "Failed to allocate char device region
");
        return rc;
    }
    
    // 创建设备类
    tx_dev_local->class = class_create(THIS_MODULE, DRIVER_CLASS);
    if (IS_ERR(tx_dev_local->class)) {
        rc = PTR_ERR(tx_dev_local->class);
        dev_err(dev, "Failed to create class
");
        goto err_unregister_chrdev;
    }
    
    // 设置设备号
    tx_dev_local->major = MAJOR(devt);
    tx_dev_local->minor = MINOR(devt);
    rx_dev_local->major = MAJOR(devt);
    rx_dev_local->minor = MINOR(devt) + 1;
    
    // 设置设备方向
    tx_dev_local->dev_direction = DMA_DEV_TX;
    rx_dev_local->dev_direction = DMA_DEV_RX;
    
    // 共享相同的类
    rx_dev_local->class = tx_dev_local->class;
    
    // 初始化TX设备
    cdev_init(&tx_dev_local->cdev, &zynq_dma_fops);
    tx_dev_local->cdev.owner = THIS_MODULE;
    tx_dev_local->devt = MKDEV(tx_dev_local->major, tx_dev_local->minor);
    
    rc = cdev_add(&tx_dev_local->cdev, tx_dev_local->devt, 1);
    if (rc) {
        dev_err(dev, "Failed to add TX char device
");
        goto err_class_destroy;
    }
    
    tx_dev_local->device = device_create(tx_dev_local->class, NULL, 
                                       tx_dev_local->devt, NULL, 
                                       "%s_%s", DRIVER_NAME, TX_CHANNEL_NAME);
    if (IS_ERR(tx_dev_local->device)) {
        rc = PTR_ERR(tx_dev_local->device);
        dev_err(dev, "Failed to create TX device
");
        goto err_tx_cdev_del;
    }
    
    // 初始化RX设备
    cdev_init(&rx_dev_local->cdev, &zynq_dma_fops);
    rx_dev_local->cdev.owner = THIS_MODULE;
    rx_dev_local->devt = MKDEV(rx_dev_local->major, rx_dev_local->minor);
    
    rc = cdev_add(&rx_dev_local->cdev, rx_dev_local->devt, 1);
    if (rc) {
        dev_err(dev, "Failed to add RX char device
");
        goto err_tx_device_destroy;
    }
    
    rx_dev_local->device = device_create(rx_dev_local->class, NULL, 
                                       rx_dev_local->devt, NULL, 
                                       "%s_%s", DRIVER_NAME, RX_CHANNEL_NAME);
    if (IS_ERR(rx_dev_local->device)) {
        rc = PTR_ERR(rx_dev_local->device);
        dev_err(dev, "Failed to create RX device
");
        goto err_rx_cdev_del;
    }
    
    // 获取DMA通道
    dma_cap_zero(mask);
    dma_cap_set(DMA_SLAVE, mask);
    dma_cap_set(DMA_PRIVATE, mask);
    
    // 获取TX通道
    tx_chan = dma_request_slave_channel_reason(dev, "tx");
    if (IS_ERR(tx_chan)) {
        dev_err(dev, "TX DMA channel request failed
");
        rc = PTR_ERR(tx_chan);
        goto err_rx_device_destroy;
    }
    
    // 获取RX通道
    rx_chan = dma_request_slave_channel_reason(dev, "rx");
    if (IS_ERR(rx_chan)) {
        dev_err(dev, "RX DMA channel request failed
");
        rc = PTR_ERR(rx_chan);
        goto err_tx_chan_release;
    }
    
    // 设置DMA通道
    tx_dev_local->tx_chan = tx_chan;
    rx_dev_local->rx_chan = rx_chan;
    
    // 初始化完成变量
    init_completion(&tx_dev_local->tx_cmp);
    init_completion(&rx_dev_local->rx_cmp);
    
    // 配置DMA通道
    tx_dev_local->tx_conf.direction = DMA_MEM_TO_DEV;
    tx_dev_local->tx_conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
    
    rx_dev_local->rx_conf.direction = DMA_DEV_TO_MEM;
    rx_dev_local->rx_conf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
    
    // 保存设备指针
    tx_dev = tx_dev_local;
    rx_dev = rx_dev_local;
    
    platform_set_drvdata(pdev, tx_dev);
    
    pr_info("%s: DMA driver initialized successfully
", DRIVER_NAME);
    
    return 0;
    
err_tx_chan_release:
    dma_release_channel(tx_chan);
err_rx_device_destroy:
    device_destroy(rx_dev_local->class, rx_dev_local->devt);
err_rx_cdev_del:
    cdev_del(&rx_dev_local->cdev);
err_tx_device_destroy:
    device_destroy(tx_dev_local->class, tx_dev_local->devt);
err_tx_cdev_del:
    cdev_del(&tx_dev_local->cdev);
err_class_destroy:
    class_destroy(tx_dev_local->class);
err_unregister_chrdev:
    unregister_chrdev_region(devt, 2);
    
    return rc;
}

// 平台驱动remove函数
static int zynq_dma_remove(struct platform_device *pdev)
{
    // 释放资源
    if (tx_dev) {
        // 释放DMA缓冲区
        if (tx_dev->buffer) {
            dma_free_coherent(&tx_dev->device->dev, tx_dev->buffer_size, 
                             tx_dev->buffer, tx_dev->buffer_dma);
        }
        
        // 删除TX设备
        device_destroy(tx_dev->class, tx_dev->devt);
        cdev_del(&tx_dev->cdev);
        
        // 释放TX通道
        if (tx_dev->tx_chan) {
            dma_release_channel(tx_dev->tx_chan);
        }
    }
    
    if (rx_dev) {
        // 释放DMA缓冲区
        if (rx_dev->buffer) {
            dma_free_coherent(&rx_dev->device->dev, rx_dev->buffer_size, 
                             rx_dev->buffer, rx_dev->buffer_dma);
        }
        
        // 删除RX设备
        device_destroy(rx_dev->class, rx_dev->devt);
        cdev_del(&rx_dev->cdev);
        
        // 释放RX通道
        if (rx_dev->rx_chan) {
            dma_release_channel(rx_dev->rx_chan);
        }
    }
    
    // 删除类和设备号
    if (tx_dev && tx_dev->class) {
        class_destroy(tx_dev->class);
        unregister_chrdev_region(MKDEV(tx_dev->major, tx_dev->minor), 2);
    }
    
    pr_info("%s: DMA driver removed
", DRIVER_NAME);
    
    return 0;
}

// Device-tree match table: compatible strings this driver binds to.
// The empty entry terminates the list.
// NOTE(review): this string looks like the compatible of the AXI DMA
// controller node itself, while probe asks the matched node for client
// channels named "tx" and "rx" — confirm the intended binding.
static const struct of_device_id zynq_dma_of_match[] = {
    {
        .compatible = "xlnx,axi-dma-1.00.a",
    },
    { /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, zynq_dma_of_match);

// Platform driver descriptor: ties the probe/remove callbacks to the
// device-tree match table.
//
// .owner is intentionally not set here: platform_driver_register() fills it
// in with THIS_MODULE, so an explicit assignment is redundant.
static struct platform_driver zynq_dma_driver = {
    .driver = {
        .name = DRIVER_NAME,
        .of_match_table = zynq_dma_of_match,
    },
    .probe = zynq_dma_probe,
    .remove = zynq_dma_remove,
};

// Module entry point: logs a banner and registers the platform driver.
//
// Returns the result of platform_driver_register().
static int __init zynq_dma_init(void)
{
    // Explicit "\n" escape: a raw line break inside a string literal is
    // not valid C.
    pr_info("%s: Initializing module\n", DRIVER_NAME);
    return platform_driver_register(&zynq_dma_driver);
}

// Module exit point: logs a banner and unregisters the platform driver.
static void __exit zynq_dma_exit(void)
{
    // Explicit "\n" escape: a raw line break inside a string literal is
    // not valid C.
    pr_info("%s: Exiting module\n", DRIVER_NAME);
    platform_driver_unregister(&zynq_dma_driver);
}

// Module entry and exit points
module_init(zynq_dma_init);
module_exit(zynq_dma_exit);

// Module metadata
MODULE_AUTHOR("Your Name");
MODULE_DESCRIPTION("Zynq AXI DMA Driver Example");
MODULE_LICENSE("GPL");
MODULE_VERSION("1.0");

8. 总结与资源

8.1 Zynq开发最佳实践

PS-PL接口选择

选择合适的AXI接口类型(AXI-Lite/AXI4/AXI-Stream)
对于高带宽应用,考虑使用DMA
适当选择中断或轮询方式

性能优化

使用高性能AXI接口(HP)进行大块数据传输
考虑缓存一致性问题(使用ACP或手动刷新缓存)
使用DMA进行数据传输而不是CPU拷贝
合理划分PS和PL功能以达到最佳性能

代码结构

采用抽象层隔离硬件细节
使用PIMPL模式提高封装性和可维护性
采用线程安全设计

调试技巧

使用ILA(集成逻辑分析仪)调试PL部分
使用SDK调试器和性能分析工具
利用硬件计数器和定时器来衡量性能

系统设计

考虑功耗和热量问题
实现适当的错误处理和恢复机制
规划良好的中断处理架构
考虑设备树的灵活性

8.2 常见问题与解决方案

PS-PL同步问题

使用中断、轮询或同步变量
实现超时机制避免死锁
使用屏障指令确保内存一致性

DMA传输失败

检查缓存一致性和对齐要求
确保缓冲区分配正确
验证中断设置正确

设备树配置错误

仔细检查设备树参数和地址映射
确保中断号正确配置
验证兼容性字符串匹配

Linux驱动加载失败

确保内核版本和驱动兼容
检查依赖模块是否已加载
查看系统日志了解详细错误信息

性能问题

分析数据路径找出瓶颈
考虑使用硬件加速而不是软件处理
优化内存访问模式

8.3 资源与参考

Xilinx/AMD官方资料

Zynq-7000 SoC Technical Reference Manual
UltraScale+ MPSoC Technical Reference Manual
Vivado Design Suite User Guide
Vitis Unified Software Platform Documentation

社区资源

FPGA/Zynq开发者论坛
GitHub上的开源项目示例
Stack Overflow问答

学习资源

AMD/Xilinx培训课程和视频
FPGA和嵌入式系统设计书籍
在线教程和大学课程

Zynq开发是一个结合了硬件设计、嵌入式软件和系统架构的跨学科领域。通过本教程,您应该已经掌握了Zynq平台开发的基础知识和实战技能。随着对这一平台更深入的探索,您将能够开发出更加高效、可靠的嵌入式系统。

© 版权声明
THE END
如果内容对您有所帮助,就支持一下吧!
点赞0 分享
评论 抢沙发

请登录后发表评论

    暂无评论内容