使用 Python 实现 CNN 并展示图片
以下是一个完整的示例代码,使用 PyTorch 构建 CNN 模型对 CIFAR-10 数据集进行分类,并展示图片及预测结果。
环境准备
确保已安装必要的库:
pip install torch torchvision matplotlib numpy
代码实现
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
# Define the CNN model
class CNN(nn.Module):
    """Small CNN classifier: two conv + max-pool stages, then three linear layers.

    ``fc1`` expects 16 * 5 * 5 features per sample, which is what the two
    conv/pool stages produce for a 3-channel 32x32 input
    (32 -> 28 -> 14 -> 10 -> 5).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)  # shared by both conv stages
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the raw class scores, one row of 10 values per input sample."""
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike
        # ``view(-1, 16 * 5 * 5)`` this can never fold samples into each other
        # when the spatial size is unexpected; fc1 raises a shape error instead.
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        # No activation on the last layer: the output is fed to a loss that
        # works on raw scores.
        x = self.fc3(x)
        return x
# Data loading and preprocessing
transform = transforms.Compose([
    transforms.ToTensor(),
    # Per channel: (x - 0.5) / 0.5. imshow() undoes this with img / 2 + 0.5.
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
# num_workers=0 loads batches in the main process. This script has no
# ``if __name__ == '__main__':`` guard, so on platforms that start worker
# processes with "spawn" (Windows, macOS) each worker would re-import and
# re-run the whole module, which fails at start-up.
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True, num_workers=0)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
# The test loader does not shuffle, so its first batch is always the same images.
testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=False, num_workers=0)
# Class names, indexed by the integer label
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
# Initialise the model, the loss function and the optimizer
net = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Train the model
for epoch in range(2):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(trainloader):
        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # Every 2000 mini-batches, report the mean loss over those batches
        # and start a new running total.
        if i % 2000 == 1999:
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

print('Finished Training')
# Show test images and their predictions
def imshow(img):
    """Display a normalised image tensor with matplotlib.

    Args:
        img: image tensor laid out as (channels, height, width) whose values
            were normalised with mean 0.5 and std 0.5.
    """
    img = img / 2 + 0.5  # undo the normalisation
    # detach()/cpu() keep this working for tensors that track gradients or
    # live on a GPU; a plain .numpy() call raises for both.
    npimg = img.detach().cpu().numpy()
    # matplotlib expects (height, width, channels).
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()
# Fetch the first batch of test images (testloader does not shuffle, so this
# is always the same batch).
dataiter = iter(testloader)
# DataLoader iterators do not provide a ``.next()`` method any more; use the
# builtin next().
images, labels = next(dataiter)
# Show the images
imshow(torchvision.utils.make_grid(images))
# Print the ground-truth labels (one per image in the batch)
print('GroundTruth: ', ' '.join(f'{classes[label]:5s}' for label in labels))
# Run the model; gradients are not needed for inference
with torch.no_grad():
    outputs = net(images)
# The predicted class is the index of the highest score in each row
_, predicted = torch.max(outputs, 1)
# Print the predictions
print('Predicted: ', ' '.join(f'{classes[idx]:5s}' for idx in predicted))
代码说明
CNN 模型结构:包含两个卷积层和三个全连接层,使用 ReLU 激活函数和最大池化。
数据预处理:对 CIFAR-10 数据进行归一化处理。
训练过程:使用交叉熵损失和 SGD 优化器进行模型训练。
结果展示:从测试集中取出第一个批次的图片(testloader 设置了 shuffle=False,因此并非随机选取),显示图片并输出真实标签和预测结果。
可视化其他图片的预测结果
可以通过以下代码展示更多图片的预测结果:
# Show predictions for more test images.
# Calling iter(testloader) again would only return the first batch again,
# because testloader does not shuffle. Draw from a shuffling loader over the
# same test set so that each run shows different images.
sample_loader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=True)
dataiter = iter(sample_loader)
# DataLoader iterators do not provide a ``.next()`` method any more; use the
# builtin next().
images, labels = next(dataiter)
# Show the images
imshow(torchvision.utils.make_grid(images))
# Print the ground-truth labels (one per image in the batch)
print('GroundTruth: ', ' '.join(f'{classes[label]:5s}' for label in labels))
# Run the model; gradients are not needed for inference
with torch.no_grad():
    outputs = net(images)
# The predicted class is the index of the highest score in each row
_, predicted = torch.max(outputs, 1)
# Print the predictions
print('Predicted: ', ' '.join(f'{classes[idx]:5s}' for idx in predicted))
模型保存与加载
保存训练好的模型:
# Destination file for the trained parameters
PATH = './cifar_net.pth'
# Store only the parameters (the state dict), not the module object itself.
torch.save(obj=net.state_dict(), f=PATH)
加载模型进行预测:
# Rebuild the architecture first, then restore the saved parameters into it.
net = CNN()
# map_location='cpu' lets a checkpoint that was saved from a GPU be loaded on
# a machine without one.
net.load_state_dict(torch.load(PATH, map_location='cpu'))
# The reloaded model is used for prediction, so switch it to evaluation mode.
net.eval()
以上代码提供了一个完整的 CNN 实现,包括数据加载、模型训练、结果展示等功能。根据实际需求,可以调整模型结构或参数以获得更好的性能。
整体代码改进版本
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
# 1. Data loading and preprocessing
transform = transforms.Compose(
    [
        transforms.ToTensor(),                 # convert to a tensor
        transforms.Normalize((0.5,), (0.5,)),  # normalise to [-1, 1]
    ]
)
# Load the MNIST dataset: the training split first, then the test split.
train_dataset, test_dataset = (
    datasets.MNIST(root='./data', train=is_train, transform=transform, download=True)
    for is_train in (True, False)
)
# Only the training loader shuffles.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)
# 2. Define the CNN model
class SimpleCNN(nn.Module):
    """Two 3x3 conv layers (each followed by ReLU and 2x2 max-pooling), then two linear layers.

    ``fc1`` expects 64 * 7 * 7 features per sample, which is what the network
    produces for a 1-channel 28x28 input (28 -> 14 -> 7; the padded
    convolutions keep the spatial size).
    """

    def __init__(self):
        super().__init__()
        # Convolution layers
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)   # 1 -> 32 channels
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)  # 32 -> 64 channels
        # Fully connected layers
        self.fc1 = nn.Linear(64 * 7 * 7, 128)  # takes the flattened feature map
        self.fc2 = nn.Linear(128, 10)          # 10 classes

    def forward(self, x):
        """Return the raw class scores, one row of 10 values per input sample."""
        x = F.relu(self.conv1(x))  # first convolution + ReLU
        x = F.max_pool2d(x, 2)     # max pooling
        x = F.relu(self.conv2(x))  # second convolution + ReLU
        x = F.max_pool2d(x, 2)     # max pooling
        # Flatten everything except the batch dimension. Unlike
        # ``view(-1, 64 * 7 * 7)`` this can never fold samples into each other
        # when the spatial size is unexpected; fc1 raises a shape error instead.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))    # fully connected + ReLU
        x = self.fc2(x)            # output layer, no activation
        return x
# Create the model instance
model = SimpleCNN()

# 3. Loss function and optimizer
criterion = nn.CrossEntropyLoss()  # multi-class cross-entropy loss
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# 4. Training
num_epochs = 5
model.train()  # put the model in training mode
for epoch in range(num_epochs):
    total_loss = 0
    for images, labels in train_loader:
        outputs = model(images)            # forward pass
        loss = criterion(outputs, labels)  # compute the loss
        optimizer.zero_grad()              # clear gradients left from the previous step
        loss.backward()                    # backward pass
        optimizer.step()                   # update the parameters
        total_loss += loss.item()
    # Report once per epoch: the mean loss per mini-batch.
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss / len(train_loader):.4f}")
# 5. Evaluation on the test set
model.eval()  # put the model in evaluation mode
correct = 0
total = 0
with torch.no_grad():  # gradients are not needed for evaluation
    for images, labels in test_loader:
        outputs = model(images)
        # The predicted class is the index of the highest score in each row
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
# Computed once, after every test batch has been counted
accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy:.2f}%")
# 6. Visualise predictions for the first test batch
dataiter = iter(test_loader)
images, labels = next(dataiter)
with torch.no_grad():  # inference only; no gradients needed
    outputs = model(images)
_, predictions = torch.max(outputs, 1)
# Show the first 36 images on a 6 x 6 grid. A single row of 36 axes leaves no
# room for the two-line titles.
fig, axes = plt.subplots(6, 6, figsize=(12, 12))
for i, ax in enumerate(axes.flat):
    ax.imshow(images[i][0], cmap='gray')  # [0] selects the single channel
    # "\n" puts the prediction on its own line below the label
    ax.set_title(f"Label: {labels[i]}\nPred: {predictions[i]}")
    ax.axis('off')
plt.tight_layout()
plt.show()
运行结果
python show_cnn.py
Epoch [1/5], Loss: 0.2375
Epoch [2/5], Loss: 0.0513
Epoch [3/5], Loss: 0.0357
Epoch [4/5], Loss: 0.0278
Epoch [5/5], Loss: 0.0216
Test Accuracy: 99.10%

继续改进,微调
上面的改进版代码用于对 MNIST 手写数字数据集进行分类,展示了如何使用 PyTorch 构建一个简单的 CNN 模型。在这个案例中,我们定义了一个包含两个卷积层(每个卷积层后接 ReLU 激活函数和最大池化)以及两个全连接层的简单 CNN 模型,然后对模型进行训练,并在测试集上进行评估。
Go语言实现的卷积神经网络
以下是一个使用 Go 语言实现卷积神经网络(CNN)的简单示例。下面给出的代码只包含卷积层和全连接层的结构体定义及参数的随机初始化,不包含前向传播和训练部分。
导入必要的包和库
package main
import (
"fmt"
"math/rand"
)
创建一个卷积层结构体
// ConvLayer holds the settings and the parameters of one convolution layer.
type ConvLayer struct {
	FilterSize int                   // side length of each square filter
	NumFilters int                   // number of filters
	Stride     int                   // stride, stored as given
	Activation func(float64) float64 // activation function, stored as given
	Weights    [][]float64           // NumFilters rows of FilterSize*FilterSize values
	Biases     []float64             // one bias per filter
}

// NewConvLayer returns a ConvLayer whose weights and biases are filled with
// values drawn from rand.Float64.
func NewConvLayer(filterSize, numFilters, stride int, activation func(float64) float64) *ConvLayer {
	// randomVector returns n freshly drawn rand.Float64 values.
	randomVector := func(n int) []float64 {
		values := make([]float64, n)
		for k := range values {
			values[k] = rand.Float64()
		}
		return values
	}

	layer := &ConvLayer{
		FilterSize: filterSize,
		NumFilters: numFilters,
		Stride:     stride,
		Activation: activation,
		Weights:    make([][]float64, numFilters),
	}
	// Draw all weights first and the biases afterwards, so the random
	// sequence is consumed in a fixed order.
	for f := range layer.Weights {
		layer.Weights[f] = randomVector(filterSize * filterSize)
	}
	layer.Biases = randomVector(numFilters)
	return layer
}
创建一个全连接层结构体
// DenseLayer holds the settings and the parameters of one fully connected layer.
type DenseLayer struct {
	InputSize  int                   // number of inputs
	OutputSize int                   // number of outputs
	Activation func(float64) float64 // activation function, stored as given
	Weights    [][]float64           // OutputSize rows of InputSize values
	Biases     []float64             // one bias per output
}

// NewDenseLayer returns a DenseLayer whose weights and biases are filled with
// values drawn from rand.Float64.
func NewDenseLayer(inputSize, outputSize int, activation func(float64) float64) *DenseLayer {
	// drawValues returns n freshly drawn rand.Float64 values.
	drawValues := func(n int) []float64 {
		values := make([]float64, n)
		for k := range values {
			values[k] = rand.Float64()
		}
		return values
	}

	layer := &DenseLayer{
		InputSize:  inputSize,
		OutputSize: outputSize,
		Activation: activation,
		Weights:    make([][]float64, outputSize),
	}
	// Draw all weights first and the biases afterwards, so the random
	// sequence is consumed in a fixed order.
	for out := range layer.Weights {
		layer.Weights[out] = drawValues(inputSize)
	}
	layer.Biases = drawValues(outputSize)
	return layer
}
下面的示例代码使用 PyTorch 加载 MNIST 测试集并对图像进行归一化处理,用模型对图像进行预测,然后逐张显示图像,并给出每张图像的预测值和真实值:
import torch
import torchvision
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np
# Load the MNIST test set
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
test_dataset = datasets.MNIST(root='data/', train=False, transform=transform, download=True)
# Every image in the batch gets its own figure, so fetch a small random batch.
# Loading the whole test set as one batch would open one blocking figure per
# test image.
NUM_SAMPLES = 8
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=NUM_SAMPLES, shuffle=True)
# Load the model
model = YourModel()  # replace this with your own trained model
# Get one batch of images and labels
images, labels = next(iter(test_loader))
# Predict; gradients are not needed for inference
with torch.no_grad():
    outputs = model(images)
# The predicted class is the index of the highest score in each row
_, predicted = torch.max(outputs, 1)
# Undo the normalisation and convert to a NumPy array
images = (images + 1) / 2
images = images.numpy()
# Show each image with its predicted and actual label
for img, pred, label in zip(images, predicted, labels):
    # squeeze() drops the single channel axis so matplotlib gets a 2-D image
    plt.imshow(img.squeeze(), cmap='gray')
    plt.title(f'Predicted: {pred.item()}, Actual: {label.item()}')
    plt.show()
© 版权声明
文章版权归作者所有,未经允许请勿转载。如内容涉嫌侵权,请在本页底部进入<联系我们>进行举报投诉!
THE END














暂无评论内容