1 Download the Project
Source code: https://github.com/ultralytics/yolov5
2 Configure the Environment
Create and activate a conda environment:
```bash
conda create -n yolov5 python=3.7
conda activate yolov5
```
Install the dependencies from the project directory:
```bash
pip install -r requirements.txt
```
Versions verified to work on an RTX 3090:
```bash
pip install torch===1.7.1+cu110 torchvision===0.8.2+cu110 torchaudio===0.7.2 -f https://download.pytorch.org/whl/torch_stable.html
```
3 Prepare the Dataset (Convert WiderPerson to VOC Format)
3.1 Create the data directory structure
Inside the downloaded yolov5 project folder, create a folder named VOCdevkit; inside it, create VOC2007; then create three folders under VOC2007 (a small script that creates them is sketched after this list):
- JPEGImages: holds all the images
- Annotations: holds the XML label files corresponding to the images
- ImageSets: inside it, create a Main subfolder to hold the generated train.txt, val.txt, trainval.txt, and test.txt files.
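A minimal sketch that creates this layout, run from the yolov5 project root (folder names exactly as listed above):
```python
import os

# create VOCdevkit/VOC2007 together with its three required subfolders
for sub in ('JPEGImages', 'Annotations', 'ImageSets/Main'):
    os.makedirs(os.path.join('VOCdevkit', 'VOC2007', sub), exist_ok=True)
```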
3.2 Convert WiderPerson to VOC
Directory layout:
- WiderPerson sits at the same level as VOC2007
![[YOLOv5]训练WiderPerson数据集](https://pic.songma.com/blogimg/20251106/4ce5abc6ef0840d5b89c253c7e075cc0.jpg)
- Put the images and label files into the corresponding folders. Create a test.py file under VOC2007 containing the conversion code below (relative paths, no modification needed, run it directly).
- trainval.txt in the WiderPerson folder is the union of train.txt and val.txt (Annotations carries no test information, so test is excluded). Also note: rename the Images folder to images.
- The train/val split that generates Main/train.txt, val.txt, trainval.txt, and test.txt is handled by the split script sketched after the conversion code.
Note: delete 000040.jpg.txt and its corresponding image; the file is garbled and breaks XML generation.
![[YOLOv5]训练WiderPerson数据集](https://pic.songma.com/blogimg/20251106/48babab0711d4edf9f21a2f26bf8686b.jpg)
Conversion code (test.py):
```python
import os
import numpy as np
import scipy.io as sio
import shutil
from lxml.etree import Element, SubElement, tostring
from xml.dom.minidom import parseString
import cv2


def make_voc_dir():
    # create the VOC directories if they do not exist yet
    if not os.path.exists('../VOC2007/Annotations'):
        os.makedirs('../VOC2007/Annotations')
    if not os.path.exists('../VOC2007/ImageSets'):
        os.makedirs('../VOC2007/ImageSets')
        os.makedirs('../VOC2007/ImageSets/Main')
    if not os.path.exists('../VOC2007/JPEGImages'):
        os.makedirs('../VOC2007/JPEGImages')


if __name__ == '__main__':
    classes = {'1': 'pedestrians',
               '2': 'riders',
               '3': 'partially',
               '4': 'ignore',
               '5': 'crowd'}
    VOCRoot = '../VOC2007'
    widerDir = '../WiderPerson'  # dataset root path
    wider_path = '../WiderPerson/trainval.txt'
    make_voc_dir()
    with open(wider_path, 'r') as f:
        imgIds = [x for x in f.read().splitlines()]

    for imgId in imgIds:
        objCount = 0  # flag marking whether this image contains annotations we need
        filename = imgId + '.jpg'
        img_path = '../WiderPerson/images/' + filename
        print('Img :%s' % img_path)
        img = cv2.imread(img_path)
        width = img.shape[1]   # image width
        height = img.shape[0]  # image height
        node_root = Element('annotation')
        node_folder = SubElement(node_root, 'folder')
        node_folder.text = 'JPEGImages'
        node_filename = SubElement(node_root, 'filename')
        node_filename.text = 'VOC2007/JPEGImages/%s' % filename
        node_size = SubElement(node_root, 'size')
        node_width = SubElement(node_size, 'width')
        node_width.text = '%s' % width
        node_height = SubElement(node_size, 'height')
        node_height.text = '%s' % height
        node_depth = SubElement(node_size, 'depth')
        node_depth.text = '3'
        label_path = img_path.replace('images', 'Annotations') + '.txt'
        with open(label_path) as file:
            line = file.readline()
            count = int(line.split('\n')[0])  # number of persons in this image
            line = file.readline()
            while line:
                cls_id = line.split(' ')[0]
                xmin = int(line.split(' ')[1]) + 1
                ymin = int(line.split(' ')[2]) + 1
                xmax = int(line.split(' ')[3]) + 1
                ymax = int(line.split(' ')[4].split('\n')[0]) + 1
                line = file.readline()
                cls_name = classes[cls_id]
                obj_width = xmax - xmin
                obj_height = ymax - ymin
                difficult = 0
                if obj_height <= 6 or obj_width <= 6:
                    difficult = 1  # tiny boxes are marked difficult
                node_object = SubElement(node_root, 'object')
                node_name = SubElement(node_object, 'name')
                node_name.text = cls_name
                node_difficult = SubElement(node_object, 'difficult')
                node_difficult.text = '%s' % difficult
                node_bndbox = SubElement(node_object, 'bndbox')
                node_xmin = SubElement(node_bndbox, 'xmin')
                node_xmin.text = '%s' % xmin
                node_ymin = SubElement(node_bndbox, 'ymin')
                node_ymin.text = '%s' % ymin
                node_xmax = SubElement(node_bndbox, 'xmax')
                node_xmax.text = '%s' % xmax
                node_ymax = SubElement(node_bndbox, 'ymax')
                node_ymax.text = '%s' % ymax
                node_name = SubElement(node_object, 'pose')
                node_name.text = 'Unspecified'
                node_name = SubElement(node_object, 'truncated')
                node_name.text = '0'
        image_path = VOCRoot + '/JPEGImages/' + filename
        xml = tostring(node_root, pretty_print=True)  # serialized <annotation> tree
        dom = parseString(xml)
        xml_name = filename.replace('.jpg', '.xml')
        xml_path = VOCRoot + '/Annotations/' + xml_name
        with open(xml_path, 'wb') as f:
            f.write(xml)
        # widerDir = '../WiderPerson'  # dataset root path
        shutil.copy(img_path, '../VOC2007/JPEGImages/' + filename)
```
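The conversion script above writes the XML annotations and copies the images, but it does not itself populate ImageSets/Main. A minimal split sketch for that step, run from inside VOC2007 (this mirrors the split behaviour described in 3.2; the 0.9 train share is an assumption, and everything goes into trainval since Annotations carries no test data):
```python
import os
import random

train_percent = 0.9  # assumed train share of trainval; adjust as needed
total_xml = os.listdir('Annotations')  # one XML file per converted image

num = len(total_xml)
indices = range(num)
trainval = random.sample(indices, num)                     # every image goes into trainval
train = random.sample(trainval, int(num * train_percent))  # random train subset

with open('ImageSets/Main/trainval.txt', 'w') as ftrainval, \
     open('ImageSets/Main/test.txt', 'w') as ftest, \
     open('ImageSets/Main/train.txt', 'w') as ftrain, \
     open('ImageSets/Main/val.txt', 'w') as fval:
    for i in indices:
        name = total_xml[i][:-4] + '\n'  # strip the '.xml' extension
        if i in trainval:
            ftrainval.write(name)
            if i in train:
                ftrain.write(name)
            else:
                fval.write(name)
        else:
            ftest.write(name)  # empty here; kept for the usual VOC split shape
```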
After the conversion and the split, the Main folder contains the following files:
![[YOLOv5]训练WiderPerson数据集](https://pic.songma.com/blogimg/20251106/6747a1dfa9a24df990cedca4df46d628.jpg)
3.3 Modify utils/datasets.py
- The COCO dataset keeps its images in an images folder
- The VOC dataset keeps them in JPEGImages
So in utils/datasets.py, replace 'images' with 'JPEGImages' inside img2label_paths:
![[YOLOv5]训练WiderPerson数据集](https://pic.songma.com/blogimg/20251106/1ad9f17788434eca927f5982e836d86d.jpg)
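For reference, the edited function might look like the sketch below (it mirrors the img2label_paths of yolov5 versions from that period; verify against your own checkout):
```python
def img2label_paths(img_paths):
    # derive each label path from its image path: .../JPEGImages/x.jpg -> .../labels/x.txt
    sa, sb = os.sep + 'JPEGImages' + os.sep, os.sep + 'labels' + os.sep  # was 'images'
    return [sb.join(x.rsplit(sa, 1)).rsplit('.', 1)[0] + '.txt' for x in img_paths]
```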
3.4 Generate the labels folder
Next, create a voc_label.py file in the project root with the code below, changing classes to the categories you want to train.
Code (voc_label.py):
```python
import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join

sets = [('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
classes = ["pedestrians", "riders", "partially", "ignore", "crowd"]


def convert(size, box):
    # convert a VOC box (xmin, xmax, ymin, ymax) to normalized YOLO (x, y, w, h)
    dw = 1. / (size[0])
    dh = 1. / (size[1])
    x = (box[0] + box[1]) / 2.0 - 1
    y = (box[2] + box[3]) / 2.0 - 1
    w = box[1] - box[0]
    h = box[3] - box[2]
    x = x * dw
    w = w * dw
    y = y * dh
    h = h * dh
    return (x, y, w, h)


def convert_annotation(year, image_id):
    in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml' % (year, image_id))
    out_file = open('VOCdevkit/VOC%s/labels/%s.txt' % (year, image_id), 'w')
    tree = ET.parse(in_file)
    root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    for obj in root.iter('object'):
        difficult = obj.find('difficult').text
        cls = obj.find('name').text
        if cls not in classes or int(difficult) == 1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
             float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
        bb = convert((w, h), b)
        out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')


wd = getcwd()
for year, image_set in sets:
    if not os.path.exists('VOCdevkit/VOC%s/labels/' % (year)):
        os.makedirs('VOCdevkit/VOC%s/labels/' % (year))
    image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt' % (year, image_set)).read().strip().split()
    list_file = open('%s_%s.txt' % (year, image_set), 'w')
    for image_id in image_ids:
        list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n' % (wd, year, image_id))
        convert_annotation(year, image_id)
    list_file.close()

os.system("cat 2007_train.txt 2007_val.txt > train.txt")
os.system("cat 2007_train.txt 2007_val.txt 2007_test.txt > train.all.txt")
```
Running voc_label.py creates a new labels folder under VOC2007. Each file in it is the txt-format label for one image, holding the class and the normalized bounding-box coordinates of every target, roughly as shown below.
![[YOLOv5]训练WiderPerson数据集](https://pic.songma.com/blogimg/20251106/d598ad89fb9248cc925a35fb34eda124.jpg)
At the same time, 2007_train.txt, 2007_val.txt, and 2007_test.txt appear in the project root; each line in them is the absolute path of an image, as shown below.
![[YOLOv5]训练WiderPerson数据集](https://pic.songma.com/blogimg/20251106/0ec9ad53208143a38bb44b876085f537.jpg)
4 Modify the Configuration Files
4.1 Modify the dataset config file
In the data folder under the project root, copy coco.yaml, rename it mycoco.yaml, open it, and make the following changes, as shown below:
- change the number of classes and the class names to your own
- train: the absolute path of the generated 2007_train.txt
- val: the absolute path of the generated 2007_val.txt
- test: the absolute path of the generated 2007_test.txt
- the download section can be commented out
```yaml
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# COCO 2017 dataset http://cocodataset.org by Microsoft
# Example usage: python train.py --data coco.yaml
# parent
# ├── yolov5
# └── datasets
#     └── coco  ← downloads here (20.1 GB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: /home/user/myproject/yolov5/VOCdevkit/VOC2007  # dataset root dir
train: /home/user/myproject/yolov5/2007_train.txt  # train images
val: /home/user/myproject/yolov5/2007_val.txt  # val images
test: /home/user/myproject/yolov5/2007_test.txt  # test images

# Classes
nc: 5  # number of classes
names: ['pedestrians', 'riders', 'partially', 'ignore', 'crowd']  # class names

# Download script/URL (optional; left over from coco.yaml, can be commented out)
download: |
  from utils.general import download, Path

  # Download labels
  segments = False  # segment or box labels
  dir = Path(yaml['path'])  # dataset root dir
  url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
  urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')]  # labels
  download(urls, dir=dir.parent)

  # Download data
  urls = ['http://images.cocodataset.org/zips/train2017.zip',  # 19G, 118k images
          'http://images.cocodataset.org/zips/val2017.zip',  # 1G, 5k images
          'http://images.cocodataset.org/zips/test2017.zip']  # 7G, 41k images (optional)
  download(urls, dir=dir / 'images', threads=3)
```
4.2 Modify the model config file
First choose a model. YOLOv5 ships four: s, m, l, x; pick one according to your GPU and data. Generally s and m suit mobile deployment, while l and x suit cloud deployment. Here we pick l, so edit models/yolov5l.yaml: only the nc value at the top needs to change to your own number of classes.
![[YOLOv5]训练WiderPerson数据集](https://pic.songma.com/blogimg/20251106/d61612a8dcde44be88a0244a751548cb.jpg)
![[YOLOv5]训练WiderPerson数据集](https://pic.songma.com/blogimg/20251106/15dc338bfd2440fcb77e64fd0375f1d8.jpg)
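After the edit, the top of models/yolov5l.yaml looks roughly like this (the depth/width multipliers shown are the stock yolov5l values):
```yaml
nc: 5  # number of classes, changed from the default 80
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple
```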
5 Model Training
First, download the pretrained weights from the official releases page:
https://github.com/ultralytics/yolov5/releases
- Alternatively, download them from a script via the attempt_download function in utils, as sketched below.
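A minimal sketch of that route (the module path varies across yolov5 releases: utils.google_utils in older versions, utils.downloads in newer ones):
```python
# run from the yolov5 project root
from utils.downloads import attempt_download  # or: from utils.google_utils import attempt_download

attempt_download('weights/yolov5l.pt')  # fetches the file from the GitHub releases if it is missing
```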
Next, modify a few parameters in train.py:
- weights: change the default to the path of the downloaded weights
- cfg: change the default to the model config file you are using
- data: change to the mycoco.yaml prepared above
- epochs: change as needed
- batch-size: tune to your GPU's capacity; reduce it if the GPU struggles
- img-size: input image size, 640 by default; it must be a multiple of 32.
```python
parser.add_argument('--weights', type=str, default='./weights/yolov5l.pt', help='initial weights path')
parser.add_argument('--cfg', type=str, default='./models/yolov5l.yaml', help='model.yaml path')
parser.add_argument('--data', type=str, default='./data/mycoco.yaml', help='dataset.yaml path')
parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path')
parser.add_argument('--epochs', type=int, default=300)
parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch')
parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
```
Then run train.py. The weights are saved under runs/train/exp(x)/weights/: two checkpoints, the best (best.pt) and the last (last.pt). Since training here runs on a remote server, launch it with nohup python -u train.py > log.out 2>&1 & so it survives the session.
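For example, to launch in the background and then follow the log:
```bash
nohup python -u train.py > log.out 2>&1 &   # keep training alive after the SSH session closes
tail -f log.out                             # follow training progress
```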
6 Visualize the Training Process (with wandb)
First, pip install wandb.
Then register an account on the wandb website and fetch that account's API key. Run wandb login on the command line and enter the key when prompted.
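Put together (the exact prompt wording may vary with the wandb version):
```bash
pip install wandb
wandb login   # paste the API key copied from your wandb account when prompted
```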
![[YOLOv5]训练WiderPerson数据集](https://pic.songma.com/blogimg/20251106/743b62dce3d149f286a4e994b81580dc.jpg)
7 Inference
Modify detect.py:
- weights: change to the absolute path of the trained best.pt
- source: the path of the image(s) to detect
- adjust the other threshold parameters to your needs.
![[YOLOv5]训练WiderPerson数据集](https://pic.songma.com/blogimg/20251106/8c9d63e8b02d4c6ebc4368cf1e062701.jpg)
Run python detect.py --save-txt (or pass the parameters directly on the command line: python detect.py --weights runs/train/exp16/weights/best.pt --source data/test/Tc400_137.jpg).
The results appear in runs/detect/exp; the detailed detections are in a txt file named after each image. Without --save-txt, only the annotated images are produced.
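Each line of such a txt follows YOLO's normalized label format: class index, box center x and y, box width and height, all relative to the image size (a confidence column is appended only if --save-conf is also passed). A hypothetical line:
```
0 0.5132 0.4471 0.1021 0.3187
```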
8 Model Parameter Tuning
https://blog.csdn.net/HowieXue/article/details/118463534