The dataset format used by YOLOv5 is different from that of previous YOLO models. The data structure is as follows:
The images folder holds the images for train and val.
The labels folder stores the object annotations for train and val; each txt file in labels corresponds to an image of the same name in images.
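A sketch of the expected layout (the top-level folder name is up to you; the conversion script later in this article uses VOC):
VOC/
├── images/
│   ├── train/
│   └── val/
└── labels/
    ├── train/
    └── val/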
The contents of a txt file look like this:
Format: class_id x_center y_center width height
The coordinates are not absolute pixel values: the box center and size are divided by the image width and height, so every value is a fraction between 0 and 1, relative to the image size.
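For example (illustrative numbers): in a 640×480 image, a dog whose box is centered at (320, 240) and is 64 pixels wide and 48 pixels tall gets the label line:
11 0.5 0.5 0.1 0.1
since 320/640 = 0.5, 240/480 = 0.5, 64/640 = 0.1, and 48/480 = 0.1 (11 is the index of dog in the class list used below).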
Now that the format has been introduced, here is how to convert VOC data into the dataset used by YOLOv5.
The dataset used here is PASCAL VOC 2007.
Download links:
Training and validation sets:
host.robots.ox.ac.uk/pascal/VOC/…
Test set:
host.robots.ox.ac.uk/pascal/VOC/…
Download and unpack the training and test sets into the same place. Create a tmp folder under the YOLOv5 project and place the unpacked VOC dataset (the VOCdevkit folder) under tmp, as shown below:
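The unpacked archives should produce a layout along these lines (only the folders the scripts below actually use are listed):
tmp/
└── VOCdevkit/
    └── VOC2007/
        ├── Annotations/        (XML annotation files)
        ├── ImageSets/Main/     (train/val/test image id lists)
        └── JPEGImages/         (the images)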
Create a new voc2txt.py file in the tmp folder to convert the VOC annotations to txt labels.
The voc2txt.py code:
First, the imports:
import xml.etree.ElementTree as ET
import os
from os import getcwd
List the dataset splits and the categories:
sets = [('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
classes = ["aeroplane", "bicycle", "bird", "boat", "bottle",
           "bus", "car", "cat", "chair", "cow", "diningtable", "dog",
           "horse", "motorbike", "person", "pottedplant", "sheep",
           "sofa", "train", "tvmonitor"]
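Note that each class's position in this list becomes its class id in the generated label files: aeroplane is 0, bicycle is 1, and so on through tvmonitor at 19; dog, for example, is 11, matching the label line shown earlier.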
Convert a box from VOC corner coordinates to center/size values expressed as fractions of the image width and height:
def convert(size, box):
    # size is (width, height); box is (xmin, xmax, ymin, ymax)
    dw = 1./(size[0])
    dh = 1./(size[1])
    # the -1 appears to offset VOC's 1-based pixel coordinates
    x = (box[0] + box[1])/2.0 - 1
    y = (box[2] + box[3])/2.0 - 1
    w = box[1] - box[0]
    h = box[3] - box[2]
    x = x*dw
    w = w*dw
    y = y*dh
    h = h*dh
    return (x, y, w, h)
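A quick sanity check (an illustrative call, not part of the original script), using the same 640×480 box as the earlier example but given as VOC corner coordinates:
convert((640, 480), (288, 352, 216, 264))
# -> approximately (0.498, 0.498, 0.1, 0.1)
The centers land just under 0.5 because of the -1 adjustment.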
The following function parses a single XML annotation file and writes the converted label:
def convert_annotation(year, image_id):
    in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml' % (year, image_id))
    out_file = open('VOCdevkit/VOC%s/labels/%s.txt' % (year, image_id), 'w')
    tree = ET.parse(in_file)
    root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    for obj in root.iter('object'):
        difficult = obj.find('difficult').text
        cls = obj.find('name').text
        if cls not in classes or int(difficult) == 1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
        bb = convert((w, h), b)
        out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
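Used on its own, a call would look like this (image id 000001 is just an example; the labels folder must already exist, which the main loop below takes care of):
convert_annotation('2007', '000001')
# writes VOCdevkit/VOC2007/labels/000001.txt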
The overall code is as follows:
import xml.etree.ElementTree as ET
import os
from os import getcwd

sets = [('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

def convert(size, box):
    # Convert (xmin, xmax, ymin, ymax) to normalized (x_center, y_center, w, h).
    dw = 1. / (size[0])
    dh = 1. / (size[1])
    x = (box[0] + box[1]) / 2.0 - 1
    y = (box[2] + box[3]) / 2.0 - 1
    w = box[1] - box[0]
    h = box[3] - box[2]
    x = x * dw
    w = w * dw
    y = y * dh
    h = h * dh
    return (x, y, w, h)

def convert_annotation(year, image_id):
    in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml' % (year, image_id))
    out_file = open('VOCdevkit/VOC%s/labels/%s.txt' % (year, image_id), 'w')
    tree = ET.parse(in_file)
    root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    for obj in root.iter('object'):
        difficult = obj.find('difficult').text
        cls = obj.find('name').text
        # Skip classes we do not care about and objects marked as difficult.
        if cls not in classes or int(difficult) == 1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text),
             float(xmlbox.find('ymax').text))
        bb = convert((w, h), b)
        out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

wd = getcwd()
for year, image_set in sets:
    if not os.path.exists('VOCdevkit/VOC%s/labels/' % year):
        os.makedirs('VOCdevkit/VOC%s/labels/' % year)
    image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt' % (year, image_set)).read().strip().split()
    list_file = open('%s.txt' % image_set, 'w')
    for image_id in image_ids:
        # Record the absolute image path and convert its annotation.
        list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n' % (wd, year, image_id))
        convert_annotation(year, image_id)
    list_file.close()
Running voc2txt.py generates train.txt, val.txt, and test.txt, as shown in the figure:
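Each line of these files is the absolute path of one image; with a hypothetical project location the contents look like:
/home/user/yolov5/tmp/VOCdevkit/VOC2007/JPEGImages/000012.jpg
/home/user/yolov5/tmp/VOCdevkit/VOC2007/JPEGImages/000017.jpg
…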
Create a new makedata.py in the tmp folder to convert these intermediate results into the final dataset used by YOLOv5.
The code is as follows:
import shutil
import os

file_List = ["train", "val", "test"]
for file in file_List:
    if not os.path.exists('../VOC/images/%s' % file):
        os.makedirs('../VOC/images/%s' % file)
    if not os.path.exists('../VOC/labels/%s' % file):
        os.makedirs('../VOC/labels/%s' % file)
    print(os.path.exists('../tmp/%s.txt' % file))
    f = open('../tmp/%s.txt' % file, 'r')
    lines = f.readlines()
    for line in lines:
        print(line)
        # Trim the absolute path down to its last five components.
        line = "/".join(line.split('/')[-5:]).strip()
        shutil.copy(line, "../VOC/images/%s" % file)
        # Derive the label path from the image path.
        line = line.replace('JPEGImages', 'labels')
        line = line.replace('jpg', 'txt')
        shutil.copy(line, "../VOC/labels/%s/" % file)
After execution, the final dataset is generated under the YOLOv5 project.