From VOC dataset to COCO dataset

File structure

The scripts below assume a working directory laid out as:

    all_img/    source images
    all_xml/    matching VOC XML annotations
    to_txt/     generated YOLO-format txt label files (output)
    train.txt   list of image stems (generated below)

1_name_to_num.py

import os

# Rename every image to a sequential number, and rename its XML label to match.
all_ = os.listdir('all_img')

count = 0
for i in all_:
    filename = os.path.join('all_img', i)
    new_name = os.path.join('all_img', '{}.jpg'.format(count))
    os.rename(filename, new_name)

    # Assumes a three-letter image extension (e.g. .jpg), so i[:-3] keeps the stem plus the dot.
    filename = os.path.join('all_xml', i[:-3] + 'xml')
    new_name = os.path.join('all_xml', '{}.xml'.format(count))
    os.rename(filename, new_name)

    count += 1
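
One caveat the script does not handle: os.listdir returns files in arbitrary order, and renaming straight to 0.jpg, 1.jpg, ... can overwrite a file that already carries a numeric name. A minimal collision-safe sketch (the tmp_ prefix and the sorted order are my choices, not from the original script):

import os

# Pass 1: move everything to temporary names so no target name collides
# with a file that still awaits renaming. Assumes no existing file already
# starts with 'tmp_' and that images use a .jpg extension.
names = sorted(os.listdir('all_img'))
for count, name in enumerate(names):
    stem = os.path.splitext(name)[0]
    os.rename(os.path.join('all_img', name), os.path.join('all_img', 'tmp_{}.jpg'.format(count)))
    os.rename(os.path.join('all_xml', stem + '.xml'), os.path.join('all_xml', 'tmp_{}.xml'.format(count)))

# Pass 2: strip the temporary prefix.
for count in range(len(names)):
    os.rename(os.path.join('all_img', 'tmp_{}.jpg'.format(count)), os.path.join('all_img', '{}.jpg'.format(count)))
    os.rename(os.path.join('all_xml', 'tmp_{}.xml'.format(count)), os.path.join('all_xml', '{}.xml'.format(count)))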

2_all_name_to_a_txt.py

import os

# Collect every image stem (filename without the 4-character '.jpg'
# extension) into train.txt, one per line.
all_ = os.listdir('all_img')

# Note: append mode, so delete train.txt first when re-running.
with open('train.txt', 'a') as f:
    for i in all_:
        print(i[:-4])
        f.write(i[:-4] + '\n')
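
This only produces a training list, but the coco.yaml shown later also expects a validation set. A minimal sketch that carves one out (the 9:1 split ratio and the val.txt filename are assumptions, not from the original post):

import os
import random

random.seed(0)  # reproducible split
stems = sorted(f[:-4] for f in os.listdir('all_img'))
random.shuffle(stems)
n_val = max(1, len(stems) // 10)  # assumed ~10% validation

with open('val.txt', 'w') as f:
    f.writelines(s + '\n' for s in stems[:n_val])
with open('train.txt', 'w') as f:
    f.writelines(s + '\n' for s in stems[n_val:])

If you use this, run the conversion script below on val.txt as well as train.txt so both splits get labels.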

3_xml_to_txt.py

# Imports
import xml.etree.ElementTree as ET

# Class list (fill in according to your own dataset)
classes = ['brand', 'cucumber', 'eggplant', 'tomatoes']

# Normalize a box from the label to YOLO format.
# size: (image width, image height); box: (xmin, xmax, ymin, ymax)
def convert(size, box):
    dw = 1. / size[0]
    dh = 1. / size[1]
    x = (box[0] + box[1]) / 2.0  # box center x in pixels
    y = (box[2] + box[3]) / 2.0  # box center y in pixels
    w = box[1] - box[0]          # box width in pixels
    h = box[3] - box[2]          # box height in pixels
    x = x * dw                   # center x as a fraction of image width
    w = w * dw                   # width as a fraction of image width
    y = y * dh                   # center y as a fraction of image height
    h = h * dh                   # height as a fraction of image height
    return (x, y, w, h)          # all values relative to the image, each in [0, 1]

# Convert one annotation from XML to the txt label format.
def convert_annotation(image_id):
    in_file = open('all_xml/%s.xml' % image_id)       # label to convert
    out_file = open('to_txt/%s.txt' % image_id, 'w')  # where the txt label is written
    tree = ET.parse(in_file)                          # parse the xml file
    root = tree.getroot()                             # root node of the xml
    size = root.find('size')                          # image size node
    w = int(size.find('width').text)                  # image width
    h = int(size.find('height').text)                 # image height

    for obj in root.iter('object'):
        cls = obj.find('name').text                   # class name from the xml
        if cls not in classes:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
             float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
        bb = convert((w, h), b)
        out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')


# train.txt lists every image stem (generated by the previous script)
image_ids_train = open(r'./train.txt').read().strip().split()

for image_id in image_ids_train:
    print(image_id)
    convert_annotation(image_id)  # convert this annotation file
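
As a quick sanity check of convert(), take an illustrative 640×480 image with a box at xmin=100, xmax=300, ymin=50, ymax=250 (made-up numbers), appended to the script above:

# center x = (100 + 300) / 2 / 640 = 0.3125
# center y = (50 + 250) / 2 / 480  = 0.3125
# width    = (300 - 100) / 640     = 0.3125
# height   = (250 - 50) / 480      ≈ 0.4167
print(convert((640, 480), (100.0, 300.0, 50.0, 250.0)))

So for class id 0 the written label line would read approximately: 0 0.3125 0.3125 0.3125 0.4167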

YOLOv5 demo

The changes mainly involve three files:

  1. coco.yaml
    # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
    # COCO 2017 dataset http://cocodataset.org by Microsoft
    # Example usage: python train.py --data coco.yaml
    # parent
    # ├── yolov5
    # └── datasets
    #     └── coco  ← downloads here (20.1 GB)


    # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
    #path: ./dataset/coco  # dataset root dir
    train: ./dataset/coco/images/train  # train images (relative to 'path')
    val: ./dataset/coco/images/val  # val images (relative to 'path')
    #test: test-dev2017.txt  # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794

    # Classes
    nc: 2 # number of classes
    names: ['no_mask', 'mask'] # class names


    # Download script/URL (optional)
    #download: |
    #  from utils.general import download, Path
    #
    #  # Download labels
    #  segments = False  # segment or box labels
    #  dir = Path(yaml['path'])  # dataset root dir
    #  url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
    #  urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')]  # labels
    #  download(urls, dir=dir.parent)
    #
    #  # Download data
    #  urls = ['http://images.cocodataset.org/zips/train2017.zip',  # 19G, 118k images
    #          'http://images.cocodataset.org/zips/val2017.zip',  # 1G, 5k images
    #          'http://images.cocodataset.org/zips/test2017.zip']  # 7G, 41k images (optional)
    #  download(urls, dir=dir / 'images', threads=3)

    The train/val paths above must match the on-disk layout YOLOv5 expects; see the sketch after this list for one way to arrange the converted files.
  2. yolov5s.yaml (only nc is changed from the stock model file)
    # YOLOv5 🚀 by Ultralytics, GPL-3.0 license

    # Parameters
    nc: 2  # number of classes
    depth_multiple: 0.33  # model depth multiple
    width_multiple: 0.50  # layer channel multiple
    anchors:
      - [10,13, 16,30, 33,23]  # P3/8
      - [30,61, 62,45, 59,119]  # P4/16
      - [116,90, 156,198, 373,326]  # P5/32

    # YOLOv5 v6.0 backbone
    backbone:
      # [from, number, module, args]
      [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
       [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
       [-1, 3, C3, [128]],
       [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
       [-1, 6, C3, [256]],
       [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
       [-1, 9, C3, [512]],
       [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
       [-1, 3, C3, [1024]],
       [-1, 1, SPPF, [1024, 5]],  # 9
      ]

    # YOLOv5 v6.0 head
    head:
      [[-1, 1, Conv, [512, 1, 1]],
       [-1, 1, nn.Upsample, [None, 2, 'nearest']],
       [[-1, 6], 1, Concat, [1]],  # cat backbone P4
       [-1, 3, C3, [512, False]],  # 13

       [-1, 1, Conv, [256, 1, 1]],
       [-1, 1, nn.Upsample, [None, 2, 'nearest']],
       [[-1, 4], 1, Concat, [1]],  # cat backbone P3
       [-1, 3, C3, [256, False]],  # 17 (P3/8-small)

       [-1, 1, Conv, [256, 3, 2]],
       [[-1, 14], 1, Concat, [1]],  # cat head P4
       [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)

       [-1, 1, Conv, [512, 3, 2]],
       [[-1, 10], 1, Concat, [1]],  # cat head P5
       [-1, 3, C3, [1024, False]],  # 23 (P5/32-large)

       [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
      ]
  3. A few parameters in train.py (point the --cfg and --data defaults at the two files above):
    def parse_opt(known=False):
        parser = argparse.ArgumentParser()
        parser.add_argument('--weights', type=str, default='yolov5s.pt', help='initial weights path')
        parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml path')
        parser.add_argument('--data', type=str, default='coco.yaml', help='dataset.yaml path')
        parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path')
        parser.add_argument('--epochs', type=int, default=300)
        parser.add_argument('--batch-size', type=int, default=64, help='total batch size for all GPUs, -1 for autobatch')
        parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
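
Finally, the images and the generated txt labels have to land where the coco.yaml paths point. As far as I know, YOLOv5 resolves each label path by substituting 'labels' for 'images' in the image path, so the two directory trees must mirror each other. A minimal sketch, assuming train.txt and val.txt list the image stems and labels for both splits have been generated into to_txt/:

import os
import shutil

# Copy images and labels into the dataset/coco layout referenced by coco.yaml.
for split in ('train', 'val'):
    img_dir = os.path.join('dataset', 'coco', 'images', split)
    lbl_dir = os.path.join('dataset', 'coco', 'labels', split)
    os.makedirs(img_dir, exist_ok=True)
    os.makedirs(lbl_dir, exist_ok=True)
    for stem in open(split + '.txt').read().split():
        shutil.copy(os.path.join('all_img', stem + '.jpg'), img_dir)
        shutil.copy(os.path.join('to_txt', stem + '.txt'), lbl_dir)

With the defaults set in parse_opt above, training then starts with a plain python train.py; override --epochs, --batch-size, or --imgsz on the command line if 300 epochs or a batch size of 64 does not suit your hardware.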