From VOC dataset to COCO dataset

File structure

The scripts below assume a working directory laid out as:

    all_img/    source images
    all_xml/    matching VOC XML annotations
    to_txt/     generated YOLO-format txt label files (output)
    train.txt   list of image stems (generated below)

1_name_to_num.py

import os

# Rename every image to a sequential number, and rename its XML label to match.
all_ = os.listdir('all_img')

count = 0
for i in all_:
    filename = os.path.join('all_img', i)
    new_name = os.path.join('all_img', '{}.jpg'.format(count))
    os.rename(filename, new_name)

    # Assumes a three-letter image extension (e.g. .jpg), so i[:-3] keeps the stem plus the dot.
    filename = os.path.join('all_xml', i[:-3] + 'xml')
    new_name = os.path.join('all_xml', '{}.xml'.format(count))
    os.rename(filename, new_name)

    count += 1
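
One caveat the script does not handle: os.listdir returns files in arbitrary order, and renaming straight to 0.jpg, 1.jpg, ... can overwrite a file that already carries a numeric name. A minimal collision-safe sketch (the tmp_ prefix and the sorted order are my choices, not from the original script):

import os

# Pass 1: move everything to temporary names so no target name collides
# with a file that still awaits renaming. Assumes no existing file already
# starts with 'tmp_' and that images use a .jpg extension.
names = sorted(os.listdir('all_img'))
for count, name in enumerate(names):
    stem = os.path.splitext(name)[0]
    os.rename(os.path.join('all_img', name), os.path.join('all_img', 'tmp_{}.jpg'.format(count)))
    os.rename(os.path.join('all_xml', stem + '.xml'), os.path.join('all_xml', 'tmp_{}.xml'.format(count)))

# Pass 2: strip the temporary prefix.
for count in range(len(names)):
    os.rename(os.path.join('all_img', 'tmp_{}.jpg'.format(count)), os.path.join('all_img', '{}.jpg'.format(count)))
    os.rename(os.path.join('all_xml', 'tmp_{}.xml'.format(count)), os.path.join('all_xml', '{}.xml'.format(count)))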

2_all_name_to_a_txt.py

import os

# Collect every image stem (filename without the 4-character '.jpg'
# extension) into train.txt, one per line.
all_ = os.listdir('all_img')

# Note: append mode, so delete train.txt first when re-running.
with open('train.txt', 'a') as f:
    for i in all_:
        print(i[:-4])
        f.write(i[:-4] + '\n')
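
This only produces a training list, but the coco.yaml shown later also expects a validation set. A minimal sketch that carves one out (the 9:1 split ratio and the val.txt filename are assumptions, not from the original post):

import os
import random

random.seed(0)  # reproducible split
stems = sorted(f[:-4] for f in os.listdir('all_img'))
random.shuffle(stems)
n_val = max(1, len(stems) // 10)  # assumed ~10% validation

with open('val.txt', 'w') as f:
    f.writelines(s + '\n' for s in stems[:n_val])
with open('train.txt', 'w') as f:
    f.writelines(s + '\n' for s in stems[n_val:])

If you use this, run the conversion script below on val.txt as well as train.txt so both splits get labels.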

3_xml_to_txt.py

# Imports
import xml.etree.ElementTree as ET

# Class list (fill in according to your own dataset)
classes = ['brand', 'cucumber', 'eggplant', 'tomatoes']

# Normalize a box from the label to YOLO format.
# size: (image width, image height); box: (xmin, xmax, ymin, ymax)
def convert(size, box):
    dw = 1. / size[0]
    dh = 1. / size[1]
    x = (box[0] + box[1]) / 2.0  # box center x in pixels
    y = (box[2] + box[3]) / 2.0  # box center y in pixels
    w = box[1] - box[0]          # box width in pixels
    h = box[3] - box[2]          # box height in pixels
    x = x * dw                   # center x as a fraction of image width
    w = w * dw                   # width as a fraction of image width
    y = y * dh                   # center y as a fraction of image height
    h = h * dh                   # height as a fraction of image height
    return (x, y, w, h)          # all values relative to the image, each in [0, 1]

# Convert one annotation from XML to the txt label format.
def convert_annotation(image_id):
    in_file = open('all_xml/%s.xml' % image_id)       # label to convert
    out_file = open('to_txt/%s.txt' % image_id, 'w')  # where the txt label is written
    tree = ET.parse(in_file)                          # parse the xml file
    root = tree.getroot()                             # root node of the xml
    size = root.find('size')                          # image size node
    w = int(size.find('width').text)                  # image width
    h = int(size.find('height').text)                 # image height

    for obj in root.iter('object'):
        cls = obj.find('name').text                   # class name from the xml
        if cls not in classes:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
             float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
        bb = convert((w, h), b)
        out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')


# train.txt lists every image stem (generated by the previous script)
image_ids_train = open(r'./train.txt').read().strip().split()

for image_id in image_ids_train:
    print(image_id)
    convert_annotation(image_id)  # convert this annotation file
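
As a quick sanity check of convert(), take an illustrative 640×480 image with a box at xmin=100, xmax=300, ymin=50, ymax=250 (made-up numbers), appended to the script above:

# center x = (100 + 300) / 2 / 640 = 0.3125
# center y = (50 + 250) / 2 / 480  = 0.3125
# width    = (300 - 100) / 640     = 0.3125
# height   = (250 - 50) / 480      ≈ 0.4167
print(convert((640, 480), (100.0, 300.0, 50.0, 250.0)))

So for class id 0 the written label line would read approximately: 0 0.3125 0.3125 0.3125 0.4167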

YOLOv5 demo

The changes mainly involve three files:

  1. coco.yaml
    # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
    # COCO 2017 dataset http://cocodataset.org by Microsoft
    # Example usage: python train.py --data coco.yaml
    # parent
    # ├── yolov5
    # └── datasets
    #     └── coco  ← downloads here (20.1 GB)


    # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
    #path: ./dataset/coco  # dataset root dir
    train: ./dataset/coco/images/train  # train images (relative to 'path')
    val: ./dataset/coco/images/val  # val images (relative to 'path')
    #test: test-dev2017.txt  # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794

    # Classes
    nc: 2 # number of classes
    names: ['no_mask', 'mask'] # class names


    # Download script/URL (optional)
    #download: |
    #  from utils.general import download, Path
    #
    #  # Download labels
    #  segments = False  # segment or box labels
    #  dir = Path(yaml['path'])  # dataset root dir
    #  url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
    #  urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')]  # labels
    #  download(urls, dir=dir.parent)
    #
    #  # Download data
    #  urls = ['http://images.cocodataset.org/zips/train2017.zip',  # 19G, 118k images
    #          'http://images.cocodataset.org/zips/val2017.zip',  # 1G, 5k images
    #          'http://images.cocodataset.org/zips/test2017.zip']  # 7G, 41k images (optional)
    #  download(urls, dir=dir / 'images', threads=3)

    The train/val paths above must match the on-disk layout YOLOv5 expects; see the sketch after this list for one way to arrange the converted files.
  2. yolov5s.yaml (only nc is changed from the stock model file)
    # YOLOv5 🚀 by Ultralytics, GPL-3.0 license

    # Parameters
    nc: 2  # number of classes
    depth_multiple: 0.33  # model depth multiple
    width_multiple: 0.50  # layer channel multiple
    anchors:
      - [10,13, 16,30, 33,23]  # P3/8
      - [30,61, 62,45, 59,119]  # P4/16
      - [116,90, 156,198, 373,326]  # P5/32

    # YOLOv5 v6.0 backbone
    backbone:
      # [from, number, module, args]
      [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
       [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
       [-1, 3, C3, [128]],
       [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
       [-1, 6, C3, [256]],
       [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
       [-1, 9, C3, [512]],
       [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
       [-1, 3, C3, [1024]],
       [-1, 1, SPPF, [1024, 5]],  # 9
      ]

    # YOLOv5 v6.0 head
    head:
      [[-1, 1, Conv, [512, 1, 1]],
       [-1, 1, nn.Upsample, [None, 2, 'nearest']],
       [[-1, 6], 1, Concat, [1]],  # cat backbone P4
       [-1, 3, C3, [512, False]],  # 13

       [-1, 1, Conv, [256, 1, 1]],
       [-1, 1, nn.Upsample, [None, 2, 'nearest']],
       [[-1, 4], 1, Concat, [1]],  # cat backbone P3
       [-1, 3, C3, [256, False]],  # 17 (P3/8-small)

       [-1, 1, Conv, [256, 3, 2]],
       [[-1, 14], 1, Concat, [1]],  # cat head P4
       [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)

       [-1, 1, Conv, [512, 3, 2]],
       [[-1, 10], 1, Concat, [1]],  # cat head P5
       [-1, 3, C3, [1024, False]],  # 23 (P5/32-large)

       [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
      ]
  3. A few parameters in train.py (point the --cfg and --data defaults at the two files above):
    def parse_opt(known=False):
        parser = argparse.ArgumentParser()
        parser.add_argument('--weights', type=str, default='yolov5s.pt', help='initial weights path')
        parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml path')
        parser.add_argument('--data', type=str, default='coco.yaml', help='dataset.yaml path')
        parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path')
        parser.add_argument('--epochs', type=int, default=300)
        parser.add_argument('--batch-size', type=int, default=64, help='total batch size for all GPUs, -1 for autobatch')
        parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
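
Finally, the images and the generated txt labels have to land where the coco.yaml paths point. As far as I know, YOLOv5 resolves each label path by substituting 'labels' for 'images' in the image path, so the two directory trees must mirror each other. A minimal sketch, assuming train.txt and val.txt list the image stems and labels for both splits have been generated into to_txt/:

import os
import shutil

# Copy images and labels into the dataset/coco layout referenced by coco.yaml.
for split in ('train', 'val'):
    img_dir = os.path.join('dataset', 'coco', 'images', split)
    lbl_dir = os.path.join('dataset', 'coco', 'labels', split)
    os.makedirs(img_dir, exist_ok=True)
    os.makedirs(lbl_dir, exist_ok=True)
    for stem in open(split + '.txt').read().split():
        shutil.copy(os.path.join('all_img', stem + '.jpg'), img_dir)
        shutil.copy(os.path.join('to_txt', stem + '.txt'), lbl_dir)

With the defaults set in parse_opt above, training then starts with a plain python train.py; override --epochs, --batch-size, or --imgsz on the command line if 300 epochs or a batch size of 64 does not suit your hardware.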