Computer Vision: Object Detection Dataset Overview, Download, and Format Conversion

  • Introduction to Object Detection Datasets
  • Downloading Object Detection Datasets
  • Downloading the VOC2007 Dataset
  • Downloading the VOC2012 Dataset
  • Downloading the COCO Dataset
  • Downloading the LVIS Dataset
  • Object Detection Dataset Format Conversion


Introduction to Object Detection Datasets

Common benchmark datasets for object detection include Pascal VOC, COCO, ImageNet, and LVIS. A brief overview of each follows:

  • Pascal VOC2007 is a medium-scale object detection dataset with 20 categories. Its data are split into a trainval set (5,011 images) and a test set (4,952 images), 9,963 images in total. After downloading and extracting it, there are three folders (a minimal sketch for parsing the XML annotations follows this list):
    Annotations: XML files with the ground-truth objects
    JPEGImages: the 9,963 JPG images
    ImageSets: dataset split files
  • Pascal VOC2012 is a medium-scale object detection dataset with the same 20 categories as Pascal VOC2007. Its data are split into a training set (5,717 images) and a validation set (5,823 images). For the detection task, Pascal VOC2012 is a disjoint extension of Pascal VOC2007, and the two are usually used together. After downloading and extracting it, there are three folders:
    Annotations: XML files with the ground-truth objects
    JPEGImages: the 11,540 JPG images
    ImageSets: dataset split files
  • COCO is a large-scale dataset with 80 categories. Its data are split into three parts: training, validation, and test, containing 118,287, 5,000, and 40,670 images respectively. After downloading and extracting it, there are four folders:
    annotations: annotation files
    train2017: training images
    val2017: validation images
    test2017: test images
  • ImageNet is also an important dataset, with 200 categories in its detection task. Although the dataset itself is very large, the scale range of its objects is similar to that of the VOC datasets, so it is usually not used as a benchmark for object detection. Nevertheless, detection models still make heavy use of backbones pre-trained on ImageNet.
  • LVIS is a dataset for long-tail instance segmentation, containing 164,000 images and more than 1,000 categories.
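
The VOC-style XML ground truth mentioned above can be read with nothing beyond the Python standard library. The snippet below is a minimal sketch; the annotation path is only an example and should point at a file inside your extracted VOCdevkit.

import xml.etree.ElementTree as ET

def parse_voc_xml(xml_path):
    # return the image size and a list of (name, xmin, ymin, xmax, ymax) boxes
    root = ET.parse(xml_path).getroot()
    size = root.find("size")
    width, height = int(size.find("width").text), int(size.find("height").text)
    boxes = []
    for obj in root.findall("object"):
        bb = obj.find("bndbox")
        boxes.append((obj.find("name").text,
                      int(float(bb.find("xmin").text)), int(float(bb.find("ymin").text)),
                      int(float(bb.find("xmax").text)), int(float(bb.find("ymax").text))))
    return width, height, boxes

# example usage; adjust the path to your local layout
print(parse_voc_xml("VOCdevkit/VOC2007/Annotations/000005.xml"))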

Downloading Object Detection Datasets

Downloading the VOC2007 Dataset

1. Download the VOC2007 dataset archives

# trainval set
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
# test set
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
# development kit code and documentation
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCdevkit_08-Jun-2007.tar

2. Extract

tar xvf VOCtrainval_06-Nov-2007.tar
tar xvf VOCtest_06-Nov-2007.tar
tar xvf VOCdevkit_08-Jun-2007.tar
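
Both image archives unpack into the same VOCdevkit/VOC2007 tree (Annotations, ImageSets, JPEGImages, plus segmentation folders). A minimal sanity check of the extracted data, assuming it was unpacked into the current directory, might look like this:

import os

voc_root = "VOCdevkit/VOC2007"
for sub in ["Annotations", "JPEGImages", "ImageSets"]:
    # trainval and test together should give 9963 XML files and 9963 JPG images
    print(sub, len(os.listdir(os.path.join(voc_root, sub))), "entries")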

Downloading the VOC2012 Dataset

1. Download the VOC2012 dataset archive

wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar

2. Extract

tar xvf VOCtrainval_11-May-2012.tar

Downloading the COCO Dataset

1. Download the COCO dataset archives

wget http://images.cocodataset.org/zips/train2017.zip
wget http://images.cocodataset.org/zips/val2017.zip
wget http://images.cocodataset.org/zips/test2017.zip
wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip

2. Extract

unzip train2017.zip
unzip val2017.zip
unzip test2017.zip
unzip annotations_trainval2017.zip
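
After extraction, the JSON annotation files sit under annotations/. A minimal sketch, assuming pycocotools is installed (pip install pycocotools), for checking that the validation annotations load correctly:

from pycocotools.coco import COCO

coco = COCO("annotations/instances_val2017.json")
print("images:", len(coco.getImgIds()))            # expect 5000
print("categories:", len(coco.getCatIds()))        # expect 80
cat_ids = coco.getCatIds(catNms=["elephant"])      # look up a category by name
print("images with elephants:", len(coco.getImgIds(catIds=cat_ids)))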

Downloading the LVIS Dataset

Download it from the official LVIS website; the images are the same as those in COCO 2017, so only the annotation files need to be fetched there.
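
The LVIS annotations follow a COCO-style JSON layout. Below is a minimal inspection sketch; the file name lvis_v1_val.json is only an example of what the official site provides and may differ for other versions:

import json

with open("lvis_v1_val.json") as f:    # adjust to the file you downloaded
    data = json.load(f)
print("images:", len(data["images"]))
print("categories:", len(data["categories"]))
print("annotations:", len(data["annotations"]))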

Object Detection Dataset Format Conversion

Converting the COCO dataset to VOC dataset format:

from pycocotools.coco import COCO
import skimage.io as io
import matplotlib.pyplot as plt
import pylab,os,cv2,shutil
from lxml import etree, objectify
from tqdm import tqdm
import random
from PIL import Image
 
pylab.rcParams['figure.figsize'] = (8.0, 10.0)
 
dataDir='./coco'                # root directory of the extracted COCO dataset
CK5cats=['elephant','zebra']    # COCO categories to extract and convert
 
CKdir="./coco2voc"              # output root, written in VOC directory layout
CKimg_dir=CKdir+"/"+"JPEGImages"
CKanno_dir=CKdir+"/"+"Annotations"
 
def mkr(dir):
    # create the directory if it does not already exist
    if not os.path.exists(dir):
        os.makedirs(dir)
 
def showimg(coco,dataType,img,CK5Ids):
    # optional helper: display one image with its COCO annotations overlaid
    global dataDir
    I = io.imread('%s/%s/%s' % (dataDir, dataType, img['file_name']))
    plt.imshow(I)
    plt.axis('off')
    annIds = coco.getAnnIds(imgIds=img['id'], catIds=CK5Ids, iscrowd=None)
    anns = coco.loadAnns(annIds)
    coco.showAnns(anns)
    plt.show()
 
def save_annotations(dataType,filename,objs):
    # write one VOC-style xml annotation and copy the corresponding image
    annopath=CKanno_dir+"/"+filename[:-3]+"xml"
    img_path=dataDir+"/"+dataType+"/"+filename
    dst_path=CKimg_dir+"/"+filename
    img=cv2.imread(img_path)
    im=Image.open(img_path)
    if im.mode!="RGB":
        # skip non-RGB (e.g. grayscale) images to keep the converted set uniform
        print(filename+" not a RGB image")
        im.close()
        return
    im.close()
    shutil.copy(img_path, dst_path)
    E = objectify.ElementMaker(annotate=False)
    anno_tree = E.annotation(
        E.folder('VOC2007'),
        E.filename(filename),
        E.source(
            E.database('The VOC2007 Database'),
            E.annotation('PASCAL VOC2007'),
            E.image('COCO2VOC')
        ),
        E.size(
            E.width(img.shape[1]),
            E.height(img.shape[0]),
            E.depth(img.shape[2])
        ),
        E.segmented(0)
    )
    for obj in objs:
        # obj=[name,score,xmin,ymin,xmax,ymax]
        E2 = objectify.ElementMaker(annotate=False)
        anno_tree2 = E2.object(
            E2.name(obj[0]),
            E2.pose('Unspecified'),
            E2.truncated("0"),
            E2.difficult(0),
            E2.bndbox(
                E2.xmin(obj[2]),
                E2.ymin(obj[3]),
                E2.xmax(obj[4]),
                E2.ymax(obj[5])
            )
        )
        anno_tree.append(anno_tree2)
    etree.ElementTree(anno_tree).write(annopath, pretty_print=True)
 
def showbycv(coco,dataType,img,classes,CK5Ids):
    # collect boxes of the target categories for one image and save a VOC annotation
    global dataDir
    filename= img['file_name']
    filepath='%s/%s/%s' % (dataDir, dataType,filename)
    I = cv2.imread(filepath)
    annIds = coco.getAnnIds(imgIds=img['id'], catIds=CK5Ids, iscrowd=None)
    anns = coco.loadAnns(annIds)
    objs=[]
    for ann in anns:
        name=classes[ann['category_id']]
        if name in CK5cats:
            if 'bbox' in ann:
                # COCO bbox is [x,y,width,height]; VOC expects [xmin,ymin,xmax,ymax]
                bbox = ann['bbox']
                xmin=int(bbox[0])
                ymin=int(bbox[1])
                xmax=int(bbox[2]+bbox[0])
                ymax=int(bbox[3]+bbox[1])
                obj=[name,1.0,xmin,ymin,xmax,ymax]
                objs.append(obj)
                cv2.rectangle(I, (xmin,ymin),(xmax,ymax),(255,0,0))
                cv2.putText(I,name,(xmin,ymin),3,1,(0,0,255))
    save_annotations(dataType,filename,objs)
    #cv2.imshow("img",I)
    #cv2.waitKey(1)
 
def catid2name(coco):
    classes=dict()
    for cat in coco.dataset['categories']:
        classes[cat['id']]=cat['name']
        #print(str(cat['id'])+":"+cat['name'])
    return classes
 
def get_CK5():
    # main conversion loop: walk the COCO train/val splits and write VOC xml files
    mkr(CKimg_dir)
    mkr(CKanno_dir)
    dataTypes=['train2017','val2017']
    for dataType in dataTypes:
        annFile = '{}/annotations/instances_{}.json'.format(dataDir, dataType)
        coco = COCO(annFile)
        CK5Ids = coco.getCatIds(catNms=CK5cats)
        classes=catid2name(coco)
        for srccat in CK5cats:
            print(dataType + ":" + srccat)
            catIds = coco.getCatIds(catNms=[srccat])
            imgIds = coco.getImgIds(catIds=catIds)
            #imgIds=imgIds[0:100]
            # an image containing several target categories is visited once per
            # category; its xml file is simply rewritten with the same content
            for imgId in tqdm(imgIds):
                img=coco.loadImgs(imgId)[0]
                showbycv(coco,dataType,img,classes,CK5Ids)
                #showimg(coco,dataType,img,CK5Ids)
 
#split train/val/test lists for training
def split_traintest(trainratio=0.7,valratio=0.2,testratio=0.1):
    dataset_dir=CKdir
    files=os.listdir(CKimg_dir)
    trains=[]
    vals=[]
    trainvals=[]
    tests=[]
    random.shuffle(files)
    # assign each image to train / val / test according to the given ratios
    for i in range(len(files)):
        if(i<trainratio*len(files)):
            trains.append(files[i])
            trainvals.append(files[i])
        elif i<(trainratio+valratio)*len(files):
            vals.append(files[i])
            trainvals.append(files[i])
        else:
            tests.append(files[i])
    #write txt files for yolo
    #with open(dataset_dir+"/trainval.txt","w")as f:
    #    for line in trainvals:
    #        line=CKimg_dir+"/"+line
    #        f.write(line+"\n")
    #with open(dataset_dir+"/test.txt","w") as f:
    #    for line in tests:
    #        line=CKimg_dir+"/"+line
    #        f.write(line+"\n")
    #write split files for voc (image ids without extension)
    maindir=dataset_dir+"/"+"ImageSets/Main"
    mkr(maindir)
    with open(maindir+"/train.txt","w") as f:
        for line in trains:
            line=line[:line.rfind(".")]
            f.write(line+"\n")
    with open(maindir+"/val.txt","w") as f:
        for line in vals:
            line=line[:line.rfind(".")]
            f.write(line+"\n")
    with open(maindir+"/trainval.txt","w") as f:
        for line in trainvals:
            line=line[:line.rfind(".")]
            f.write(line+"\n")
    with open(maindir+"/test.txt","w") as f:
        for line in tests:
            line=line[:line.rfind(".")]
            f.write(line+"\n")
    print("splitting done")
 
 
if __name__=="__main__":
    get_CK5()           # convert COCO annotations of the target categories to VOC xml
    split_traintest()   # write the ImageSets/Main split lists
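
After the script finishes, a quick way to spot-check the converted data is to parse one of the generated XML files. This is a minimal sketch and assumes the default output directory ./coco2voc used above:

import os
import xml.etree.ElementTree as ET

anno_dir = "./coco2voc/Annotations"
sample = os.path.join(anno_dir, sorted(os.listdir(anno_dir))[0])  # first converted file
root = ET.parse(sample).getroot()
print(sample, "->", root.find("filename").text)
for obj in root.findall("object"):
    bb = obj.find("bndbox")
    print(" ", obj.find("name").text,
          bb.find("xmin").text, bb.find("ymin").text,
          bb.find("xmax").text, bb.find("ymax").text)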