计算机视觉:目标检测数据集简介、下载与转换
- 目标检测数据集简介
- 目标检测数据集下载
- VOC2007数据集下载
- VOC2012数据集下载
- COCO数据集下载
- LVIS数据集下载
- 目标检测数据集格式转换
目标检测数据集简介
目标检测中常见的基准数据集有Pascal VOC、COCO、ImageNet和LVIS等,简介如下:
- Pascal VOC2007 是目标检测中一个中等规模的数据集,共有20个类别。其数据分为训练验证集(5011张)和测试集(4952张),共计9963张图片。下载解压后有以下三个文件夹:
Annotations:存放 xml 文件,目标真值
JPEGImages:9963张JPG图片
ImageSets:数据集划分文件
- Pascal VOC2012 是一个用于目标检测的中等规模数据集,与Pascal VOC2007拥有相同的20个类别。其数据分为训练集(5717张)和验证集(5823张),共计11540张图片。在检测任务中,Pascal VOC 2012 是 Pascal VOC 2007 的无交集扩展,我们通常将两者一起使用。下载解压后有以下三个文件夹:
Annotations:存放 xml 文件,目标真值
JPEGImages:11540张JPG图片
ImageSets:数据集划分文件
- COCO 是具有80个类别的大规模数据集。其数据分为训练、验证和测试三部分,分别包含 118287、5000 和 40670 张图片。下载解压后有以下四个文件夹:
annotations:标注文件
train2017:训练集数据
val2017:验证集数据
test2017:测试集数据
- ImageNet 也是一个拥有200个类别的重要数据集。然而尽管其规模很大,但目标的尺度范围和VOC数据集相似,所以通常不用作目标检测的基准数据集。不过,目标检测模型的backbone仍然大量采用在ImageNet上预训练好的模型。
- LVIS是一个用于长尾实例分割的数据集,包含164000张图像和1000多个类别。
目标检测数据集下载
VOC2007数据集下载
1、下载VOC2007数据集压缩包
# 训练验证集
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
# 测试集
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
# 开发工具包代码和文档
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCdevkit_08-Jun-2007.tar
2、解压
tar xvf VOCtrainval_06-Nov-2007.tar
tar xvf VOCtest_06-Nov-2007.tar
tar xvf VOCdevkit_08-Jun-2007.tar
VOC2012数据集下载
1、下载VOC2012数据集压缩包
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
2、解压
tar xvf VOCtrainval_11-May-2012.tar
COCO数据集下载
1、下载COCO数据集压缩包
wget http://images.cocodataset.org/zips/train2017.zip
wget http://images.cocodataset.org/zips/val2017.zip
wget http://images.cocodataset.org/zips/test2017.zip
wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip
2、解压
unzip train2017.zip
unzip val2017.zip
unzip test2017.zip
unzip annotations_trainval2017.zip
LVIS数据集下载
前往官网下载
目标检测数据集格式转换
COCO数据集转为VOC数据集格式:
from pycocotools.coco import COCO
import skimage.io as io
import matplotlib.pyplot as plt
import pylab,os,cv2,shutil
from lxml import etree, objectify
from tqdm import tqdm
import random
from PIL import Image
# Set the default matplotlib figure size used when images are previewed with plt.imshow.
pylab.rcParams['figure.figsize'] = (8.0, 10.0)
# Root of the downloaded COCO dataset; the script reads
# <dataDir>/annotations/instances_<split>.json and <dataDir>/<split>/<image>.
dataDir = './coco'

# Names of the COCO categories that are extracted into the VOC-style dataset.
CK5cats = ['elephant', 'zebra']

# Output root of the converted dataset and its image / annotation folders.
CKdir = "./coco2voc"
CKimg_dir = "/".join([CKdir, "JPEGImages"])
CKanno_dir = "/".join([CKdir, "Annotations"])
def mkr(dir):
    """Create directory ``dir``, including missing parents, if it is absent.

    Calling it on a directory that already exists is a no-op.

    Args:
        dir: Path of the directory to create. The name shadows the builtin
            ``dir`` but is kept so existing keyword callers keep working.

    Raises:
        FileExistsError: If ``dir`` exists but is not a directory.
    """
    # exist_ok=True replaces the separate os.path.exists() check, which could
    # race with another process creating the directory in between.
    os.makedirs(dir, exist_ok=True)
def showimg(coco, dataType, img, CK5Ids):
    """Display one COCO image with the selected categories' annotations overlaid.

    Args:
        coco: COCO API object providing ``getAnnIds``, ``loadAnns`` and
            ``showAnns``.
        dataType: Split sub-folder under ``dataDir`` that holds the image.
        img: COCO image record; its ``file_name`` and ``id`` entries are read.
        CK5Ids: Category ids whose annotations are drawn.
    """
    image_path = '%s/%s/%s' % (dataDir, dataType, img['file_name'])
    pixels = io.imread(image_path)
    plt.imshow(pixels)
    plt.axis('off')

    # Overlay only the annotations that belong to the requested categories.
    selected_ids = coco.getAnnIds(imgIds=img['id'], catIds=CK5Ids, iscrowd=None)
    coco.showAnns(coco.loadAnns(selected_ids))
    plt.show()
def save_annotations(dataType,filename,objs):
    """Copy one image into the VOC image folder and write its VOC XML annotation.

    Images whose PIL mode is not ``"RGB"``, or that OpenCV cannot decode, are
    reported on stdout and skipped: nothing is copied or written for them.

    Args:
        dataType: Split sub-folder under ``dataDir`` that holds the image.
        filename: Image file name; also used for the copied image, and its
            stem names the XML file.
        objs: Iterable of ``[name, score, xmin, ymin, xmax, ymax]`` entries.
            Index 1 is not written to the XML.
    """
    # splitext (instead of filename[:-3]) keeps the stem intact for extensions
    # that are not exactly three characters long, e.g. ".jpeg".
    annopath = CKanno_dir + "/" + os.path.splitext(filename)[0] + ".xml"
    img_path = dataDir + "/" + dataType + "/" + filename
    dst_path = CKimg_dir + "/" + filename

    # Check the colour mode first so skipped images are never fully decoded;
    # the context manager closes the file handle on every path.
    with Image.open(img_path) as im:
        is_rgb = im.mode == "RGB"
    if not is_rgb:
        print(filename+" not a RGB image")
        return

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        print(filename + " could not be read by OpenCV")
        return
    height, width, depth = img.shape[:3]

    shutil.copy(img_path, dst_path)

    E = objectify.ElementMaker(annotate=False)
    anno_tree = E.annotation(
        E.folder('VOC2007'),
        E.filename(filename),
        E.source(
            E.database('The VOC2007 Database'),
            E.annotation('PASCAL VOC2007'),
            E.image('COCO2VOC')
        ),
        E.size(
            E.width(width),
            E.height(height),
            E.depth(depth)
        ),
        E.segmented(0)
    )
    # One <object> element per bounding box; a single ElementMaker is reused.
    for obj in objs:
        anno_tree.append(
            E.object(
                E.name(obj[0]),
                E.pose(),
                E.truncated("0"),
                E.difficult(0),
                E.bndbox(
                    E.xmin(obj[2]),
                    E.ymin(obj[3]),
                    E.xmax(obj[4]),
                    E.ymax(obj[5])
                )
            )
        )
    etree.ElementTree(anno_tree).write(annopath, pretty_print=True)
def showbycv(coco, dataType, img, classes, CK5Ids):
    """Collect the target-category boxes of one image and save them in VOC form.

    The boxes are also drawn onto the image loaded with OpenCV, but the
    preview call is disabled, so the drawing is not shown.

    Args:
        coco: COCO API object providing ``getAnnIds`` and ``loadAnns``.
        dataType: Split sub-folder under ``dataDir`` that holds the image.
        img: COCO image record; its ``file_name`` and ``id`` entries are read.
        classes: Mapping from category id to category name.
        CK5Ids: Category ids used to filter the image's annotations.
    """
    filename = img['file_name']
    canvas = cv2.imread('%s/%s/%s' % (dataDir, dataType, filename))
    ann_ids = coco.getAnnIds(imgIds=img['id'], catIds=CK5Ids, iscrowd=None)

    objs = []
    for ann in coco.loadAnns(ann_ids):
        name = classes[ann['category_id']]
        if name not in CK5cats or 'bbox' not in ann:
            continue
        bbox = ann['bbox']
        # bbox is used as [x, y, width, height]; convert to integer corners.
        xmin = int(bbox[0])
        ymin = int(bbox[1])
        xmax = int(bbox[2] + bbox[0])
        ymax = int(bbox[3] + bbox[1])
        objs.append([name, 1.0, xmin, ymin, xmax, ymax])
        cv2.rectangle(canvas, (xmin, ymin), (xmax, ymax), (255, 0, 0))
        cv2.putText(canvas, name, (xmin, ymin), 3, 1, (0, 0, 255))

    save_annotations(dataType, filename, objs)
    # Preview is disabled; call cv2.imshow("img", canvas) here to inspect boxes.
    cv2.waitKey(1)
def catid2name(coco):
    """Return a dict mapping each category id to its category name.

    Args:
        coco: Object whose ``dataset['categories']`` is an iterable of records
            with ``id`` and ``name`` entries.

    Returns:
        ``{category_id: category_name}``; for a repeated id the last record
        wins.
    """
    return {entry['id']: entry['name'] for entry in coco.dataset['categories']}
def get_CK5():
    """Convert every image containing a ``CK5cats`` category to VOC layout.

    For the ``train2017`` and ``val2017`` splits, loads the matching
    ``instances_<split>.json`` annotation file and passes each image that
    contains at least one target category to ``showbycv``, which writes the
    image copy and its XML annotation.
    """
    mkr(CKimg_dir)
    mkr(CKanno_dir)
    for dataType in ('train2017', 'val2017'):
        annFile = '{}/annotations/instances_{}.json'.format(dataDir, dataType)
        coco = COCO(annFile)
        CK5Ids = coco.getCatIds(catNms=CK5cats)
        classes = catid2name(coco)

        # showbycv already gathers the boxes of *all* target categories for an
        # image, so an image containing several of them needs converting only
        # once per split rather than once per category.
        converted = set()
        for srccat in CK5cats:
            print(dataType + ":" + srccat)
            catIds = coco.getCatIds(catNms=[srccat])
            pending = [
                imgId
                for imgId in coco.getImgIds(catIds=catIds)
                if imgId not in converted
            ]
            converted.update(pending)
            for imgId in tqdm(pending):
                img = coco.loadImgs(imgId)[0]
                showbycv(coco, dataType, img, classes, CK5Ids)
def _write_image_ids(path, names):
    """Write the extension-less stem of each file name to ``path``, one per line."""
    with open(path, "w") as f:
        # splitext (instead of slicing at rfind(".")) leaves names without any
        # dot untouched rather than chopping off their last character.
        f.writelines(os.path.splitext(name)[0] + "\n" for name in names)


def split_traintest(trainratio=0.7,valratio=0.2,testratio=0.1):
    """Randomly split the converted images and write the VOC ImageSets lists.

    The files in ``CKimg_dir`` are shuffled, then assigned by position: the
    first ``trainratio`` share to train, the next ``valratio`` share to val,
    and everything left over to test. ``train.txt``, ``val.txt``,
    ``trainval.txt`` and ``test.txt`` are written under
    ``<CKdir>/ImageSets/Main`` with one image stem per line.

    Args:
        trainratio: Fraction of the files assigned to the training list.
        valratio: Fraction of the files assigned to the validation list.
        testratio: Not used in the computation; the test list is whatever
            remains after train and val. Kept for interface compatibility.
    """
    files = os.listdir(CKimg_dir)
    random.shuffle(files)

    total = len(files)
    train_end = trainratio * total
    val_end = (trainratio + valratio) * total

    trains, vals, tests = [], [], []
    for position, name in enumerate(files):
        if position < train_end:
            trains.append(name)
        elif position < val_end:
            vals.append(name)
        else:
            tests.append(name)
    # Every train index precedes every val index, so concatenation gives the
    # same order as appending to a combined list while iterating.
    trainvals = trains + vals

    # Only the VOC-style id lists are written; YOLO-style lists holding full
    # image paths are intentionally not produced.
    maindir = CKdir + "/" + "ImageSets/Main"
    mkr(maindir)
    _write_image_ids(maindir + "/train.txt", trains)
    _write_image_ids(maindir + "/val.txt", vals)
    _write_image_ids(maindir + "/trainval.txt", trainvals)
    _write_image_ids(maindir + "/test.txt", tests)
    print("spliting done")
# Script entry point: run the conversion, then write the split lists.
if __name__ == "__main__":
    # Step 1: copy the images of the target categories and write their XML files.
    get_CK5()
    # Step 2: write the train / val / trainval / test lists for the copied images.
    split_traintest()