前言
最近查找车牌检测数据集,了解到CCPD数据集。CCPD是一个开源免费的中国城市车牌识别数据集,非常不错。
具体实现
1. 数据集简介
CCPD2019数据集包含约35万张图片(各类型合计355013张),图片尺寸为720x1160x3,共包含9种类型,每种类型的图片数量及说明参考下表。
类型 | 图片数量 | 备注 |
ccpd_base | 199996 | 正常车牌 |
ccpd_blur | 20611 | 模糊车牌 |
ccpd_challenge | 50003 | 比较有挑战的车牌 |
ccpd_db | 10132 | 光线较亮或较暗车牌 |
ccpd_fn | 20967 | 距离摄像头较远或较近 |
ccpd_np | 3036 | 没上牌的新车 |
ccpd_rotate | 10053 | 水平倾斜20°~50°,垂直倾斜-10°~10° |
ccpd_tilt | 30216 | 水平倾斜15°~45°,垂直倾斜15°~45° |
ccpd_weather | 9999 | 雨天、雪天或者大雾天的车牌 |
合计 | 355013 | |
|
数据标注格式:
CCPD的标注数据格式较为特别,是通过解析图片名的方式获取具体信息,即图像名就是标注内容。
如图片【025-95_113-154&383_386&473-386&473_177&454_154&383_363&402-0_0_22_27_27_33_16-37-15.jpg】,其文件名的含义如下:
025:车牌区域占整个画面的比例;
95_113: 车牌水平和垂直角度, 水平95°, 竖直113°
154&383_386&473:标注框左上、右下坐标,左上(154, 383), 右下(386, 473)
386&473_177&454_154&383_363&402:标注框四个角点坐标,顺序为右下、左下、左上、右上
0_0_22_27_27_33_16:车牌号码,映射关系如下:第一个0为省份,对应省份字典provinces中的'皖';第二个0是该车所在地的地市一级代码,对应地市一级代码字典alphabets中的'A';后5位为字母和数字,对应车牌号字典ads,如22为Y,27为3,33为9,16为S,最终车牌号码为皖AY339S
车牌字典
# Province table (34 entries), indexed by the first number of the plate code.
provinces = [
    "皖", "沪", "津", "渝", "冀", "晋", "蒙", "辽", "吉", "黑",
    "苏", "浙", "京", "闽", "赣", "鲁", "豫", "鄂", "湘", "粤",
    "桂", "琼", "川", "贵", "云", "藏", "陕", "甘", "青", "宁",
    "新", "警", "学", "O",
]
# City-level letter table (25 entries), indexed by the second number of the plate code.
alphabets = [
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
    'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R',
    'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    'O',
]
# Plate character table (35 entries), indexed by the remaining numbers of the plate code.
ads = [
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
    'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R',
    'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    'O',
]
2. 将CCPD中车牌区域解析为YOLO格式的txt标注(代码中的函数名沿用了coco的叫法)
# 20240703: ccpd dataset to coco format dataset.
import os
import cv2 as cv
import numpy as np
# Image width and height in pixels; imgsz is the (width, height) pair.
imgw, imgh = 720, 1160
imgsz = (imgw, imgh)

# Province table: 34 single-character entries.
provinces = list("皖沪津渝冀晋蒙辽吉黑苏浙京闽赣鲁豫鄂湘粤桂琼川贵云藏陕甘青宁新警学O")

# City-level letter table: 25 entries (A-Z without I and O, then a trailing 'O').
alphabets = list("ABCDEFGHJKLMNPQRSTUVWXYZ") + ['O']

# Plate character table: 35 entries (the same 24 letters, digits 0-9, then a trailing 'O').
ads = list("ABCDEFGHJKLMNPQRSTUVWXYZ") + list("0123456789") + ['O']
def get_plate_licenses(plate):
    """Decode a sequence of CCPD plate indices into the plate string.

    Regular blue plates have 7 characters; new-energy plates have 8:
    https://baike.baidu.com/item/%E8%BD%A6%E7%89%8C/8347320?fr=aladdin
    "Differences between new-energy and regular plates":
    https://www.yoojia.com/ask/4-11906976349117851507.html
    A new-energy plate has three parts: province abbreviation (1 Chinese
    character) + regional code (1 letter) + serial number (6 characters).
    The letter "D" marks a pure electric vehicle; "F" marks a non-pure
    electric one (plug-in hybrid, fuel cell, etc.).

    :param plate: sequence of indices (ints or numeric strings). Item 0 is
        looked up in ``provinces``, item 1 in ``alphabets``, and all
        remaining items in ``ads``.
    :return: the decoded plate as a single string.
    """
    # The original carried a disabled sanity check for new-energy plates
    # (third or last character must be 'D' or 'F'); it remains disabled.
    prefix = provinces[int(plate[0])] + alphabets[int(plate[1])]
    serial = "".join(ads[int(code)] for code in plate[2:])
    return prefix + serial
def ccpd2coco(path):
    """Walk ``<path>/CCPD2020`` and write one label file per ``.jpg`` image.

    Each image file name is passed to ``parse_annotation``, which writes the
    matching ``.txt`` label into ``<path>/CCPD2020/green_label``. Despite the
    function name, the label written there is a single plain-text line
    (class id followed by four normalised box values), not a COCO JSON file.

    Files that do not end in ``.jpg`` (README, licence, previously written
    labels, ...) are skipped, because their names do not follow the CCPD
    naming scheme and cannot be parsed.

    :param path: directory that contains the ``CCPD2020`` folder.
    :return: None
    """
    dataset_path = os.path.join(path, 'CCPD2020')
    labelpath = os.path.join(dataset_path, 'green_label')
    for dirpath, subpaths, files in os.walk(dataset_path):
        images = [name for name in files if name.lower().endswith('.jpg')]
        if images:
            # Create the output directory lazily so that a missing dataset
            # folder stays a silent no-op, as before.
            os.makedirs(labelpath, exist_ok=True)
        for filename in images:
            print(f'file in path: {dirpath}, subpath: {subpaths}, filename: {filename}')
            parse_annotation(filename, labelpath)
def display(filepath, annoinfo):
    """Draw the first bounding box of an annotation on its image and save it.

    The annotated image is written under the same file name, relative to the
    current working directory (not back into ``filepath``).

    :param filepath: directory that contains the image.
    :param annoinfo: dict with a ``'filename'`` entry and a ``'bboxes'`` entry
        holding ``[x1, y1, x2, y2]`` boxes; only the first box is drawn.
    :raises FileNotFoundError: if the image cannot be read.
    """
    filename = annoinfo['filename']
    x1, y1, x2, y2 = annoinfo['bboxes'][0]  # xyxy
    imgpath = os.path.join(filepath, filename)
    img = cv.imread(imgpath)
    if img is None:
        # cv.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than inside cv.rectangle.
        raise FileNotFoundError(f"cannot read image: {imgpath}")
    # Corners are (top-left, bottom-right); colour is (255, 0, 0) in BGR order.
    cv.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0))
    cv.imwrite(filename, img)
def get_bbox(size, box):
    """Convert a pixel xyxy box into a normalised YOLOv5-style xywh box.

    :param size: image size as ``(width, height)``.
    :param box: box corners as ``[x1, y1, x2, y2]`` in pixels.
    :return: tuple ``(xc, yc, w, h)``: box centre, width and height, each
        divided by the corresponding image dimension.
    """
    scale_x = 1. / size[0]
    scale_y = 1. / size[1]
    left, top, right, bottom = box[0], box[1], box[2], box[3]
    centre_x = (left + right) * 0.5 * scale_x
    centre_y = (top + bottom) * 0.5 * scale_y
    width = (right - left) * scale_x
    height = (bottom - top) * scale_y
    return centre_x, centre_y, width, height
def parse_annotation(filename, labelpath):
    """Parse a CCPD image file name and write its label file.

    A CCPD file name carries the annotation as ``-``-separated fields, e.g.
    ``0014128352490421455-90_90-212&467_271&489-271&489_212&489_212&467_271&467-0_0_3_30_30_25_31_32-79-4.jpg``:

    * field 0: ratio of the plate area to the whole image (not used here);
    * field 1: horizontal and vertical tilt angles;
    * field 2: bounding box, top-left and bottom-right corners;
    * field 3: four corner points, ordered bottom-right, bottom-left,
      top-left, top-right;
    * field 4: plate character indices, decoded by ``get_plate_licenses``.

    The label is written to ``<labelpath>/<image stem>.txt`` as one line:
    ``<classid> <xc> <yc> <w> <h>`` with six decimals per box value.

    :param filename: image file name (no directory part).
    :param labelpath: existing directory that receives the label file.
    :return: dict with keys ``filename``, ``bboxes`` (pixel xyxy), ``points``
        (array of corner points, one ``(x, y)`` row each), ``labels``,
        ``plates`` and ``angles``; each list holds a single entry.
    """
    fields = filename.split("-")
    angle = fields[1].split("_")
    box = [int(v) for v in fields[2].replace("&", "_").split("_")]  # xyxy
    corners = [int(v) for v in fields[3].replace("&", "_").split("_")]
    point = np.asarray(corners).reshape(-1, 2)
    plate = get_plate_licenses(fields[4].split("_"))

    bbox = get_bbox(imgsz, box)  # normalised xywh

    bboxes = [box]  # [xyxy]
    labels = ["plate"] * len(bboxes)
    annoinfo = {"filename": filename, "bboxes": bboxes, "points": [point], "labels": labels,
                "plates": [plate], "angles": [angle]}

    # NOTE(review): YOLO-style trainers usually expect zero-based class ids;
    # 1 is kept to preserve the existing output - confirm against the
    # training configuration.
    classid = 1  # plate
    coords = ' '.join(f'{x:.6f}' for x in bbox)
    info = f"{classid} {coords}\n"

    # Swap only the extension: str.replace('jpg', 'txt') would also rewrite
    # any other "jpg" occurring inside the name.
    labelname = os.path.join(labelpath, os.path.splitext(filename)[0] + '.txt')
    with open(labelname, 'w') as labelfile:
        labelfile.write(info)
    return annoinfo
if __name__ == "__main__":
    # Run the conversion on the directory that holds this script
    # (symlinks resolved).
    script_dir = os.path.dirname(os.path.realpath(__file__))
    ccpd2coco(script_dir)
View Code
3. 数据集下载
CCPD2019:官方原始数据,主要是蓝牌数据,约35万张
【下载地址】
https://pan.baidu.com/s/1i5AOjAbtkwb17Zy-NQGqkw
提取码:hm0u
CCPD2020:官方原始数据,主要是新能源绿牌数据,约1万
【下载地址】
https://pan.baidu.com/s/1JSpc9BZXFlPkXxRK4qUCyw
提取码:ol3j
【数据集官方地址】
https://github.com/detectRecog/CCPD.git
数据集目录
./
├── CCPD2019
│ ├── ccpd_base
│ ├── ccpd_blur
│ ├── ccpd_challenge
│ ├── ccpd_db
│ ├── ccpd_fn
│ ├── ccpd_np
│ ├── ccpd_rotate
│ ├── ccpd_tilt
│ ├── ccpd_weather
│ ├── LICENSE
│ ├── README.md
│ └── splits
├── CCPD2020
│ ├── ccpd_green
参考
2. 【开源数据集】智慧城市之CCPD车牌数据集_ccpd数据集
3. GitHub - detectRecog/CCPD: [ECCV 2018] CCPD: a diverse and well-annotated dataset for license plate ;
4. Zhenbo_Xu_Towards_End-to-End_License_ECCV_2018_paper;
5. CCPD车牌检测识别数据集_ccpd数据集全称
完