Project page for Mask R-CNN (TensorFlow version):
https://github.com/matterport/Mask_RCNN
Contents
Preparing the dataset
Preprocessing the annotated samples
Starting training

Preparing the dataset
First, split your dataset into two parts: a training set (train) and a validation set (val).
Next, annotate the data. Here I use the labeling tool described in another post of mine to annotate the samples.
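The code below expects each annotation file to store, for every region, the polygon outline under shape_attributes and the class name under a type field in region_attributes. As an illustration (the filename and coordinate values here are made up), one annotation file corresponds to a structure like this after json.loads:

# Illustrative annotation structure; the keys are the ones the code below
# reads, the values are invented for the example
annotation = {
    "filename": "0001.jpg",
    "regions": [
        {
            "shape_attributes": {
                "all_points_x": [10, 60, 60, 10],   # polygon x coordinates
                "all_points_y": [20, 20, 80, 80]    # polygon y coordinates
            },
            "region_attributes": {"type": "stone"}  # class name of the region
        }
    ]
}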
Preprocessing the annotated samples
Place the annotated images in the datasets folder, split into train and val sub-folders for the training and validation samples. Each image in these folders has a .json annotation file with the same name.
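For reference, the expected layout looks like this (the filenames are just examples):

datasets/
    train/
        0001.jpg
        0001.json
        ...
    val/
        0101.jpg
        0101.json
        ...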
The original images come in all kinds of sizes, while Mask R-CNN samples need to be normalized to a uniform size. Following the processing in inspect_data.ipynb, I created a new train_sample_convert.ipynb.
It normalizes the size of the images in the train and val folders under datasets, transforms the annotation coordinates to match the new images, and stores the new images and annotations in the samples folder.
The notebook content is as follows:
import os
import sys
import random
import visualize
#import model as modellib
import utils
import json
import skimage.draw
import skimage.io      # needed by skimage.io.imread/imsave below
import skimage.color   # needed by skimage.color.gray2rgb below
import numpy as np
from model import log
from config import Config

# Root directory of the project
ROOT_DIR = os.path.abspath('')
sys.path.append(ROOT_DIR)

# Folder holding the original data
ORIDATA_DIR = os.path.join(ROOT_DIR, "datasets")
# Sub-folders of the original data to convert
oridirs = ['train', 'val']
# Folder for the normalized images
NORDATA_DIR = os.path.join(ROOT_DIR, 'samples')
class CoalMineConfig(Config):
    """Configuration for normalizing the coal-mine dataset.
    Derives from the base Config class and overrides the values
    that control the target image size.
    """
    # Give the configuration a recognizable name
    NAME = "CoalMine"
    # Target size of the normalized samples. Set the limits of the small
    # side and the large side, and that determines the image shape.
    IMAGE_MIN_DIM = 512
    IMAGE_MAX_DIM = 512

config = CoalMineConfig()
#config.display()
def mkdir(path):
    """Create a directory if it does not already exist."""
    # Strip surrounding whitespace and any trailing backslash
    path = path.strip()
    path = path.rstrip("\\")
    # Create the directory only if it does not exist yet
    if not os.path.exists(path):
        os.makedirs(path)
        print(path + ' created')
        return True
    else:
        print(path + ' already exists')
        return False
class DatasetChange(utils.Dataset):
    """Resizes the annotated samples to the size required by Mask R-CNN
    and rewrites the polygon coordinates in the annotation files to match
    the resized images.
    """

    def change_sample(self, dataset_dir, subset, config):
        """Convert one subset of the dataset.
        dataset_dir: root path of the dataset.
        subset: train or val
        """
        # Load either the training set or the validation set
        assert subset in ["train", "val"]
        dataset_dir = os.path.join(dataset_dir, subset)
        print("source folder:", dataset_dir)
        newdate_dir = os.path.join(NORDATA_DIR, subset)
        print("target folder:", newdate_dir)
        mkdir(newdate_dir)

        # Collect every .jpg that has a matching .json annotation file
        sublist = os.listdir(dataset_dir)
        imagelist = []
        jsonlist = []
        for i in range(0, len(sublist)):
            path = os.path.join(dataset_dir, sublist[i])
            if os.path.isfile(path) and path.lower().endswith(".jpg"):
                jsonname = path[:-3] + "json"
                if os.path.isfile(jsonname):
                    imagelist.append(sublist[i])
                    jsonlist.append(sublist[i][:-3] + "json")
        print("converting", len(imagelist), "images")
        # Coordinate transform: scale by the resize factor, then add the
        # padding offset (+0.5 rounds to the nearest pixel when cast to int)
        change = lambda array, rate, step: array * rate + step + 0.5

        for i in range(len(imagelist)):
            fo = open(os.path.join(dataset_dir, jsonlist[i]), encoding='utf-8')
            text = fo.read()
            fo.close()
            # Strip a UTF-8 BOM if the annotation tool wrote one
            if text.startswith(u'\ufeff'):
                text = text.encode('utf8')[3:].decode('utf8')
            annotations = json.loads(text)
            if not annotations['regions']:
                print(imagelist[i], "no regions")
                continue
            image_path = os.path.join(dataset_dir, imagelist[i])
            new_path = os.path.join(newdate_dir, imagelist[i])
            new_json = os.path.join(newdate_dir, jsonlist[i])
            # Resize the image to the configured size and save it.
            # (This matches the older Mask_RCNN API, which returns four values;
            # newer versions of utils.resize_image also return a crop.)
            image = skimage.io.imread(image_path)
            if image.ndim != 3:
                image = skimage.color.gray2rgb(image)
            image, window, scale, padding = utils.resize_image(
                image, min_dim=config.IMAGE_MIN_DIM,
                max_dim=config.IMAGE_MAX_DIM, padding=config.IMAGE_PADDING)
            skimage.io.imsave(new_path, image)
            # Transform the polygon coordinates to the resized image;
            # padding is [(top, bottom), (left, right), (0, 0)]
            for j in range(len(annotations['regions'])):
                xarray = np.array(annotations['regions'][j]['shape_attributes']['all_points_x'], float)
                xarray = np.array(change(xarray, scale, padding[1][0]), int)
                annotations['regions'][j]['shape_attributes']['all_points_x'] = xarray.tolist()
                yarray = np.array(annotations['regions'][j]['shape_attributes']['all_points_y'], float)
                yarray = np.array(change(yarray, scale, padding[0][0]), int)
                annotations['regions'][j]['shape_attributes']['all_points_y'] = yarray.tolist()
            with open(new_json, "w", encoding='utf-8') as f:
                json.dump(annotations, f, separators=(',', ':'), ensure_ascii=False)

# Convert both subsets
for dir in oridirs:
    dataset_ori = DatasetChange()
    dataset_ori.change_sample(ORIDATA_DIR, dir, config)
After running this notebook, the samples directory contains the two normalized sample folders, train and val.
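Before training, it is worth spot-checking one converted sample. Below is a minimal sketch, assuming it runs in the same notebook as the conversion code above (so NORDATA_DIR is defined) and that a converted pair such as 0001.jpg/0001.json exists; the filename is a placeholder. It draws the transformed polygon outlines onto the resized image so you can see whether they still line up:

import os
import json
import skimage.io
import skimage.draw

# Placeholder filename; substitute any image from samples/train
img_path = os.path.join(NORDATA_DIR, "train", "0001.jpg")
image = skimage.io.imread(img_path)
with open(img_path[:-3] + "json", encoding='utf-8') as f:
    annotations = json.load(f)

# Draw each transformed polygon outline in red onto the resized image
for r in annotations['regions']:
    xs = r['shape_attributes']['all_points_x']
    ys = r['shape_attributes']['all_points_y']
    rr, cc = skimage.draw.polygon_perimeter(ys, xs, shape=image.shape[:2])
    image[rr, cc] = [255, 0, 0]

skimage.io.imshow(image)
skimage.io.show()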
Starting training
The training notebook follows train_shapes.ipynb; create a new train_sample.ipynb.
The code is as follows:
import os
import sys
import random
import visualize
import model as modellib
import utils
import json
import skimage.draw
import skimage.io      # needed by skimage.io.imread below
import numpy as np
from model import log
from config import Config

# Root directory of the project
ROOT_DIR = os.path.abspath('')
# Directory to save logs and trained models
MODEL_DIR = os.path.join(ROOT_DIR, "logs")
# Path to the pre-trained COCO weights
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
sys.path.append(ROOT_DIR)
# samples holds the normalized training and validation sets
BALLOON_DIR = os.path.join(ROOT_DIR, "samples")
class NewDateSetConfig(Config):
    """Configuration for training on the new dataset.
    Derives from the base Config class and overrides values
    specific to this dataset.
    """
    # Give the configuration a recognizable name
    NAME = "NewDateSet"
    # A 12 GB card (e.g. TITAN X) can typically fit two images at once;
    # here we train with one image per GPU
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
    # Number of classes (including background)
    NUM_CLASSES = 1 + 2
    # Use small images for faster training. Set the limits of the small side
    # and the large side, and that determines the image shape.
    # Note: the preprocessing step normalized the samples to 512, so Mask R-CNN
    # resizes them again to 128 here; keep the two sizes consistent if you
    # want to avoid that second resize.
    IMAGE_MIN_DIM = 128
    IMAGE_MAX_DIM = 128
    # Number of training steps per epoch
    STEPS_PER_EPOCH = 500
    # Use smaller anchors because our image and objects are small
    RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128)  # anchor side in pixels
    # Reduce training ROIs per image because the images are small and have
    # few objects. Aim to allow ROI sampling to pick 33% positive ROIs.
    TRAIN_ROIS_PER_IMAGE = 32
    # Use small validation steps since the epoch is small
    VALIDATION_STEPS = 5
    # Learning rate and momentum. Different optimizers (and different
    # frameworks) need different settings, so experiment with these.
    LEARNING_RATE = 0.001
    LEARNING_MOMENTUM = 0.9
    # Weight decay regularization
    WEIGHT_DECAY = 0.0001

config = NewDateSetConfig()
#config.display()
class NewDataSetDataset(utils.Dataset):
    """Loads the normalized dataset produced by the preprocessing step:
    resized .jpg images with matching .json polygon annotations.
    """

    def load_newdataset(self, dataset_dir, subset):
        """Register the classes and images of the dataset.
        dataset_dir: root path of the dataset.
        subset: train or val
        """
        # Register the class names and IDs, one add_class call per class.
        # This dataset has two classes: 'stone' and 'ironbolt'. For more
        # classes, keep adding lines the same way.
        self.add_class("newdataset", 1, "stone")
        self.add_class("newdataset", 2, "ironbolt")
        #self.add_class("newdataset", 3, "dog")

        # Load either the training set or the validation set
        assert subset in ["train", "val"]
        dataset_dir = os.path.join(dataset_dir, subset)

        # Collect every .jpg that has a matching .json annotation file
        imagelist = []
        jsonlist = []
        sublist = os.listdir(dataset_dir)
        for i in range(0, len(sublist)):
            path = os.path.join(dataset_dir, sublist[i])
            if os.path.isfile(path) and path.lower().endswith(".jpg"):
                jsonname = path[:-3] + "json"
                if os.path.isfile(jsonname):
                    imagelist.append(sublist[i])
                    jsonlist.append(sublist[i][:-3] + "json")

        for i in range(len(imagelist)):
            fo = open(os.path.join(dataset_dir, jsonlist[i]), encoding='utf-8')
            text = fo.read()
            fo.close()
            # Strip a UTF-8 BOM if present
            if text.startswith(u'\ufeff'):
                text = text.encode('utf8')[3:].decode('utf8')
            annotations = json.loads(text)
            #print(annotations)
            # Get the x, y coordinates of the points outlining each object
            # instance; they are stored in 'shape_attributes' (see the JSON
            # format above), and the class name in 'region_attributes'
            polygons = [r['shape_attributes'] for r in annotations['regions']]
            region = [r['region_attributes'] for r in annotations['regions']]
            image_path = os.path.join(dataset_dir, annotations['filename'])
            image = skimage.io.imread(image_path)
            height, width = image.shape[:2]
            self.add_image(
                "newdataset",
                image_id=annotations['filename'],
                path=image_path,
                width=width, height=height,
                polygons=polygons, region=region)
    def load_mask(self, image_id):
        """Generate instance masks for an image.
        Returns:
            masks: an array of shape [height, width, instance count]
                with one binary mask per instance.
            class_ids: a 1D array of the class ID of each mask.
        """
        image_info = self.image_info[image_id]
        if image_info["source"] != "newdataset":
            return super(self.__class__, self).load_mask(image_id)

        # Convert the polygons to a bitmap mask of shape
        # [height, width, instance count]
        info = self.image_info[image_id]
        mask = np.zeros([info["height"], info["width"], len(info["polygons"])],
                        dtype=np.uint8)
        for i, p in enumerate(info["polygons"]):
            # Get the indexes of the pixels inside the polygon and set them to 1
            rr, cc = skimage.draw.polygon(p['all_points_y'], p['all_points_x'])
            mask[rr, cc, i] = 1

        # Map each region's "type" attribute to its class ID
        region = image_info["region"]
        class_ids = np.array([self.class_names.index(s["type"]) for s in region])
        return mask, class_ids.astype(np.int32)
    def image_reference(self, image_id):
        """Return the path of the image."""
        info = self.image_info[image_id]
        if info["source"] == "newdataset":
            return info["path"]
        else:
            super(self.__class__, self).image_reference(image_id)
# Load the training set
dataset_train = NewDataSetDataset()
dataset_train.load_newdataset(BALLOON_DIR, "train")
dataset_train.prepare()

# Print training set info
print("Train Image Count: {}".format(len(dataset_train.image_ids)))
print("Class Count: {}".format(dataset_train.num_classes))
for i, info in enumerate(dataset_train.class_info):
    print("{:3}. {:50}".format(i, info['name']))

# Display a couple of training samples to verify the masks are correct
image_ids = dataset_train.image_ids[:2]
for image_id in image_ids:
    image = dataset_train.load_image(image_id)
    mask, class_ids = dataset_train.load_mask(image_id)
    visualize.display_top_masks(image, mask, class_ids, dataset_train.class_names)

# Load the validation set
dataset_val = NewDataSetDataset()
dataset_val.load_newdataset(BALLOON_DIR, "val")
dataset_val.prepare()

# Print validation set info
print("Val Image Count: {}".format(len(dataset_val.image_ids)))
print("Class Count: {}".format(dataset_val.num_classes))
for i, info in enumerate(dataset_val.class_info):
    print("{:3}. {:50}".format(i, info['name']))

# Display a couple of validation samples as well
image_ids = np.random.choice(dataset_val.image_ids, 2)
for image_id in image_ids:
    image = dataset_val.load_image(image_id)
    mask, class_ids = dataset_val.load_mask(image_id)
    visualize.display_top_masks(image, mask, class_ids, dataset_val.class_names)
# Create the model in training mode
model = modellib.MaskRCNN(mode="training", config=config,
                          model_dir=MODEL_DIR)

# Choose how to initialize the weights: imagenet, coco, or last
init_with = "coco"
if init_with == "imagenet":
    model.load_weights(model.get_imagenet_weights(), by_name=True)
elif init_with == "coco":
    # Skip the layers whose shapes depend on the number of classes
    model.load_weights(COCO_MODEL_PATH, by_name=True,
                       exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",
                                "mrcnn_bbox", "mrcnn_mask"])
elif init_with == "last":
    # Resume from the last weights this model saved
    model.load_weights(model.find_last()[1], by_name=True)

# Train the head layers; set the learning rate, number of epochs, etc. here
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE, epochs=500, layers="heads")

# Save weights
# Typically not needed because callbacks save after every epoch
# Uncomment to save manually
model_path = os.path.join(MODEL_DIR, "mask_rcnn_cocoNew.h5")
model.keras_model.save_weights(model_path)
After training finishes, the mask_rcnn_cocoNew.h5 weights file is saved in the model folder.
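With the trained weights in hand, here is a minimal inference sketch. It assumes it runs in the same notebook after training (reusing NewDateSetConfig, MODEL_DIR, dataset_val, and the modellib/visualize imports from above) and follows the demo.ipynb pattern from the same repository; the test image path is a placeholder:

import skimage.io

class InferenceConfig(NewDateSetConfig):
    # Detect one image at a time
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

# Recreate the model in inference mode and load the trained weights
inference_model = modellib.MaskRCNN(mode="inference", config=InferenceConfig(),
                                    model_dir=MODEL_DIR)
inference_model.load_weights(os.path.join(MODEL_DIR, "mask_rcnn_cocoNew.h5"),
                             by_name=True)

# Run detection on a test image (placeholder path) and show the result
image = skimage.io.imread("test.jpg")  # hypothetical test image
results = inference_model.detect([image], verbose=1)
r = results[0]
visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'],
                            dataset_val.class_names, r['scores'])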
The code in this post was verified on Ubuntu. On Windows you may run into encoding problems when reading the annotation files; try adjusting the encoding parameter of the open calls.
If you run into any problems, feel free to leave a comment!