Project page of Mask-RCNN (TensorFlow version):

https://github.com/matterport/Mask_RCNN

Contents

Preparing the dataset

Preprocessing the labeled samples

Starting training


Preparing the dataset

First, split your dataset into two parts: a training set (train) and a validation set (val).
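
If your images are not split yet, something along these lines works; the all_images source folder and the 80/20 ratio here are just illustrative assumptions:

import os
import random
import shutil

# Hypothetical folder holding all the collected images
src = 'all_images'
random.seed(0)
names = [n for n in os.listdir(src) if n.lower().endswith('.jpg')]
random.shuffle(names)
split = int(len(names) * 0.8)  # assumed 80/20 train/val split
for subset, subset_names in (('train', names[:split]), ('val', names[split:])):
    dst = os.path.join('datasets', subset)
    os.makedirs(dst, exist_ok=True)
    for n in subset_names:
        shutil.copy(os.path.join(src, n), dst)
        # Also copy the matching .json annotation file, if it already exists
        jsonname = n[:-3] + 'json'
        if os.path.isfile(os.path.join(src, jsonname)):
            shutil.copy(os.path.join(src, jsonname), dst)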

Next comes annotation: label the samples with the labeling tool mentioned in another of my blog posts.

Preprocessing the labeled samples

Put the labeled images into the datasets folder, in two subfolders, train and val, holding the training and validation samples respectively. Each image is paired with a .json annotation file of the same base name, as in the layout sketched below.
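
The layout looks like this (the file names are hypothetical):

datasets/
├── train/
│   ├── 0001.jpg
│   ├── 0001.json
│   └── ...
└── val/
    ├── 0101.jpg
    ├── 0101.json
    └── ...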

The original images come in all kinds of sizes, while Mask R-CNN samples need to be normalized to a uniform size. Following the approach in inspect_data.ipynb, I created a new file, train_sample_convert.ipynb.

It normalizes the size of the images in the train and val folders under datasets, remaps the annotations onto the resized images, and stores the new images and annotations in the samples folder.

The contents of the file are as follows:

import os
import sys
import random
import visualize
#import model as modellib
import utils
import json
import skimage.color
import skimage.draw
import skimage.io
import numpy as np
from model import log
from config import Config


# Root directory of the project
ROOT_DIR = os.path.abspath('')
sys.path.append(ROOT_DIR)

# Folder holding the original data
ORIDATA_DIR = os.path.join(ROOT_DIR, "datasets")

# Subfolders of the original data to convert
oridirs = ['train', 'val']
# Folder for the normalized images
NORDATA_DIR = os.path.join(ROOT_DIR,'samples')


class CoalMineConfig(Config):
    """Configuration for normalizing the labeled samples.
    Derives from the base Config class and overrides the
    target image size.
    """
    # Give the configuration a recognizable name
    NAME = "CoalMine"
    
    # Set the limits of the small side and the large side;
    # that determines the normalized image shape.
    IMAGE_MIN_DIM = 512
    IMAGE_MAX_DIM = 512
 
config = CoalMineConfig()
#config.display()

def mkdir(path):
    # Strip surrounding whitespace and a trailing backslash
    path = path.strip().rstrip("\\")
 
    # Create the directory only if it does not exist yet
    if not os.path.exists(path):
        os.makedirs(path)
        print(path + ' created')
        return True
    else:
        print(path + ' already exists')
        return False


class DatasetChange(utils.Dataset):
    """Resizes the labeled images to the configured shape and
    remaps their polygon annotations onto the resized images.
    """

    def change_sample(self, dataset_dir, subset, config):
        """Resize one subset of the images and remap its annotations.
        dataset_dir: root path of the dataset.
        subset: train or val
        """
 
        # Only the train and val subsets are handled
        assert subset in ["train", "val"]
        
        dataset_dir = os.path.join(dataset_dir, subset)
        print("Source folder:", dataset_dir)
        newdate_dir = os.path.join(NORDATA_DIR, subset)
        print("Target folder:", newdate_dir)
        mkdir(newdate_dir)
 
        # Collect the .jpg files that have a matching .json annotation file
        sublist = os.listdir(dataset_dir)
        imagelist = []
        jsonlist = []
        for name in sublist:
            path = os.path.join(dataset_dir, name)
            if os.path.isfile(path) and path.lower().endswith(".jpg"):
                jsonname = path[:-3] + "json"
                if os.path.isfile(jsonname):
                    imagelist.append(name)
                    jsonlist.append(name[:-3] + "json")
        print("Converting", len(imagelist), "images")
        
        # Coordinate transform: scale, shift by the padding offset,
        # and add 0.5 so the int() cast below rounds to nearest
        change = lambda array, rate, step : array*rate + step + 0.5
        
        for i in range(len(imagelist)):
            # Read the annotation file, stripping a UTF-8 BOM if present
            fo = open(os.path.join(dataset_dir, jsonlist[i]), encoding='utf-8')
            text = fo.read()
            fo.close()
            if text.startswith(u'\ufeff'):
                text = text.encode('utf8')[3:].decode('utf8')
            annotations = json.loads(text)
            
            # Skip images that have no labeled regions
            if not annotations['regions']:
                print(imagelist[i], "no regions")
                continue

            image_path = os.path.join(dataset_dir, imagelist[i])
            new_path = os.path.join(newdate_dir, imagelist[i])
            new_json = os.path.join(newdate_dir, jsonlist[i])
            
            # Resize the image to the configured size and save it.
            # Note: this follows the older utils.resize_image signature
            # (padding= keyword, four return values).
            image = skimage.io.imread(image_path)
            if image.ndim != 3:
                image = skimage.color.gray2rgb(image)
            image, window, scale, padding = utils.resize_image(
                image, min_dim=config.IMAGE_MIN_DIM,
                max_dim=config.IMAGE_MAX_DIM, padding=config.IMAGE_PADDING)
            skimage.io.imsave(new_path, image)
            
            # Remap the polygon coordinates onto the resized image
            for j in range(len(annotations['regions'])):
                shape = annotations['regions'][j]['shape_attributes']
                xarray = np.array(shape['all_points_x'], float)
                shape['all_points_x'] = np.array(change(xarray, scale, padding[1][0]), int).tolist()
                yarray = np.array(shape['all_points_y'], float)
                shape['all_points_y'] = np.array(change(yarray, scale, padding[0][0]), int).tolist()
                
            with open(new_json, "w") as f:
                json.dump(annotations, f, separators=(',', ':'), ensure_ascii=False)


# Convert the sample images
for subdir in oridirs:
    dataset_ori = DatasetChange()
    dataset_ori.change_sample(ORIDATA_DIR, subdir, config)

After running this file, the samples directory contains the normalized train and val sample folders.
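
As an optional sanity check (my own addition, not part of the original notebook), the following snippet verifies that every remapped polygon still falls inside its resized image:

import os
import json
import skimage.io

for subset in ('train', 'val'):
    sample_dir = os.path.join(NORDATA_DIR, subset)
    for name in os.listdir(sample_dir):
        if not name.lower().endswith('.json'):
            continue
        with open(os.path.join(sample_dir, name), encoding='utf-8') as f:
            ann = json.load(f)
        image = skimage.io.imread(os.path.join(sample_dir, name[:-4] + 'jpg'))
        h, w = image.shape[:2]
        for r in ann['regions']:
            xs = r['shape_attributes']['all_points_x']
            ys = r['shape_attributes']['all_points_y']
            assert max(xs) < w and max(ys) < h, name + ': polygon outside image'
print('All remapped polygons fall inside their images')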

Starting training

The training file follows train_shapes.ipynb; create a new file, train_sample.ipynb.

The code is as follows:

import os
import sys
import random
import visualize
import model as modellib
import utils
import json
import skimage.draw
import skimage.io
import numpy as np
from model import log
from config import Config


# Root directory of the project
ROOT_DIR = os.path.abspath('')
# Directory to save logs and trained model checkpoints
MODEL_DIR = os.path.join(ROOT_DIR, "logs")
 
# Path to the pre-trained COCO weights
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
sys.path.append(ROOT_DIR)

# The samples folder holds the normalized training and validation sets
SAMPLES_DIR = os.path.join(ROOT_DIR, "samples")


class NewDataSetConfig(Config):
    """Configuration for training on the new dataset.
    Derives from the base Config class and overrides values
    specific to this dataset.
    """
    # Give the configuration a recognizable name
    NAME = "NewDataSet"
    
    # A 12 GB GPU (e.g. a TITAN X) can typically fit two 1024px images
    # at once; here we train with one image per GPU
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
    
    # Number of classes (background + stone + ironbolt)
    NUM_CLASSES = 1 + 2
    
    # Set the limits of the small side and the large side; this should
    # match the 512x512 size produced by train_sample_convert.ipynb
    IMAGE_MIN_DIM = 512
    IMAGE_MAX_DIM = 512
    
    # Number of training steps per epoch
    STEPS_PER_EPOCH = 500
    
    # Use smaller anchors because our objects are small
    RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128)  # anchor side in pixels

    # Reduce training ROIs per image because the images have
    # few objects. Aim to allow ROI sampling to pick 33% positive ROIs.
    TRAIN_ROIS_PER_IMAGE = 32

    # Use small validation steps since the epoch is small
    VALIDATION_STEPS = 5
    
    # Learning rate and momentum. Different optimizers (and different
    # framework implementations) behave differently, so experiment.
    LEARNING_RATE = 0.001
    LEARNING_MOMENTUM = 0.9
    # Weight decay factor
    WEIGHT_DECAY = 0.0001

config = NewDataSetConfig()
#config.display()


class NewDataSetDataset(utils.Dataset):
    """Loads the normalized dataset and generates instance masks
    from the polygon annotations.
    """

    def load_newdataset(self, dataset_dir, subset):
        """Load a subset of the dataset.
        dataset_dir: root path of the dataset.
        subset: train or val
        """
        # Register class names and IDs. For simplicity this example has two
        # classes, stone and ironbolt; for more classes, add one line each.
        self.add_class("newdataset", 1, "stone")
        self.add_class("newdataset", 2, "ironbolt")
        #self.add_class("newdataset", 3, "dog")
 
        # Train or validation subset?
        assert subset in ["train", "val"]
        dataset_dir = os.path.join(dataset_dir, subset)
 
        # Collect the .jpg files that have a matching .json annotation file
        imagelist = []
        jsonlist = []
        sublist = os.listdir(dataset_dir)
        for name in sublist:
            path = os.path.join(dataset_dir, name)
            if os.path.isfile(path) and path.lower().endswith(".jpg"):
                jsonname = path[:-3] + "json"
                if os.path.isfile(jsonname):
                    imagelist.append(name)
                    jsonlist.append(name[:-3] + "json")
        
        
        for i in range(len(imagelist)):
            # Read the annotation file, stripping a UTF-8 BOM if present
            fo = open(os.path.join(dataset_dir, jsonlist[i]), encoding='utf-8')
            text = fo.read()
            fo.close()
            if text.startswith(u'\ufeff'):
                text = text.encode('utf8')[3:].decode('utf8')
            annotations = json.loads(text)
            
            # Get the x, y coordinates of the polygon points that outline
            # each object instance. They are stored in r['shape_attributes']
            # (see the json format above).
            polygons = [r['shape_attributes'] for r in annotations['regions']]
            region = [r['region_attributes'] for r in annotations['regions']]
            image_path = os.path.join(dataset_dir, annotations['filename'])
            image = skimage.io.imread(image_path)
            height, width = image.shape[:2]

            self.add_image(
                "newdataset",
                image_id=annotations['filename'],
                path=image_path,
                width=width, height=height,
                polygons=polygons, region=region)
 
    def load_mask(self, image_id):
        """Generate instance masks for an image.
        Returns:
        masks: a bool array of shape [height, width, instance count],
               one mask per instance.
        class_ids: a 1D array of the class ID of each instance mask.
        """
 
        image_info = self.image_info[image_id]
        if image_info["source"] != "newdataset":
            return super(self.__class__, self).load_mask(image_id)
 
        # Convert the polygons to a bitmap mask of shape
        # [height, width, instance_count]
        info = self.image_info[image_id]
        mask = np.zeros([info["height"], info["width"], len(info["polygons"])],
                        dtype=np.uint8)
        for i, p in enumerate(info["polygons"]):
            # Get indexes of pixels inside the polygon and set them to 1
            rr, cc = skimage.draw.polygon(p['all_points_y'], p['all_points_x'])
            mask[rr, cc, i] = 1
        # Map each region's 'type' attribute to its class index
        region = image_info["region"]
        class_ids = np.array([self.class_names.index(s["type"]) for s in region])
        # Return the masks and class IDs
        return mask, class_ids.astype(np.int32)
 
    def image_reference(self, image_id):
        """Return the path of the image."""
        info = self.image_info[image_id]
        if info["source"] == "newdataset":
            # add_image stores the file path, so return that
            return info["path"]
        else:
            return super(self.__class__, self).image_reference(image_id)



# Load the training set
dataset_train = NewDataSetDataset()
dataset_train.load_newdataset(SAMPLES_DIR, "train")
dataset_train.prepare()

# Print training set info
print("Train Image Count: {}".format(len(dataset_train.image_ids)))
print("Class Count: {}".format(dataset_train.num_classes))
for i, info in enumerate(dataset_train.class_info):
    print("{:3}. {:50}".format(i, info['name']))
 
# Display the first two training samples to verify the masks are correct
image_ids = dataset_train.image_ids[:2]
for image_id in image_ids:
    image = dataset_train.load_image(image_id)
    mask, class_ids = dataset_train.load_mask(image_id)
    visualize.display_top_masks(image, mask, class_ids, dataset_train.class_names)
 
# Load the validation set
dataset_val = NewDataSetDataset()
dataset_val.load_newdataset(SAMPLES_DIR, "val")
dataset_val.prepare()
 
# Print validation set info
print("Val Image Count: {}".format(len(dataset_val.image_ids)))
print("Class Count: {}".format(dataset_val.num_classes))
for i, info in enumerate(dataset_val.class_info):
    print("{:3}. {:50}".format(i, info['name']))
 
# Display two random validation samples to verify the masks are correct
image_ids = np.random.choice(dataset_val.image_ids, 2)
for image_id in image_ids:
    image = dataset_val.load_image(image_id)
    mask, class_ids = dataset_val.load_mask(image_id)
    visualize.display_top_masks(image, mask, class_ids, dataset_val.class_names)



# Create the model in training mode
model = modellib.MaskRCNN(mode="training", config=config,
                          model_dir=MODEL_DIR)
 
# Choose how to initialize the weights: imagenet, coco, or last
init_with = "coco"
if init_with == "imagenet":
    model.load_weights(model.get_imagenet_weights(), by_name=True)
elif init_with == "coco":
    # Skip the output layers whose shapes depend on the number of classes
    model.load_weights(COCO_MODEL_PATH, by_name=True,
                       exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",
                                "mrcnn_bbox", "mrcnn_mask"])
elif init_with == "last":
    # Resume from the last checkpoint saved in MODEL_DIR
    model.load_weights(model.find_last()[1], by_name=True)



# Train only the head layers; adjust the learning rate and epochs as needed
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE,
            epochs=500, layers="heads")


# Save the weights manually. This is usually not needed because
# callbacks save a checkpoint after every epoch.
model_path = os.path.join(MODEL_DIR, "mask_rcnn_cocoNew.h5")
model.keras_model.save_weights(model_path)

After training finishes, the mask_rcnn_cocoNew.h5 weight file is saved in the model folder.
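
To try out the trained weights, here is a minimal inference sketch; test_image_path is a placeholder for one of your own images:

# Reuse the training config with batch size 1 for inference
class InferenceConfig(NewDataSetConfig):
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

inference_config = InferenceConfig()
model = modellib.MaskRCNN(mode="inference", config=inference_config,
                          model_dir=MODEL_DIR)
model.load_weights(model_path, by_name=True)

# test_image_path is a placeholder; point it at one of your own images
image = skimage.io.imread(test_image_path)
results = model.detect([image], verbose=1)
r = results[0]
visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'],
                            dataset_val.class_names, r['scores'])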

The code in this post was verified on Ubuntu. On Windows you may hit encoding problems when reading the annotation files; try changing the file's encoding parameter.
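
One option worth trying is the utf-8-sig codec, which strips a UTF-8 BOM automatically and would make the manual '\ufeff' check in the code above unnecessary:

# utf-8-sig transparently removes a leading BOM, if present
fo = open(os.path.join(dataset_dir, jsonlist[i]), encoding='utf-8-sig')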

If you run into any problems, feel free to leave a comment!