Preface

In remote-sensing scenarios, the images we work with are usually thousands of pixels on a side, far beyond what current GPUs can process in a single pass. This post briefly describes how to run object detection on such large remote-sensing images.
[Figure: sliding-window prediction for object detection on a large remote-sensing image (horizontal boxes)]

Sliding-Window Prediction

A brief description of the prediction process:

  1. Choose the sliding-window size a and the overlap b between adjacent windows;
  2. compute the window step length c = a - b;
  3. mirror-pad the original image twice: the first padding extends each side out to the smallest multiple of c that covers the original image, and the second padding adds the overlap b;
  4. slide the window over the padded image from left to right, top to bottom, with step c, running prediction on each tile;
  5. gather all boxes, labels, and scores and suppress duplicates (the code below uses weighted boxes fusion, WBF, for this step rather than plain NMS);
  6. filter the boxes: discard any box whose top-left corner (xmin, ymin) falls outside the original image;
  7. clamp the remaining boxes so that the bottom-right corner (xmax, ymax) does not exceed the original image bounds. The tiling geometry of steps 1-4 is sketched right after the figure below.
    [Figure: the sliding-window prediction process (horizontal boxes)]
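
To make steps 1 through 4 concrete, here is a minimal standalone sketch of the tiling geometry (the window size, overlap, and image size are illustrative; predict.py below plays the same game with cfg.image_size as a and cfg.gap as b):

a, b = 512, 128            # window size a, overlap b between adjacent windows
c = a - b                  # step length of the sliding window
raw_h, raw_w = 3000, 4000  # size of a hypothetical large image

rows = raw_h // c + 1      # window positions vertically
cols = raw_w // c + 1      # window positions horizontally
pad_h = rows * c - raw_h   # first mirror padding: out to a multiple of c
pad_w = cols * c - raw_w
padded_h = raw_h + pad_h + b  # second mirror padding adds the overlap b,
padded_w = raw_w + pad_w + b  # so every window is a full a-by-a tile
# window (i, j) covers rows i*c .. i*c + a and columns j*c .. j*c + a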

predict.py

import cv2
import gc
import os

import numpy as np
import torch
from torch.autograd import Variable

from eval.dataset_eval import build_dataloader
from effdet import EfficientDet, DetBenchEval
from effdet.config import get_efficientdet_config
from effdet.efficientdet import HeadNet
from eval.wbf import *
from itertools import product
from eval.tta import *


def load_net(cfg):
    config = get_efficientdet_config(cfg.model_name)
    net = EfficientDet(config, pretrained_backbone=False)

    # rebuild the classification head for our number of classes
    config.num_classes = cfg.num_classes
    config.image_size = cfg.image_size
    net.class_net = HeadNet(config, num_outputs=config.num_classes, norm_kwargs=dict(eps=.001, momentum=.01))

    checkpoint = torch.load(cfg.checkpoint_path)
    net.load_state_dict(checkpoint['model_state_dict'])

    del checkpoint
    gc.collect()

    # wrap the model so the forward pass returns post-processed detections
    net = DetBenchEval(net, config)
    net.eval()
    return net.cuda()


def make_predictions(images, net, score_threshold=0.11):
    # single-pass (no-TTA) variant; the main script below uses the TTA version
    images = Variable(torch.from_numpy(np.array(images)).cuda().float())
    predictions = []
    with torch.no_grad():
        det = net(images, torch.tensor([1]*images.shape[0]).float().cuda())
        for i in range(images.shape[0]):
            boxes = det[i].detach().cpu().numpy()[:, :4]
            scores = det[i].detach().cpu().numpy()[:, 4]
            indexes = np.where(scores > score_threshold)[0]
            boxes = boxes[indexes]
            # detections come back as (x, y, w, h); convert to (x1, y1, x2, y2)
            boxes[:, 2] = boxes[:, 2] + boxes[:, 0]
            boxes[:, 3] = boxes[:, 3] + boxes[:, 1]
            predictions.append({
                'boxes': boxes,
                'scores': scores[indexes],
            })
    # wrapped in a list to match the output shape of make_tta_predictions
    return [predictions]


def make_tta_predictions(images, net, image_size, score_threshold=0.5):
    # build all 8 combinations of horizontal flip / vertical flip / 90-degree rotation
    tta_transforms = []
    for tta_combination in product([TTAHorizontalFlip(image_size), None],
                                   [TTAVerticalFlip(image_size), None],
                                   [TTARotate90(image_size), None]):
        tta_transforms.append(TTACompose([tta_transform for tta_transform in tta_combination if tta_transform]))
    with torch.no_grad():
        images = Variable(torch.from_numpy(np.array(images)).cuda().float())
        predictions = []
        for tta_transform in tta_transforms:
            result = []
            det = net(tta_transform.batch_augment(images.clone()), torch.tensor([1]*images.shape[0]).float().cuda())

            for i in range(images.shape[0]):
                boxes = det[i].detach().cpu().numpy()[:, :4]
                scores = det[i].detach().cpu().numpy()[:, 4]
                labels = det[i].detach().cpu().numpy()[:, 5]
                indexes = np.where(scores > score_threshold)[0]
                boxes = boxes[indexes]
                # (x, y, w, h) -> (x1, y1, x2, y2), then map back to the
                # un-augmented tile coordinates
                boxes[:, 2] = boxes[:, 2] + boxes[:, 0]
                boxes[:, 3] = boxes[:, 3] + boxes[:, 1]
                boxes = tta_transform.deaugment_boxes(boxes.copy())
                result.append({
                    'boxes': boxes,
                    'scores': scores[indexes],
                    'labels': labels[indexes],
                })
            predictions.append(result)
    return predictions
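
eval/tta.py is not shown in this post. For reference, here is a minimal sketch of what a flip transform compatible with the calls above might look like; the class and method names match the usage in make_tta_predictions, but the bodies are my own assumptions, not the original implementation:

import torch

class TTAHorizontalFlip:
    def __init__(self, image_size):
        self.image_size = image_size

    def batch_augment(self, images):
        # images is an NCHW tensor; flip along the width axis
        return images.flip(3)

    def deaugment_boxes(self, boxes):
        # boxes are (x1, y1, x2, y2) in pixel coordinates of the flipped
        # tile; mirror x back and keep x1 < x2
        boxes[:, [0, 2]] = self.image_size - boxes[:, [2, 0]]
        return boxes

class TTACompose:
    def __init__(self, transforms):
        self.transforms = transforms

    def batch_augment(self, images):
        for t in self.transforms:
            images = t.batch_augment(images)
        return images

    def deaugment_boxes(self, boxes):
        # undo the transforms in reverse order
        for t in reversed(self.transforms):
            boxes = t.deaugment_boxes(boxes)
        return boxes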


def run_wbf(predictions, image_index, image_size=512, iou_thr=0.44, skip_box_thr=0.43, weights=None):
    # normalize pixel coordinates to [0, 1], as weighted_boxes_fusion expects
    boxes = [(prediction[image_index]['boxes']/(image_size-1)).tolist() for prediction in predictions]
    scores = [prediction[image_index]['scores'].tolist() for prediction in predictions]
    labels = [prediction[image_index]['labels'].tolist() for prediction in predictions]
    boxes, scores, labels = weighted_boxes_fusion(boxes, scores, labels, weights=weights, iou_thr=iou_thr, skip_box_thr=skip_box_thr)
    boxes = boxes*(image_size-1)
    return boxes, scores, labels


def run_wbf2(boxes, scores, labels, image_size, iou_thr=0.44, skip_box_thr=0.43):
    # second fusion pass over the per-tile results; here the boxes are in
    # full-image coordinates, so dividing by (image_size - 1) only rescales
    # them (values may exceed 1), which is harmless because IoU is scale-invariant
    boxes = [(box/(image_size-1)).tolist() for box in boxes]
    scores = [score.tolist() for score in scores]
    labels = [label.tolist() for label in labels]
    boxes, scores, labels = weighted_boxes_fusion(boxes, scores, labels, weights=None, iou_thr=iou_thr, skip_box_thr=skip_box_thr)
    boxes = boxes*(image_size-1)
    return boxes, scores, labels


# Color map for bounding boxes of detected objects from https://sashat.me/2017/01/11/list-of-20-simple-distinct-colors/
distinct_colors = {1: (0, 0, 255), 2: (0, 255, 0), 3: (255, 0, 0), 4: (255, 255, 0), 5: (0, 255, 255),
                   6: (50, 50, 50), 7: (0, 50, 50), 8: (50, 0, 50), 9: (100, 255, 100), 10: (255, 100, 255),
                   11: (0, 50, 50), 12: (0, 0, 50), 13: (50, 0, 0), 14: (100, 0, 100), 15: (0, 100, 255),
                   16: (0, 150, 255), 17: (150, 255, 0), 18: (255, 150, 0), 19: (255, 255, 150), 20: (150, 255, 255),
                   }


def get_key(dct, value):
    return [k for (k, v) in dct.items() if v == value]


def mkdir(path):
    if not os.path.exists(path):
        os.mkdir(path)


if __name__ == '__main__':
    import matplotlib.pyplot as plt
    import glob
    import tqdm
    from eval.config_eval import Config

    cfg = Config()
    imglist = glob.glob(f'{cfg.DATA_ROOT_PATH}/*.jpg')
    mkdir(cfg.out_dir)
    net = load_net(cfg)
    font = cv2.FONT_HERSHEY_SIMPLEX  # font for the box labels
    font_size = 1
    frame_size = cfg.image_size - cfg.gap  # step c = window size a - overlap b
    for n, imgPath in tqdm.tqdm(enumerate(imglist)):
        image_name = os.path.split(imgPath)[-1].split('.')[0]
        image = cv2.imread(imgPath, cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        raw_image = image.copy()
        raw_h, raw_w = image.shape[:2]
        row = raw_h // frame_size + 1
        col = raw_w // frame_size + 1
        radius_h = row * frame_size - raw_h
        radius_w = col * frame_size - raw_w
        # first reflect padding out to a multiple of the step length,
        # then a second reflect padding of the overlap, so every tile is full-size
        image = cv2.copyMakeBorder(image, 0, radius_h, 0, radius_w, cv2.BORDER_REFLECT)
        image = cv2.copyMakeBorder(image, 0, cfg.gap, 0, cfg.gap, cv2.BORDER_REFLECT)
        sample = raw_image.copy()
        boxes_, scores_, labels_ = [], [], []
        for i in tqdm.tqdm(range(row)):
            for j in range(col):
                # copy so the in-place normalization does not modify the padded image
                subImg = image[i * frame_size:(i + 1) * frame_size + cfg.gap,
                               j * frame_size:(j + 1) * frame_size + cfg.gap, :].copy()
                subImg /= 255.0
                subImg = np.transpose(subImg, (2, 0, 1))  # HWC -> CHW
                predictions = make_tta_predictions([subImg], net, cfg.image_size)
                index = 0
                # subImg = subImg.transpose(1, 2, 0)

                boxes, scores, labels = run_wbf(predictions, image_index=index, image_size=cfg.image_size)
                print(labels)
                boxes = boxes.astype(np.int32).clip(min=0, max=cfg.image_size - 1)
                # shift tile-local coordinates into full-image coordinates
                boxes[:, 0] = boxes[:, 0] + j * frame_size
                boxes[:, 1] = boxes[:, 1] + i * frame_size
                boxes[:, 2] = boxes[:, 2] + j * frame_size
                boxes[:, 3] = boxes[:, 3] + i * frame_size
                boxes_.append(boxes)
                scores_.append(scores)
                labels_.append(labels)

                # fig, ax = plt.subplots(1, 1, figsize=(16, 8))
                #
                # for box, score, label in zip(boxes, scores, labels):
                #     color = distinct_colors[label]
                #     cv2.rectangle(sample, (box[0], box[1]), (box[2], box[3]), color, 3)
                #     text_location = (box[0] + 2, box[1] - 4)
                #     key = get_key(cfg.class_dict, label)[0]
                #     sample = cv2.putText(sample, f'{key} {score * 100:.2f}%', text_location, font,
                #                          fontScale=0.5, color=color)
                #
                # plt.subplot(131)
                # plt.imshow(subImg)
                # plt.subplot(132)
                # plt.imshow(sample.astype(np.uint8))
                # plt.subplot(133)
                # plt.imshow(image.astype(np.uint8))
                # plt.show()

        # fuse the per-tile detections over the whole image (steps 5-7)
        boxes, scores, labels = run_wbf2(boxes_, scores_, labels_, image_size=cfg.image_size)
        fig, ax = plt.subplots(1, 1, figsize=(16, 8))
        all_annotations = np.array([[box[0], box[1], box[2], box[3], score, label] for box, score, label in zip(boxes, scores, labels)])

        # discard boxes whose top-left corner lies outside the original image
        keep = (all_annotations[:, 0] < raw_w) & (all_annotations[:, 1] < raw_h)
        result_annotations = all_annotations[keep]
        # clamp xmax and ymax to the original image bounds
        result_annotations[:, 2] = np.clip(result_annotations[:, 2], 0, raw_w)
        result_annotations[:, 3] = np.clip(result_annotations[:, 3], 0, raw_h)

        for ann in result_annotations:
            color = distinct_colors[int(ann[5])]
            cv2.rectangle(sample, (int(ann[0]), int(ann[1])), (int(ann[2]), int(ann[3])), color, 3)
            text_location = (int(ann[0]) + 2, int(ann[1]) - 4)
            key = get_key(cfg.class_dict, ann[5])[0]
            sample = cv2.putText(sample, f'{key} {ann[4]*100:.2f}%', text_location, font,
                                 fontScale=0.5, color=color)

        plt.imshow(sample.astype(np.uint8))
        plt.show()
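
predict.py reads all of its settings from eval/config_eval.py, which is not shown in this post. Below is a minimal sketch of the fields the script touches; the field names are inferred from the usage above, and every value is an illustrative placeholder:

# eval/config_eval.py -- hypothetical sketch, values are placeholders
class Config:
    model_name = 'tf_efficientdet_d5'               # effdet model id (assumed)
    num_classes = 20
    image_size = 512                                # sliding-window size a
    gap = 128                                       # overlap b between windows
    checkpoint_path = 'weights/best-checkpoint.bin'
    DATA_ROOT_PATH = 'data/test'                    # folder with the large .jpg images
    out_dir = 'results'
    class_dict = {'plane': 1, 'ship': 2}            # name -> label id (truncated example)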

wbf.py

# coding: utf-8
__author__ = 'ZFTurbo: https://kaggle.com/zfturbo'


import numpy as np


def bb_intersection_over_union(A, B):
    xA = max(A[0], B[0])
    yA = max(A[1], B[1])
    xB = min(A[2], B[2])
    yB = min(A[3], B[3])

    # compute the area of intersection rectangle
    interArea = max(0, xB - xA) * max(0, yB - yA)

    if interArea == 0:
        return 0.0

    # compute the area of both the prediction and ground-truth rectangles
    boxAArea = (A[2] - A[0]) * (A[3] - A[1])
    boxBArea = (B[2] - B[0]) * (B[3] - B[1])

    iou = interArea / float(boxAArea + boxBArea - interArea)
    return iou
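
A quick sanity check of the helper (my own example, not from the original code):

A = [0, 0, 10, 10]
B = [5, 5, 15, 15]
# intersection is the 5 x 5 square (5, 5, 10, 10), area 25;
# union is 100 + 100 - 25 = 175, so IoU = 25 / 175 = 1/7
print(bb_intersection_over_union(A, B))  # 0.14285714285714285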


def prefilter_boxes(boxes, scores, labels, weights, thr):
    # Create dict with boxes stored by its label
    new_boxes = dict()
    for t in range(len(boxes)):
        for j in range(len(boxes[t])):
            score = scores[t][j]
            if score < thr:
                continue
            label = int(labels[t][j])
            box_part = boxes[t][j]
            b = [int(label), float(score) * weights[t], float(box_part[0]), float(box_part[1]), float(box_part[2]), float(box_part[3])]
            if label not in new_boxes:
                new_boxes[label] = []
            new_boxes[label].append(b)

    # Sort each list in dict by score and transform it to numpy array
    for k in new_boxes:
        current_boxes = np.array(new_boxes[k])
        new_boxes[k] = current_boxes[current_boxes[:, 1].argsort()[::-1]]

    return new_boxes


def get_weighted_box(boxes, conf_type='avg'):
    """
    Create weighted box for set of boxes
    :param boxes: set of boxes to fuse
    :param conf_type: type of confidence one of 'avg' or 'max'
    :return: weighted box
    """

    box = np.zeros(6, dtype=np.float32)
    conf = 0
    conf_list = []
    for b in boxes:
        box[2:] += (b[1] * b[2:])
        conf += b[1]
        conf_list.append(b[1])
    box[0] = boxes[0][0]
    if conf_type == 'avg':
        box[1] = conf / len(boxes)
    elif conf_type == 'max':
        box[1] = np.array(conf_list).max()
    box[2:] /= conf
    return box
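
In other words, the fused coordinates are the confidence-weighted average of the member boxes. A small worked example (my own, using WBF's internal [label, score, x1, y1, x2, y2] row format):

b1 = np.array([1, 0.9, 0.10, 0.10, 0.50, 0.50])
b2 = np.array([1, 0.6, 0.12, 0.08, 0.52, 0.48])
fused = get_weighted_box([b1, b2], conf_type='avg')
# x1 = (0.9 * 0.10 + 0.6 * 0.12) / (0.9 + 0.6) = 0.108, and so on;
# the fused score is the average (0.9 + 0.6) / 2 = 0.75
print(fused)  # ~[1.0, 0.75, 0.108, 0.092, 0.508, 0.492]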


def find_matching_box(boxes_list, new_box, match_iou):
    best_iou = match_iou
    best_index = -1
    for i in range(len(boxes_list)):
        box = boxes_list[i]
        if box[0] != new_box[0]:
            continue
        iou = bb_intersection_over_union(box[2:], new_box[2:])
        if iou > best_iou:
            best_index = i
            best_iou = iou

    return best_index, best_iou


def weighted_boxes_fusion(boxes_list, scores_list, labels_list, weights=None, iou_thr=0.55, skip_box_thr=0.0, conf_type='avg', allows_overflow=False):
    '''
    :param boxes_list: list of boxes predictions from each model, each box is 4 numbers.
    It has 3 dimensions (models_number, model_preds, 4)
    Order of boxes: x1, y1, x2, y2. We expect float normalized coordinates [0; 1]
    :param scores_list: list of scores for each model
    :param labels_list: list of labels for each model
    :param weights: list of weights for each model. Default: None, which means weight == 1 for each model
    :param iou_thr: IoU value for boxes to be a match
    :param skip_box_thr: exclude boxes with score lower than this variable
    :param conf_type: how to calculate confidence in weighted boxes. 'avg': average value, 'max': maximum value
    :param allows_overflow: false if we want confidence score not exceed 1.0

    :return: boxes: boxes coordinates (Order of boxes: x1, y1, x2, y2).
    :return: scores: confidence scores
    :return: labels: boxes labels
    '''

    if weights is None:
        weights = np.ones(len(boxes_list))
    if len(weights) != len(boxes_list):
        print('Warning: incorrect number of weights {}. Must be: {}. Set weights equal to 1.'.format(len(weights), len(boxes_list)))
        weights = np.ones(len(boxes_list))
    weights = np.array(weights)

    if conf_type not in ['avg', 'max']:
        print('Unknown conf_type: {}. Must be "avg" or "max"'.format(conf_type))
        exit()

    filtered_boxes = prefilter_boxes(boxes_list, scores_list, labels_list, weights, skip_box_thr)
    if len(filtered_boxes) == 0:
        return np.zeros((0, 4)), np.zeros((0,)), np.zeros((0,))

    overall_boxes = []
    for label in filtered_boxes:
        boxes = filtered_boxes[label]
        new_boxes = []
        weighted_boxes = []

        # Clusterize boxes
        for j in range(0, len(boxes)):
            index, best_iou = find_matching_box(weighted_boxes, boxes[j], iou_thr)
            if index != -1:
                new_boxes[index].append(boxes[j])
                weighted_boxes[index] = get_weighted_box(new_boxes[index], conf_type)
            else:
                new_boxes.append([boxes[j].copy()])
                weighted_boxes.append(boxes[j].copy())

        # Rescale confidence based on number of models and boxes
        for i in range(len(new_boxes)):
            if not allows_overflow:
                weighted_boxes[i][1] = weighted_boxes[i][1] * min(weights.sum(), len(new_boxes[i])) / weights.sum()
            else:
                weighted_boxes[i][1] = weighted_boxes[i][1] * len(new_boxes[i]) / weights.sum()
        overall_boxes.append(np.array(weighted_boxes))

    overall_boxes = np.concatenate(overall_boxes, axis=0)
    overall_boxes = overall_boxes[overall_boxes[:, 1].argsort()[::-1]]
    boxes = overall_boxes[:, 2:]
    scores = overall_boxes[:, 1]
    labels = overall_boxes[:, 0]
    return boxes, scores, labels
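
Finally, a minimal usage sketch of the fusion entry point (my own example): fusing predictions from two models on one image, with normalized [0, 1] coordinates as the docstring requires. The two overlapping label-1 boxes (IoU about 0.82) are merged into one weighted box, while the label-2 box passes through untouched:

boxes_list = [
    [[0.10, 0.10, 0.50, 0.50], [0.60, 0.60, 0.90, 0.90]],  # model 1
    [[0.12, 0.08, 0.52, 0.48]],                            # model 2
]
scores_list = [[0.9, 0.8], [0.6]]
labels_list = [[1, 2], [1]]
boxes, scores, labels = weighted_boxes_fusion(
    boxes_list, scores_list, labels_list,
    weights=[2, 1], iou_thr=0.55, skip_box_thr=0.0)
print(boxes.shape)  # (2, 4): one fused label-1 box plus the label-2 box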