After working through how a caffemodel gets loaded, it is worth looking back at which functions this dnn module actually builds.
First, the blob header:
#ifndef __OPENCV_DNN_DNN_BLOB_HPP__
#define __OPENCV_DNN_DNN_BLOB_HPP__
#include <opencv2/core.hpp>
#include <vector>
#include <ostream>
namespace cv
{
namespace dnn
{
//Lightweight struct for storing and processing the shape of a blob
struct BlobShape
{
explicit BlobShape(int ndims = 4, int fill = 1); //!< Creates n-dim shape and fills it with @p fill
BlobShape(int num, int cn, int rows, int cols); //Creates 4-dim shape [num, cn, rows, cols]
BlobShape(int ndims, const int *sizes); //Creates n-dim shape from an array of sizes
BlobShape(const std::vector<int> &sizes); //Creates n-dim shape from a vector of sizes
template<int n>
BlobShape(const Vec<int, n> &shape); //!< Creates n-dim shape from @ref cv::Vec
//Returns the number of dimensions
int dims() const;
//Returns a reference to the size of the specified axis; -1 refers to the last axis, and a non-existent axis raises an error
int &size(int axis);
//Returns the size of the specified axis
int size(int axis) const;
//Does the same thing as size(axis)
int operator[](int axis) const;
//Does the same thing as size(int), but returns a writable reference
int &operator[](int axis);
//Does the same thing as size(int) const, but returns 1 if the axis doesn't exist
int xsize(int axis) const;
//Returns the product of all axis sizes
ptrdiff_t total();
//Returns a pointer to the first element of the continuous size array
const int *ptr() const;
//Checks whether two shapes are equal
bool equal(const BlobShape &other) const;
bool operator== (const BlobShape &r) const;
private:
cv::AutoBuffer<int,4> sz;
};
//Provides methods for continuous n-dimensional CPU and GPU array processing; the class supports switching between CPU and GPU as well as synchronization between them
class CV_EXPORTS Blob
{
public:
explicit Blob();
//Constructs a blob with the specified shape and type
explicit Blob(const BlobShape &shape, int type = CV_32F);
/** @brief Constructs 4-dimensional blob (so-called batch) from image or array of images.
* @param image 2-dimensional multi-channel or 3-dimensional single-channel image (or array of images)
* @param dstCn specify size of second axis of output blob
*/
explicit Blob(InputArray image, int dstCn = -1);
//Creates a blob with the specified shape and type
void create(const BlobShape &shape, int type = CV_32F);
//Creates the blob from data taken from cv::Mat or cv::UMat
void fill(InputArray in);
//Creates a blob from user data; if deepCopy is false, no CPU data is allocated and the blob wraps @p data instead
void fill(const BlobShape &shape, int type, void *data, bool deepCopy = true);
Mat& matRef(); //Returns a reference to the cv::Mat that holds the blob data
const Mat& matRefConst() const; //Returns a read-only reference to the cv::Mat that holds the blob data
UMat &umatRef(); //Returns a reference to the cv::UMat that holds the blob data (not implemented yet)
const UMat &umatRefConst() const; //Returns a read-only reference to the cv::UMat that holds the blob data (not implemented yet)
//Returns the number of blob dimensions
int dims() const;
int size(int axis) const;
int xsize(int axis) const;
//Computes the product of axis sizes over the range [startAxis, endAxis): startAxis is included, endAxis is excluded
size_t total(int startAxis = 0, int endAxis = INT_MAX) const;
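//e.g., for a blob of shape [2, 3, 32, 32] (worked example added here, not from the header):
//  total() == 2*3*32*32 == 6144, total(1) == 3072, total(2, 4) == 1024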
//Converts @p axis index to the canonical format (so that 0 <= axis < dims())
int canonicalAxis(int axis) const;
//Returns the shape of the blob
BlobShape shape() const;
//Checks whether the shapes of two blobs are equal
bool equalShape(const Blob &other) const;
//Returns the 2-d plane of the blob selected by the first two axes (n, cn)
Mat getPlane(int n, int cn);
//Shape getters for a 4-dimensional blob
int cols() const; //Returns the size of the fourth axis (number of columns)
int rows() const; //Returns the size of the third axis (number of rows)
int channels() const; //Returns the size of the second axis (number of channels)
int num() const; //Returns the size of the first axis (batch size)
Size size2() const; //Returns the sizes of the third and fourth axes as cv::Size(cols(), rows())
Vec4i shape4() const; //Returns the sizes of the first four axes
//Returns the linear index in the blob of the element at the given coordinates
//If n < dims() then the unspecified coordinates are filled with zeros
//If n > dims() then the extra coordinates are ignored
template<int n>
size_t offset(const Vec<int, n> &pos) const;
//Overload of offset()
size_t offset(int n = 0, int cn = 0, int row = 0, int col = 0) const;
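//e.g., assuming the blob is backed by the usual contiguous NCHW layout (my inference, not stated here):
//  offset(n, cn, row, col) == ((n*channels() + cn)*rows() + row)*cols() + col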
//CPU pointer getters
//Returns a pointer to the blob element at the specified position, stored on the CPU side
//@p n corresponds to the first axis, @p cn to the second
//If dims() > 4 then the unspecified coordinates are filled with zeros
//If dims() < 4 then the extra coordinates are ignored
uchar *ptr(int n = 0, int cn = 0, int row = 0, int col = 0);
//Overload
template<typename TFloat>
TFloat *ptr(int n = 0, int cn = 0, int row = 0, int col = 0);
//Overload returning a float pointer
float *ptrf(int n = 0, int cn = 0, int row = 0, int col = 0);
//TODO: add const ptr methods
//Shares data from another @p blob and returns a reference to *this
Blob &shareFrom(const Blob &blob);
//Changes the shape of the blob without copying the data
Blob &reshape(const BlobShape &shape);
//Returns the type of the blob
int type() const;
private:
const int *sizes() const;
Mat m;
};
//! @}
}
}
#include "blob.inl.hpp"
#endif
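To make the Blob interface concrete, here is a minimal usage sketch of my own (not part of the header), assuming the dnn module is built and linked:
#include <opencv2/dnn/blob.hpp>
using namespace cv::dnn;
void blobDemo()
{
    BlobShape shape(2, 3, 32, 32);        // num=2, channels=3, rows=32, cols=32
    Blob blob(shape, CV_32F);             // allocates contiguous float storage
    CV_Assert(blob.dims() == 4 && blob.total() == 6144);
    float *p = blob.ptrf(1, 0, 5, 7);     // element at image 1, channel 0, row 5, col 7
    *p = 1.0f;
    cv::Mat plane = blob.getPlane(1, 0);  // 2-d view of image 1, channel 0
    CV_Assert(plane.at<float>(5, 7) == 1.0f);
}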
Storage is hardly complete without a dictionary, which makes lookup and access quite convenient, so next is dict.hpp:
#ifndef __OPENCV_DNN_DNN_DICT_HPP__
#define __OPENCV_DNN_DNN_DICT_HPP__
#include <opencv2/core.hpp>
#include <map>
#include <ostream>
namespace cv
{
namespace dnn
{
//This struct stores a scalar value or an array of one of the following types: double, cv::String, or int64 (rarely needed, since double can exactly represent integers up to at least 2^52)
struct DictValue
{
DictValue(const DictValue &r);
DictValue(int p = 0) : type(Param::INT), pi(new AutoBuffer<int64,1>) { (*pi)[0] = p; } //!< Constructs integer scalar
DictValue(unsigned p) : type(Param::INT), pi(new AutoBuffer<int64,1>) { (*pi)[0] = p; } //!< Constructs integer scalar
DictValue(double p) : type(Param::REAL), pd(new AutoBuffer<double,1>) { (*pd)[0] = p; } //!< Constructs floating point scalar
DictValue(const String &p) : type(Param::STRING), ps(new AutoBuffer<String,1>) { (*ps)[0] = p; } //!< Constructs string scalar
template<typename TypeIter>
static DictValue arrayInt(TypeIter begin, int size); //!< Constructs integer array
template<typename TypeIter>
static DictValue arrayReal(TypeIter begin, int size); //!< Constructs floating point array
template<typename TypeIter>
static DictValue arrayString(TypeIter begin, int size); //!< Constructs array of strings
template<typename T>
T get(int idx = -1) const; //Tries to convert the array element with the specified index to the requested type.
int size() const;
//Type-checking predicates
bool isInt() const;
bool isString() const;
bool isReal() const;
DictValue &operator=(const DictValue &r);
friend std::ostream &operator<<(std::ostream &stream, const DictValue &dictv);
~DictValue();
private:
int type;
union
{
AutoBuffer<int64, 1> *pi;
AutoBuffer<double, 1> *pd;
AutoBuffer<String, 1> *ps;
void *p;
};
DictValue(int _type, void *_p) : type(_type), p(_p) {}
void release();
};
/** @brief This class implements name-value dictionary, values are instances of DictValue. */
class CV_EXPORTS Dict
{
typedef std::map<String, DictValue> _Dict;
_Dict dict;
public:
//Checks whether @p key exists in the dictionary
bool has(const String &key);
//If @p key is in the dictionary, returns a pointer to its value; otherwise returns NULL
DictValue *ptr(const String &key);
//If @p key is in the dictionary, returns a reference to its value; otherwise raises an error
const DictValue &get(const String &key) const;
//Overload
template <typename T>
T get(const String &key) const;
//If @p key is in the dictionary, returns its value; otherwise returns @p defaultValue
template <typename T>
T get(const String &key, const T &defaultValue) const;
//Sets a new value for @p key, or adds a new key-value pair to the dictionary
template<typename T>
const T &set(const String &key, const T &value);
friend std::ostream &operator<<(std::ostream &stream, const Dict &dict);
};
//! @}
}
}
#endif
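And a short sketch of how Dict is typically used (again my own illustration; the key names are made up):
#include <opencv2/dnn/dict.hpp>
using namespace cv::dnn;
void dictDemo()
{
    Dict params;
    params.set("kernel_size", 3);             // stored as an INT DictValue
    params.set("scale", 0.5);                 // stored as a REAL DictValue
    params.set("name", cv::String("conv1"));  // stored as a STRING DictValue
    if (params.has("kernel_size"))
    {
        int k = params.get<int>("kernel_size");  // typed retrieval
        CV_Assert(k == 3);
    }
    double eps = params.get("eps", 1e-5);        // falls back to the default when the key is absent
    CV_Assert(eps == 1e-5);
}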
Next, let's see what is special about the dnn.hpp header:
#ifndef __OPENCV_DNN_DNN_HPP__
#define __OPENCV_DNN_DNN_HPP__
#include <vector>
#include <opencv2/core.hpp>
#include <opencv2/dnn/dict.hpp>
#include <opencv2/dnn/blob.hpp>
namespace cv
{
namespace dnn //! This namespace is used for dnn module functionality.
{
//Initializes the dnn module and registers the built-in layers
CV_EXPORTS void initModule();
//This struct provides all the data needed to initialize a layer: a dictionary of scalar parameters (read through the Dict interface), blob parameters, and optional meta information (the name and type of the layer instance)
struct CV_EXPORTS LayerParams : public Dict
{
std::vector<Blob> blobs; //List of learned parameters stored as blobs.
String name; //Name of the layer instance
String type; //Type name used by the layer factory to create the layer
};
//This interface class allows creating new layers. Every class derived from Layer must implement allocate() to declare its outputs and forward() to compute them. A new class must be registered with the layer factory before use.
struct CV_EXPORTS Layer
{
//The learned parameters must be stored here so that they can be read via Net::getParam()
std::vector<Blob> blobs;
//Allocates internal buffers and output blobs with respect to the shapes of the inputs; @p input is the vector of input blobs, @p output is the vector of output blobs (which must be allocated)
//Each output blob must be created according to the shapes of the @p input blobs and the internal layer parameters. On the first call, @p output consists of empty blobs whose number is determined by the output connections; the method is called again whenever the size of any input blob changes
virtual void allocate(const std::vector<Blob*> &input, std::vector<Blob> &output) = 0;
//Forward pass: @p input holds the inputs, @p output receives the computed results
virtual void forward(std::vector<Blob*> &input, std::vector<Blob> &output) = 0;
//Returns the index of an input blob in the input array
//@p inputName is the label of the input blob
//Each layer input and output can be labeled using the %<layer_name%>[.output_name] notation
//This method maps the label of an input blob to its index in the input vector
virtual int inputNameToIndex(String inputName);
//Returns the index of an output blob in the output array
virtual int outputNameToIndex(String outputName);
String name; //!< Name of the layer instance, can be used for logging or other internal purposes.
String type; //!< Type name which was used for creating layer by layer factory.
Layer();
explicit Layer(const LayerParams &params); //!< Initialize only #name, #type and #blobs fields.
virtual ~Layer();
};
//This class allows creating and manipulating complex neural networks. A neural network is a directed graph whose vertices are layer instances and whose edges specify the relations between layer inputs and outputs.
//Each network layer has a unique integer id and a unique string name inside its network.
//LayerId can store either the layer name or the layer id.
class CV_EXPORTS Net
{
public:
Net(); //!< Default constructor.
~Net(); //!< Destructor frees the net only if there aren't references to the net anymore.
//Adds a new layer to the network.
//@p name is the name of the added layer.
//@p type is the registered type name of the layer.
//@p params holds the parameters used to initialize the layer.
//Returns the unique id of the created layer, or -1 on failure.
int addLayer(const String &name, const String &type, LayerParams &params);
//Adds a new layer and connects its first input to the first output of the previously added layer
int addLayerToPrev(const String &name, const String &type, LayerParams &params);
//Converts a layer name into its integer id; returns the id of the layer, or -1 if there is no such layer
int getLayerId(const String &layer);
//Container for either a string name or an integer id
typedef DictValue LayerId;
//Deletes the layer from the network
void deleteLayer(LayerId layer);
/**
* Descriptors have the following template <DFN><layer_name>[.input_number]</DFN>:
* - the first part of the template <DFN>layer_name</DFN> is the string name of the added layer.
* If this part is empty then the network input pseudo layer will be used;
* - the second optional part of the template <DFN>input_number</DFN>
* is either the number of the layer input, or its label.
* If this part is omitted then the first layer input will be used.
*
* @see setNetInputs(), Layer::inputNameToIndex(), Layer::outputNameToIndex()
*/
//Connects the output of one layer to the input of another; the first layer's output becomes the second layer's input.
void connect(String outPin, String inpPin);
void connect(int outLayerId, int outNum, int inpLayerId, int inpNum);
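//e.g. (illustrative, not from the header): connect("conv1", "relu1") feeds the first output of
//layer "conv1" into the first input of layer "relu1"; with integer ids this would be connect(convId, 0, reluId, 0)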
// As any other layer, this layer can label its outputs and this function provides an easy way to do this.
//Sets the output names of the network input pseudo-layer. Every network always has such a special pseudo-layer with id = 0.
//This layer stores the user's data and performs no computation.
//In fact, it is the only way to pass user data into the network.
void setNetInputs(const std::vector<String> &inputBlobNames);
void forward();
void forward(LayerId toLayer);
//Runs the forward pass from @p startLayer up to @p toLayer
void forward(LayerId startLayer, LayerId toLayer);
void forward(const std::vector<LayerId> &startLayers, const std::vector<LayerId> &toLayers);
//Optimized forward pass (not implemented yet): only the layers that changed after the previous forward() are recomputed
void forwardOpt(LayerId toLayer);
/** @overload */
void forwardOpt(const std::vector<LayerId> &toLayers);
//Sets a new value for the output blob of a layer.
//@p outputName is the descriptor of the updated layer output blob
//@p blob is the new blob value
//If the blob being updated is not empty, @p blob must have the same shape
void setBlob(String outputName, const Blob &blob);
//Returns the blob of the requested layer output
//@p outputName is the name of the layer output blob
Blob getBlob(String outputName);
//Sets a new value for a learned parameter of a layer.
//@p numParam is the index of the parameter in the layer's blob array.
//@p blob holds the new value.
//If the shape of the new blob differs from the previous one, the following forward pass may fail
void setParam(LayerId layer, int numParam, const Blob &blob);
//Returns the learned parameter blob of the layer
Blob getParam(LayerId layer, int numParam = 0);
private:
struct Impl;
Ptr<Impl> impl;
};
/** @brief Small interface class for loading trained serialized models of different dnn-frameworks. */
class Importer
{
public:
//Adds the loaded layers into the @p net and sets the connections between them
virtual void populateNet(Net net) = 0;
virtual ~Importer();
};
//Creates the importer of a Caffe network: @p prototxt is the path to the .prototxt config file, @p caffeModel is the path to the .caffemodel file. Returns a pointer to the importer interface, or NULL on failure.
CV_EXPORTS Ptr<Importer> createCaffeImporter(const String &prototxt, const String &caffeModel = String());
/** @brief Creates the importer of <a href="http://torch.ch">Torch7</a> framework network.
* @param filename path to the file, dumped from Torch by using torch.save() function.
* @param isBinary specifies whether the network was serialized in ascii mode or binary.
* @returns Pointer to the created importer, NULL in failure cases.
*
* @warning Torch7 importer is experimental now, you need to explicitly set the CMake opencv_dnn_BUILD_TORCH_IMPORTER flag to compile it.
*
* @note Ascii mode of Torch serializer is more preferable, because binary mode extensively uses the long type of the C language,
* which has a different bit-length on different systems.
*
* The loading file must contain a serialized <a href="https://github.com/torch/nn/blob/master/doc/module.md">nn.Module</a> object
* of the network being imported. Try to eliminate custom objects from the serialized data to avoid import errors.
*
* List of supported layers (i.e. object instances derived from Torch nn.Module class):
* - nn.Sequential
* - nn.Parallel
* - nn.Concat
* - nn.Linear
* - nn.SpatialConvolution
* - nn.SpatialMaxPooling, nn.SpatialAveragePooling
* - nn.ReLU, nn.TanH, nn.Sigmoid
* - nn.Reshape
*
* Also some equivalents of these classes from cunn, cudnn, and fbcunn may be successfully imported.
*/
//This is the Torch import interface; since I'm not familiar with Torch, I won't go into more detail.
CV_EXPORTS Ptr<Importer> createTorchImporter(const String &filename, bool isBinary = true);
/** @brief Loads blob which was serialized as torch.Tensor object of Torch7 framework.
* @warning This function has the same limitations as createTorchImporter().
*/
CV_EXPORTS Blob readTorchBlob(const String &filename, bool isBinary = true);
//! @}
}
}
#include <opencv2/dnn/layer.hpp>
#include <opencv2/dnn/dnn.inl.hpp>
#endif /* __OPENCV_DNN_DNN_HPP__ */
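Putting dnn.hpp together, the typical workflow (mirroring the module's caffe_googlenet sample) is: create an importer, populate a Net, feed the input pseudo-layer, and run forward. The file names below are placeholders, and the "data"/"prob" names assume the prototxt declares an input called data and an output layer called prob:
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
using namespace cv;
using namespace cv::dnn;
int main()
{
    initModule();                              // register the built-in layers
    Ptr<Importer> importer = createCaffeImporter("net.prototxt", "net.caffemodel");
    if (!importer)
        return -1;                             // importer is NULL on failure
    Net net;
    importer->populateNet(net);                // add the loaded layers and their connections
    importer.release();                        // the importer is no longer needed
    Mat img = imread("input.jpg");
    resize(img, img, Size(224, 224));          // resize to the network's expected input
    Blob inputBlob(img);                       // wrap the image as a 4-dim batch blob
    net.setBlob(".data", inputBlob);           // ".data" addresses the input pseudo-layer
    net.forward();                             // propagate through the whole network
    Blob prob = net.getBlob("prob");           // grab the output of the "prob" layer
    return 0;
}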
The last part is layer.hpp:
#ifndef __OPENCV_DNN_LAYER_HPP__
#define __OPENCV_DNN_LAYER_HPP__
#include <opencv2/dnn.hpp>
namespace cv
{
namespace dnn
{
//Layer factory that allows registering new layer types
class CV_EXPORTS LayerFactory
{
public:
//Each layer class must provide a function of this type to the factory
typedef Ptr<Layer>(*Constuctor)(LayerParams &params);
//Registers a layer class by its type name and constructor
static void registerLayer(const String &type, Constuctor constructor);
//Unregisters the layer class with the given type name
static void unregisterLayer(const String &type);
//Creates an instance of a registered layer; the parameters are the layer type name and the LayerParams used to initialize it
static Ptr<Layer> createLayerInstance(const String &type, LayerParams& params);
private:
LayerFactory();
struct Impl;
static Ptr<Impl> impl();
};
//Registers a layer constructor at runtime; the arguments are the layer type name and a pointer to the function that creates the layer. This macro must be placed inside function code.
#define REG_RUNTIME_LAYER_FUNC(type, constuctorFunc) \
LayerFactory::registerLayer(#type, constuctorFunc);
//Registers a layer class at runtime; the arguments are the layer type name and a C++ class derived from Layer. This macro must be placed inside function code.
#define REG_RUNTIME_LAYER_CLASS(type, class) \
LayerFactory::registerLayer(#type, _layerDynamicRegisterer<class>);
//Registers a layer constructor at module load time; the arguments are the layer type name and a pointer to the function that creates the layer. This macro must be placed outside function code.
#define REG_STATIC_LAYER_FUNC(type, constuctorFunc) \
static _LayerStaticRegisterer __LayerStaticRegisterer_##type(#type, constuctorFunc);
//Registers a layer class at module load time; the arguments are the layer type name and a C++ class derived from Layer. This macro must be placed outside function code.
#define REG_STATIC_LAYER_CLASS(type, class) \
Ptr<Layer> __LayerStaticRegisterer_func_##type(LayerParams &params) \
{ return Ptr<Layer>(new class(params)); } \
static _LayerStaticRegisterer __LayerStaticRegisterer_##type(#type, __LayerStaticRegisterer_func_##type);
//The function template below performs dynamic registration
template<typename LayerClass>
Ptr<Layer> _layerDynamicRegisterer(LayerParams &params)
{
return Ptr<Layer>(new LayerClass(params));
}
//Allows automatic registration of layer creators when the module is loaded
struct _LayerStaticRegisterer
{
String type;
_LayerStaticRegisterer(const String &type, LayerFactory::Constuctor constuctor)
{
this->type = type;
LayerFactory::registerLayer(type, constuctor);
}
~_LayerStaticRegisterer()
{
LayerFactory::unregisterLayer(type);
}
};
}
}
#endif
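To close the loop, here is a hedged sketch of how a custom layer could be written and registered through this factory; PassthroughLayer is an invented identity layer of my own, not something shipped with the module:
#include <opencv2/dnn/layer.hpp>
using namespace cv::dnn;
//Invented identity layer: copies each input blob to the corresponding output
class PassthroughLayer : public Layer
{
public:
    PassthroughLayer(LayerParams &params) : Layer(params) {}
    virtual void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
    {
        outputs.resize(inputs.size());
        for (size_t i = 0; i < inputs.size(); i++)
            outputs[i].create(inputs[i]->shape(), inputs[i]->type());
    }
    virtual void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
    {
        for (size_t i = 0; i < inputs.size(); i++)
            inputs[i]->matRefConst().copyTo(outputs[i].matRef());
    }
};
void registerCustomLayers()
{
    //runtime registration: run this before building any net that uses type "Passthrough"
    REG_RUNTIME_LAYER_CLASS(Passthrough, PassthroughLayer)
}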