1. 简介

NVIDIA Performance Primitives(NPP)库是一系列用于加速数据处理的库函数。NPP 最初面向图像和视频处理,而现在已被开发工程师应用于其他很多领域,包括信号处理。与同级别的纯 CPU 函数相比,这些函数最高可实现 5 - 10 倍的性能提升。利用 NPP,开发者能够使用(CUDA 4.1 中)2000 多个图像处理与信号处理基元,在数小时之内即可实现应用程序的大幅性能提升。

无论用GPU加速的版本代替CPU基元还是将NPP基元与现有的GPU加速流水线相结合,NPP都能够实现极高的性能,同时可缩短开发时间。

2. 例子

这里用npp做一个图片resize

main.cpp

#include <stdlib.h>
#include <stdio.h>
#include <opencv/cv.h>
#include <opencv/highgui.h>
#include <opencv2/opencv.hpp>

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include "npp.h"

using namespace cv;

int main()
{
Mat image = imread("../lena_1.jpg");
imshow("src",image);

int outwidth = 1024;
int outheight = 720;
int outsize = outwidth * outheight * sizeof(uchar3);

int inwidth = image.cols;
int inheight = image.rows;
int memSize = inwidth * inheight * sizeof(uchar3);

NppiSize srcsize = {inwidth, inheight};
NppiRect srcroi = {0, 0, inwidth, inheight};
NppiSize dstsize = {outwidth, outheight};
NppiRect dstroi = {0, 0, outwidth, outheight};

uchar3* d_src = NULL;
uchar3* d_dst = NULL;
cudaMalloc((void**)&d_src, memSize);
cudaMalloc((void**)&d_dst, outsize);
cudaMemcpy(d_src, image.data, memSize, cudaMemcpyHostToDevice);

// nvidia npp 图像处理
nppiResize_8u_C3R( (Npp8u*)d_src, inwidth * 3, srcsize, srcroi,
(Npp8u*)d_dst, outwidth * 3, dstsize, dstroi,
NPPI_INTER_LINEAR );

Mat newimage(outheight, outwidth, CV_8UC3);

cudaMemcpy(newimage.data, d_dst, outsize, cudaMemcpyDeviceToHost);
imshow("gpu", newimage);
waitKey(0);

cudaFree(d_src);
cudaFree(d_dst);
return 0;
}


CMakeLists.txt

# Build script for the NPP resize demo: one executable, `demo`, built from
# main.cpp and linked against OpenCV and the NPP library named below.
cmake_minimum_required(VERSION 2.8.0)
project(demo)

# Always build with debug information.
set(CMAKE_BUILD_TYPE Debug)
# OPENCV
find_package(OpenCV REQUIRED)

include_directories(${OpenCV_INCLUDE_DIRS})

# Locate the CUDA toolkit; this is what makes CUDA_ADD_EXECUTABLE available.
find_package(CUDA REQUIRED)

CUDA_ADD_EXECUTABLE(demo main.cpp)
# NOTE(review): libnppig.so is given by bare file name, so the linker must be
# able to find it on its search path - confirm on the target machine.
target_link_libraries(demo ${OpenCV_LIBS}
libnppig.so # NPP library that provides the resize function
)


3. 结果

【nvidia npp】——图像resize_#include