摘要:

本文主要关注opencv CPU版和cuda版的模板匹配算法,网上cuda版的资料不多,这里做个记录,以后用到也好有个参考。


文章目录

  • 摘要:
  • 1. opencv cuda版配置
  • 2. 源码
  • 3. 结果
  • 4. 总结
  • 5. 2023年1月13日补充测试


1. opencv cuda版配置

opencv cuda版需要自己用cmake编译,编译过程并不复杂:先用cmake生成vs项目,再用vs编译出opencv_worldXXX.dll。编译过程可参考link1、link2。

  • 编译后的opencv 目录
  • vs项目配置:包含目录、库目录、链接器


    注:这里的opencv_world341.lib对应的opencv_world341.dll有两种办法加入到项目中,一是:在系统环境变量中添加它的路径,二是:把它直接复制到项目.exe所在路径。如果有第三种方法,比如在vs项目的什么地方添加一下什么路径就行的话,还请告知,因为我总觉得这两者都不怎么高效,特别是需要移植到不同计算机上时。

2. 源码

  • 包含cpu、gpu版的模板匹配算法demo示例,还有一个速度对比测试。
# include <time.h>

# include <chrono>
# include <cstdio>
# include <iostream>
# include <map>
# include <string>

# include <opencv.hpp>

/**
 * @brief Template matching on the GPU through cv::cuda::TemplateMatching.
 *
 * Uploads both images to the device, runs the matcher selected by @p mode,
 * downloads the score map into @p result and reports the best score/location.
 *
 * @param srcImage  Image to search in (host memory).
 * @param tempImage Template to search for (host memory).
 * @param result    Receives the score map downloaded from the device.
 * @param matchVal  Receives the best score: the minimum for modes 0/1
 *                  (squared difference), the maximum for every other mode.
 * @param matchLoc  Receives the location of @p matchVal inside @p result.
 * @param mode      0 TM_SQDIFF, 1 TM_SQDIFF_NORMED, 2 TM_CCORR, 3 TM_CCORR_NORMED,
 *                  4 TM_CCOEFF, 5 TM_CCOEFF_NORMED; any other value falls back
 *                  to TM_CCOEFF_NORMED.
 * @return 0 on success, -1 if an input is empty or the template does not fit
 *         inside the source image.
 */
int gpuTemplateMatch(const cv::Mat &srcImage, const cv::Mat &tempImage, cv::Mat &result,
	double &matchVal, cv::Point &matchLoc, int mode)
{
	if (srcImage.empty() || tempImage.empty())
	{
		std::cout << "ERROR:In function gpuTemplateMatch: input image is empty! \n";
		return -1;
	}
	// Same guard as cpuTemplateMatch: reject a template that does not fit
	// instead of handing it to the CUDA matcher.
	if (srcImage.cols < tempImage.cols || srcImage.rows < tempImage.rows)
	{
		std::cout << "ERROR:In function gpuTemplateMatch: source image must not be smaller than the template \n";
		return -1;
	}

	// Translate the numeric mode into the OpenCV method constant.
	int method = cv::TM_CCOEFF_NORMED;
	switch (mode)
	{
	case 0: method = cv::TM_SQDIFF;        break; // R = sum (t-Roi)^2
	case 1: method = cv::TM_SQDIFF_NORMED; break; // R = sum (t-Roi)^2 / sqrt(sum t^2 * sum Roi^2)
	case 2: method = cv::TM_CCORR;         break; // R = sum t*Roi
	case 3: method = cv::TM_CCORR_NORMED;  break; // R = sum t*Roi / sqrt(sum t^2 * sum Roi^2)
	case 4: method = cv::TM_CCOEFF;        break; // R = sum t1*Roi1, t1 = t - t_mean, Roi1 = Roi - Roi_mean
	case 5: method = cv::TM_CCOEFF_NORMED; break; // R = sum t1*Roi1 / sqrt(sum t1^2 * sum Roi1^2)
	default:                               break; // unknown mode: keep TM_CCOEFF_NORMED
	}

	// Host -> device transfer of both inputs.
	cv::cuda::GpuMat d_srcImage;
	cv::cuda::GpuMat d_tempImage;
	cv::cuda::GpuMat d_result;
	d_srcImage.upload(srcImage);
	d_tempImage.upload(tempImage);

	cv::Ptr<cv::cuda::TemplateMatching> alg = cv::cuda::createTemplateMatching(srcImage.type(), method);
	alg->match(d_srcImage, d_tempImage, d_result);
	d_result.download(result);

	// Squared-difference scores are best when smallest; all others when largest.
	const bool bestIsMinimum = (method == cv::TM_SQDIFF || method == cv::TM_SQDIFF_NORMED);
	if (bestIsMinimum)
	{
		cv::minMaxLoc(result, &matchVal, nullptr, &matchLoc, nullptr);
	}
	else
	{
		cv::minMaxLoc(result, nullptr, &matchVal, nullptr, &matchLoc);
	}

	return 0;
}


/**
 * @brief Template matching on the CPU through cv::matchTemplate.
 *
 * See https://docs.opencv.org/3.4.12/de/da9/tutorial_template_matching.html
 *
 * @param srcImage  Image to search in.
 * @param tempImage Template to search for.
 * @param result    Receives the score map written by cv::matchTemplate.
 * @param matchVal  Receives the best score: the minimum for modes 0/1
 *                  (squared difference), the maximum for every other mode.
 * @param matchLoc  Receives the location of @p matchVal inside @p result.
 * @param mode      0 TM_SQDIFF, 1 TM_SQDIFF_NORMED, 2 TM_CCORR, 3 TM_CCORR_NORMED,
 *                  4 TM_CCOEFF, 5 TM_CCOEFF_NORMED; any other value falls back
 *                  to TM_CCOEFF_NORMED.
 * @return 0 on success, -1 if an input is empty or the template does not fit
 *         inside the source image.
 */
int cpuTemplateMatch(const cv::Mat &srcImage, const cv::Mat &tempImage, cv::Mat &result,
	double &matchVal, cv::Point &matchLoc, int mode)
{
	if (srcImage.empty() || tempImage.empty())
	{
		std::cout << "ERROR:In function cpuTemplateMatch: input image is empty! \n";
		return -1;
	}

	// The template has to fit inside the source image in both dimensions.
	if (srcImage.cols < tempImage.cols || srcImage.rows < tempImage.rows)
	{
		std::cout << "ERROR:In function cpuTemplateMatch: source image must not be smaller than the template \n";
		return -1;
	}

	// Translate the numeric mode into the OpenCV method constant.
	int method = cv::TM_CCOEFF_NORMED;
	switch (mode)
	{
	case 0: method = cv::TM_SQDIFF;        break; // R = sum (t-Roi)^2
	case 1: method = cv::TM_SQDIFF_NORMED; break; // R = sum (t-Roi)^2 / sqrt(sum t^2 * sum Roi^2)
	case 2: method = cv::TM_CCORR;         break; // R = sum t*Roi
	case 3: method = cv::TM_CCORR_NORMED;  break; // R = sum t*Roi / sqrt(sum t^2 * sum Roi^2)
	case 4: method = cv::TM_CCOEFF;        break; // R = sum t1*Roi1, t1 = t - t_mean, Roi1 = Roi - Roi_mean
	case 5: method = cv::TM_CCOEFF_NORMED; break; // R = sum t1*Roi1 / sqrt(sum t1^2 * sum Roi1^2)
	default:                               break; // unknown mode: keep TM_CCOEFF_NORMED
	}

	cv::matchTemplate(srcImage, tempImage, result, method);

	// Squared-difference scores are best when smallest; all others when largest.
	const bool bestIsMinimum = (method == cv::TM_SQDIFF || method == cv::TM_SQDIFF_NORMED);
	if (bestIsMinimum)
	{
		cv::minMaxLoc(result, &matchVal, nullptr, &matchLoc, nullptr);
	}
	else
	{
		cv::minMaxLoc(result, nullptr, &matchVal, nullptr, &matchLoc);
	}

	return 0;
}


int speedTest()
{
	std::map<int, std::string> matchMode =
	{
		{0,"TM_SQDIFF"},
		{1,"TM_SQDIFF_NORMED"},
		{2,"TM_CCORR"},
		{3,"TM_CCORR_NORMED"},
		{4,"TM_CCOEFF"},
		{5,"cv::TM_CCOEFF_NORMED"}
	};
	/*std::string srcPath = "K:\\imageData\\totalBoard\\image2\\00000103_1-1.png";
	std::string tempPath = "K:\\imageData\\totalBoard\\image2\\00000103_1-2.png";
	cv::Mat srcImage = cv::imread(srcPath, 0);
	cv::Mat tempImage = cv::imread(tempPath, 0);*/

	cv::Mat srcImage = cv::Mat::zeros(cv::Size(200,200),CV_8UC1);
	cv::Mat tempImage = cv::Mat::ones(cv::Size(20, 20), CV_8UC1);
	clock_t start, end;
	double matchVal;
	cv::Point matchLoc;
	cv::Mat result;
	cv::cuda::GpuMat d_srcImage, d_tempImage, d_result;
	cv::Ptr<cv::cuda::TemplateMatching> alg;
	int TIMES = 10;

	for (int mode = 0; mode < 6; mode++)
	{
		for (int size = 100; size < 1000; size += 100)
		{
			//resize the image 
			cv::resize(srcImage, srcImage, cv::Size(2000 , 2000 ));
			cv::resize(tempImage, tempImage, cv::Size(size, size));
			cv::randu(srcImage,cv::Scalar(0),cv::Scalar(255));
			cv::randu(tempImage,cv::Scalar(0),cv::Scalar(255));
			d_srcImage.upload(srcImage);
			d_tempImage.upload(tempImage);
			//gpu match
			start = clock();
			for (int times = 0; times < TIMES; times++)
			{
				gpuTemplateMatch(srcImage, tempImage, result, matchVal, matchLoc, mode);
			}
		   end = clock();
			auto runtime_gpu = (end - start) / TIMES * 1000 / CLOCKS_PER_SEC;

			//cpu match
			start = clock();
			for (int times = 0; times < TIMES; times++)
			{
				cpuTemplateMatch(srcImage, tempImage, result, matchVal, matchLoc, mode);
			}
			end = clock();
			auto runtime_cpu = (end - start) / TIMES * 1000 / CLOCKS_PER_SEC;

			//gpu compute only
			start = clock();
			for (int times = 0; times < TIMES; times++)
			{
				alg = cv::cuda::createTemplateMatching(srcImage.type(), mode);//
				alg->match(d_srcImage, d_tempImage, d_result);
			}
			end = clock();
			auto runtime_gpuComputing = (end - start) / TIMES * 1000 / CLOCKS_PER_SEC;
			
			printf("[+++++++++++++++++++++++++++++++++++++++++++++]\n");
			printf("srcSize=[%d,%d], tempSize=[%d,%d]\n", srcImage.rows, srcImage.cols, tempImage.rows, tempImage.cols);
			printf("match mode:%s\n", matchMode[mode].c_str());
			printf("gpu total runtime:%d ms\n", runtime_gpu);
			printf("cpu total runtime:%d ms\n",runtime_cpu);
			printf("cpuT / gpuT :%3f\n", double(runtime_cpu) / double(runtime_gpu));
			printf("gpu compute time:%dms\n", runtime_gpuComputing);
		}
	}
	return 0;
}


int gpuTemplateMatchDemo()
{
	std::string srcPath = "K:\\imageData\\totalBoard\\image2\\00000103_1-1.png";
	std::string tempPath = "K:\\imageData\\totalBoard\\image2\\00000103_1-2.png";
	cv::Mat srcImage = cv::imread(srcPath, 0);
	cv::Mat tempImage = cv::imread(tempPath, 0);

	//match
	double matchVal;
	cv::Point matchLoc;
	cv::Mat result;
	int mode = 3;
	gpuTemplateMatch(srcImage, tempImage, result, matchVal, matchLoc, mode);

	//show result
	std::cout << "matchVal = " << matchVal << std::endl;
	cv::Point topLeft = matchLoc;
	cv::Point bottomRight = cv::Point(topLeft.x + tempImage.cols, topLeft.y + tempImage.rows);
	cv::Mat drawImage = cv::imread(srcPath);
	cv::rectangle(drawImage, cv::Rect(topLeft, bottomRight), cv::Scalar(0, 255, 0),2);
	cv::imshow("srcImage", srcImage);
	cv::imshow("tempImage", tempImage);
	cv::imshow("drawImage", drawImage);
	//show results
	cv::normalize(result, result, 0, 1, cv::NORM_MINMAX);
	cv::imshow("result", result);
	cv::waitKey(0);
	cv::destroyAllWindows();

	return 0;
}


int cpuTemplateMatchDemo()
{
	//prepare image and template
	std::string srcPath = "K:\\imageData\\totalBoard\\image2\\00000103_1-1.png";
	std::string tempPath = "K:\\imageData\\totalBoard\\image2\\00000103_1-2.png";
	cv::Mat srcImage = cv::imread(srcPath, 0);
	cv::Mat tempImage = cv::imread(tempPath, 0);

	//match
	double matchVal;
	cv::Point matchLoc;
	cv::Mat result;
	int mode = 1;
	cpuTemplateMatch(srcImage, tempImage, result, matchVal, matchLoc, mode);

	//show result
	std::cout << "matchVal = " << matchVal << std::endl;
	cv::Point topLeft = matchLoc;
	cv::Point bottomRight = cv::Point(topLeft.x + tempImage.cols, topLeft.y + tempImage.rows);
	cv::Mat drawImage = cv::imread(srcPath);
	cv::rectangle(drawImage, cv::Rect(topLeft, bottomRight), cv::Scalar(0, 255, 0),2); 
	cv::imshow("srcImage", srcImage);
	cv::imshow("tempImage", tempImage);
	cv::imshow("drawImage", drawImage);
	//show results
	cv::normalize(result, result, 0, 1, cv::NORM_MINMAX);
	cv::imshow("result", result);
	cv::waitKey(0);
	cv::destroyAllWindows();

	return 0;
}


/**
 * @brief Runs the GPU demo, the CPU demo and the speed comparison in order.
 *
 * Every step is run even if an earlier one reports an error.
 *
 * @return 0 if all three steps returned 0, otherwise 1.
 */
int main()
{
	int failedSteps = 0;
	if (gpuTemplateMatchDemo() != 0)
	{
		++failedSteps;
	}
	if (cpuTemplateMatchDemo() != 0)
	{
		++failedSteps;
	}
	if (speedTest() != 0)
	{
		++failedSteps;
	}
	return failedSteps == 0 ? 0 : 1;
}

3. 结果

  • demo
  • 速度对比(部分结果)
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[100,100]
match mode:TM_SQDIFF_NORMED
gpu total runtime:103 ms
cpu total runtime:106 ms
cpuT / gpuT :1.029126
gpu compute time:91ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[200,200]
match mode:TM_SQDIFF_NORMED
gpu total runtime:103 ms
cpu total runtime:95 ms
cpuT / gpuT :0.922330
gpu compute time:90ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[300,300]
match mode:TM_SQDIFF_NORMED
gpu total runtime:101 ms
cpu total runtime:99 ms
cpuT / gpuT :0.980198
gpu compute time:89ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[400,400]
match mode:TM_SQDIFF_NORMED
gpu total runtime:101 ms
cpu total runtime:97 ms
cpuT / gpuT :0.960396
gpu compute time:90ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[500,500]
match mode:TM_SQDIFF_NORMED
gpu total runtime:100 ms
cpu total runtime:94 ms
cpuT / gpuT :0.940000
gpu compute time:90ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[600,600]
match mode:TM_SQDIFF_NORMED
gpu total runtime:111 ms
cpu total runtime:91 ms
cpuT / gpuT :0.819820
gpu compute time:102ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[700,700]
match mode:TM_SQDIFF_NORMED
gpu total runtime:111 ms
cpu total runtime:91 ms
cpuT / gpuT :0.819820
gpu compute time:102ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[800,800]
match mode:TM_SQDIFF_NORMED
gpu total runtime:110 ms
cpu total runtime:89 ms
cpuT / gpuT :0.809091
gpu compute time:102ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[900,900]
match mode:TM_SQDIFF_NORMED
gpu total runtime:108 ms
cpu total runtime:85 ms
cpuT / gpuT :0.787037
gpu compute time:101ms
  • 速度对比测试时的GPU状态

4. 总结

GPU加速模板匹配看起来效果并不是很好。测试了不同大小的图片,有的情况下GPU的速度会超过CPU;本来觉得应该会有几倍的加速效果,但其实并没有,大多数情况下反而变慢了。开始觉得是CPU向GPU传图的过程耗时较多,后来去掉传图的过程、只看匹配过程,发现它的计算本身就比CPU慢,不知道是不是因为这块GPU太低端了。

5. 2023年1月13日补充测试

2023年1月13日补充测试

  • 平台

opencv 4.4.0
cuda 11.1
GPU RTX 3090
CPU i9 10900x @3.7GHZ

  • 结论

在用RTX 3090测试opencv模板匹配时,对于比较大的图片(搜索图2000 * 2000,模板在300 * 300~500 * 500之间),GPU版模板匹配的速度是CPU版的5倍左右;但在搜索图为200 * 200、模板在20 * 20~60 * 60的情况下,CPU版的速度是GPU版的2倍左右。也就是说,当搜索图和模板图都较小时,用GPU加速是没用的。

[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[100,100]
match mode:TM_SQDIFF
gpu total runtime:20 ms
cpu total runtime:129 ms
cpuT / gpuT :6.450000
gpu compute time:8ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[300,300]
match mode:TM_SQDIFF
gpu total runtime:20 ms
cpu total runtime:255 ms
cpuT / gpuT :12.750000
gpu compute time:15ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[500,500]
match mode:TM_SQDIFF
gpu total runtime:20 ms
cpu total runtime:150 ms
cpuT / gpuT :7.500000
gpu compute time:10ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[700,700]
match mode:TM_SQDIFF
gpu total runtime:25 ms
cpu total runtime:147 ms
cpuT / gpuT :5.880000
gpu compute time:11ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[900,900]
match mode:TM_SQDIFF
gpu total runtime:24 ms
cpu total runtime:150 ms
cpuT / gpuT :6.250000
gpu compute time:8ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[100,100]
match mode:TM_SQDIFF_NORMED
gpu total runtime:21 ms
cpu total runtime:141 ms
cpuT / gpuT :6.714286
gpu compute time:21ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[300,300]
match mode:TM_SQDIFF_NORMED
gpu total runtime:34 ms
cpu total runtime:272 ms
cpuT / gpuT :8.000000
gpu compute time:27ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[500,500]
match mode:TM_SQDIFF_NORMED
gpu total runtime:18 ms
cpu total runtime:163 ms
cpuT / gpuT :9.055556
gpu compute time:11ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[700,700]
match mode:TM_SQDIFF_NORMED
gpu total runtime:25 ms
cpu total runtime:159 ms
cpuT / gpuT :6.360000
gpu compute time:21ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[900,900]
match mode:TM_SQDIFF_NORMED
gpu total runtime:33 ms
cpu total runtime:149 ms
cpuT / gpuT :4.515152
gpu compute time:10ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[100,100]
match mode:TM_CCORR
gpu total runtime:16 ms
cpu total runtime:88 ms
cpuT / gpuT :5.500000
gpu compute time:9ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[300,300]
match mode:TM_CCORR
gpu total runtime:19 ms
cpu total runtime:225 ms
cpuT / gpuT :11.842105
gpu compute time:10ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[500,500]
match mode:TM_CCORR
gpu total runtime:24 ms
cpu total runtime:125 ms
cpuT / gpuT :5.208333
gpu compute time:19ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[700,700]
match mode:TM_CCORR
gpu total runtime:18 ms
cpu total runtime:122 ms
cpuT / gpuT :6.777778
gpu compute time:3ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[900,900]
match mode:TM_CCORR
gpu total runtime:12 ms
cpu total runtime:126 ms
cpuT / gpuT :10.500000
gpu compute time:5ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[100,100]
match mode:TM_CCORR_NORMED
gpu total runtime:27 ms
cpu total runtime:139 ms
cpuT / gpuT :5.148148
gpu compute time:20ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[300,300]
match mode:TM_CCORR_NORMED
gpu total runtime:24 ms
cpu total runtime:264 ms
cpuT / gpuT :11.000000
gpu compute time:23ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[500,500]
match mode:TM_CCORR_NORMED
gpu total runtime:35 ms
cpu total runtime:161 ms
cpuT / gpuT :4.600000
gpu compute time:22ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[700,700]
match mode:TM_CCORR_NORMED
gpu total runtime:35 ms
cpu total runtime:154 ms
cpuT / gpuT :4.400000
gpu compute time:11ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[900,900]
match mode:TM_CCORR_NORMED
gpu total runtime:29 ms
cpu total runtime:152 ms
cpuT / gpuT :5.241379
gpu compute time:10ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[100,100]
match mode:TM_CCOEFF
gpu total runtime:20 ms
cpu total runtime:115 ms
cpuT / gpuT :5.750000
gpu compute time:16ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[300,300]
match mode:TM_CCOEFF
gpu total runtime:29 ms
cpu total runtime:244 ms
cpuT / gpuT :8.413793
gpu compute time:31ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[500,500]
match mode:TM_CCOEFF
gpu total runtime:32 ms
cpu total runtime:143 ms
cpuT / gpuT :4.468750
gpu compute time:17ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[700,700]
match mode:TM_CCOEFF
gpu total runtime:25 ms
cpu total runtime:138 ms
cpuT / gpuT :5.520000
gpu compute time:8ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[900,900]
match mode:TM_CCOEFF
gpu total runtime:20 ms
cpu total runtime:138 ms
cpuT / gpuT :6.900000
gpu compute time:8ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[100,100]
match mode:cv::TM_CCOEFF_NORMED
gpu total runtime:27 ms
cpu total runtime:143 ms
cpuT / gpuT :5.296296
gpu compute time:25ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[300,300]
match mode:cv::TM_CCOEFF_NORMED
gpu total runtime:29 ms
cpu total runtime:275 ms
cpuT / gpuT :9.482759
gpu compute time:43ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[500,500]
match mode:cv::TM_CCOEFF_NORMED
gpu total runtime:22 ms
cpu total runtime:166 ms
cpuT / gpuT :7.545455
gpu compute time:15ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[700,700]
match mode:cv::TM_CCOEFF_NORMED
gpu total runtime:30 ms
cpu total runtime:160 ms
cpuT / gpuT :5.333333
gpu compute time:27ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[900,900]
match mode:cv::TM_CCOEFF_NORMED
gpu total runtime:30 ms
cpu total runtime:155 ms
cpuT / gpuT :5.166667
gpu compute time:13ms

D:\myAPP\vs-proj\cudaTemplateMatch\x64\Release>D:\myAPP\vs-proj\cudaTemplateMatch\x64\Release\cudaTemplateMatch.exe
matchVal = 0.999751
matchVal = 0.00049773
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[200,200], tempSize=[10,10]
match mode:TM_SQDIFF
gpu total runtime:0 ms
cpu total runtime:3 ms
cpuT / gpuT :inf
gpu compute time:0ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[200,200], tempSize=[30,30]
match mode:TM_SQDIFF
gpu total runtime:5 ms
cpu total runtime:2 ms
cpuT / gpuT :0.400000
gpu compute time:2ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[200,200], tempSize=[50,50]
match mode:TM_SQDIFF
gpu total runtime:3 ms
cpu total runtime:2 ms
cpuT / gpuT :0.666667
gpu compute time:2ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[200,200], tempSize=[70,70]
match mode:TM_SQDIFF
gpu total runtime:4 ms
cpu total runtime:1 ms
cpuT / gpuT :0.250000
gpu compute time:2ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[200,200], tempSize=[90,90]
match mode:TM_SQDIFF
gpu total runtime:4 ms
cpu total runtime:1 ms
cpuT / gpuT :0.250000
gpu compute time:2ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[200,200], tempSize=[10,10]
match mode:TM_SQDIFF_NORMED
gpu total runtime:2 ms
cpu total runtime:3 ms
cpuT / gpuT :1.500000
gpu compute time:2ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[200,200], tempSize=[30,30]
match mode:TM_SQDIFF_NORMED
gpu total runtime:4 ms
cpu total runtime:2 ms
cpuT / gpuT :0.500000
gpu compute time:2ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[200,200], tempSize=[50,50]
match mode:TM_SQDIFF_NORMED
gpu total runtime:3 ms
cpu total runtime:2 ms
cpuT / gpuT :0.666667
gpu compute time:2ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[200,200], tempSize=[70,70]
match mode:TM_SQDIFF_NORMED
gpu total runtime:4 ms
cpu total runtime:2 ms
cpuT / gpuT :0.500000
gpu compute time:2ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[200,200], tempSize=[90,90]
match mode:TM_SQDIFF_NORMED
gpu total runtime:3 ms
cpu total runtime:2 ms
cpuT / gpuT :0.666667
gpu compute time:2ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[200,200], tempSize=[10,10]
match mode:TM_CCORR
gpu total runtime:0 ms
cpu total runtime:2 ms
cpuT / gpuT :inf
gpu compute time:0ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[200,200], tempSize=[30,30]
match mode:TM_CCORR
gpu total runtime:2 ms
cpu total runtime:1 ms
cpuT / gpuT :0.500000
gpu compute time:1ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[200,200], tempSize=[50,50]
match mode:TM_CCORR
gpu total runtime:2 ms
cpu total runtime:1 ms
cpuT / gpuT :0.500000
gpu compute time:1ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[200,200], tempSize=[70,70]
match mode:TM_CCORR
gpu total runtime:2 ms
cpu total runtime:2 ms
cpuT / gpuT :1.000000
gpu compute time:1ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[200,200], tempSize=[90,90]
match mode:TM_CCORR
gpu total runtime:2 ms
cpu total runtime:1 ms
cpuT / gpuT :0.500000
gpu compute time:1ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[200,200], tempSize=[10,10]
match mode:TM_CCORR_NORMED
gpu total runtime:1 ms
cpu total runtime:2 ms
cpuT / gpuT :2.000000
gpu compute time:1ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[200,200], tempSize=[30,30]
match mode:TM_CCORR_NORMED
gpu total runtime:4 ms
cpu total runtime:2 ms
cpuT / gpuT :0.500000
gpu compute time:2ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[200,200], tempSize=[50,50]
match mode:TM_CCORR_NORMED
gpu total runtime:4 ms
cpu total runtime:2 ms
cpuT / gpuT :0.500000
gpu compute time:2ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[200,200], tempSize=[70,70]
match mode:TM_CCORR_NORMED
gpu total runtime:5 ms
cpu total runtime:2 ms
cpuT / gpuT :0.400000
gpu compute time:2ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[200,200], tempSize=[90,90]
match mode:TM_CCORR_NORMED
gpu total runtime:4 ms
cpu total runtime:2 ms
cpuT / gpuT :0.500000
gpu compute time:3ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[200,200], tempSize=[10,10]
match mode:TM_CCOEFF
gpu total runtime:2 ms
cpu total runtime:3 ms
cpuT / gpuT :1.500000
gpu compute time:2ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[200,200], tempSize=[30,30]
match mode:TM_CCOEFF
gpu total runtime:4 ms
cpu total runtime:1 ms
cpuT / gpuT :0.250000
gpu compute time:2ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[200,200], tempSize=[50,50]
match mode:TM_CCOEFF
gpu total runtime:4 ms
cpu total runtime:2 ms
cpuT / gpuT :0.500000
gpu compute time:2ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[200,200], tempSize=[70,70]
match mode:TM_CCOEFF
gpu total runtime:4 ms
cpu total runtime:2 ms
cpuT / gpuT :0.500000
gpu compute time:2ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[200,200], tempSize=[90,90]
match mode:TM_CCOEFF
gpu total runtime:4 ms
cpu total runtime:1 ms
cpuT / gpuT :0.250000
gpu compute time:2ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[200,200], tempSize=[10,10]
match mode:cv::TM_CCOEFF_NORMED
gpu total runtime:3 ms
cpu total runtime:3 ms
cpuT / gpuT :1.000000
gpu compute time:5ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[200,200], tempSize=[30,30]
match mode:cv::TM_CCOEFF_NORMED
gpu total runtime:7 ms
cpu total runtime:2 ms
cpuT / gpuT :0.285714
gpu compute time:4ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[200,200], tempSize=[50,50]
match mode:cv::TM_CCOEFF_NORMED
gpu total runtime:5 ms
cpu total runtime:2 ms
cpuT / gpuT :0.400000
gpu compute time:4ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[200,200], tempSize=[70,70]
match mode:cv::TM_CCOEFF_NORMED
gpu total runtime:5 ms
cpu total runtime:2 ms
cpuT / gpuT :0.400000
gpu compute time:3ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[200,200], tempSize=[90,90]
match mode:cv::TM_CCOEFF_NORMED
gpu total runtime:5 ms
cpu total runtime:2 ms
cpuT / gpuT :0.400000
gpu compute time:4ms

D:\myAPP\vs-proj\cudaTemplateMatch\x64\Release>