最近也在学习深度学习,第一步就是把卷积运算搞定了,用C代码实现多通道多输出的卷积运算非常有助于理解卷积原理,然后算法就可以跨平台运行啦。我是从嵌入式转到做深度学习的,现在的工作内容也是把别人做好的算法移植到嵌入式平台上。现在AI芯片可以直接跑caffe网络了,省去了一点一点移植caffe网络啦。自己写一遍代码对Blob数据存取也非常有帮助。

     1、用c code实现8通道,16输出的卷积运算

// Convolution test dimensions. ("HIGHT" is a misspelling of "HEIGHT",
// kept as-is because the name is used throughout the file.)
// Input feature map: 8 channels of 95x53.
#define FRAME_WIDTH 95
 #define FRAME_HIGHT 53
 #define FRAME_DEPTH 8
 // Padded input size: +2 = a 1-pixel border on each side, so a 3x3 kernel
 // produces a "same"-sized output.
 #define FRAME_WIDTH_PADD (FRAME_WIDTH+2)
 #define FRAME_HIGHT_PADD (FRAME_HIGHT+2)
 #define IN_STEP (FRAME_WIDTH*FRAME_DEPTH)

 // Output: same spatial size as the unpadded input, 16 channels.
 #define OUT_FRAME_WIDTH FRAME_WIDTH
 #define OUT_FRAME_HIGHT FRAME_HIGHT
 #define OUT_FRAME_DEPTH 16

 // 3x3 kernel.
 // NOTE(review): OUT_STEP, IS_BIAS, PARALLEL_COMPUTATIONAL_NUMBER_EQUAL_8
 // and the global buffers (input/output/weight/bias/bnA/bnB) are used below
 // but not defined in this excerpt — presumably declared elsewhere; confirm.
 #define KERNEL_WIDTH 3
 #define KERNEL_HIGHT 3

首先初始化参数。我源代码做的是卷积核为3X3、8通道-16输出的卷积运算,你可以改成任意的参数。因为网上已经有很多入门教程了,一些基本的知识我就不再重复了,例如输出的尺寸是怎么确定的、卷积运算的过程定义,还有多通道卷积的运算。
void init(void)
{
    // Fill input, weight, bnA and bnB with pseudo-random values in [-1.0, +1.0),
    // zero the output buffer and the bias terms. The fixed seed makes every
    // run reproducible so the result can be diffed against the caffe output.
    srand(0);

    for (int idx = 0; idx < OUT_FRAME_WIDTH * OUT_FRAME_HIGHT * OUT_FRAME_DEPTH; idx++)
    {
        output[idx] = 0.0f; // output starts at zero
    }

    // The input can also be switched to a fixed 0/1 pattern for easier debugging.
    for (int idx = 0; idx < FRAME_WIDTH_PADD * FRAME_HIGHT_PADD * FRAME_DEPTH; idx++)
    {
        input[idx] = ((float)(rand() % 20000) / 10000.0f) - 1.0f;
    }

    // Weights are initialized the same way as the input.
    for (int idx = 0; idx < KERNEL_WIDTH * KERNEL_HIGHT * FRAME_DEPTH * OUT_FRAME_DEPTH; idx++)
    {
        weight[idx] = ((float)(rand() % 20000) / 10000.0f) - 1.0f;
    }

    // Per-output-channel terms: bias is zeroed (matching the python side,
    // which also zeroes the caffe bias blob); batch-norm scale/shift are random.
    for (int ch = 0; ch < OUT_FRAME_DEPTH; ch++)
    {
        bias[ch] = 0;
        bnA[ch] = ((float)(rand() % 20000) / 10000.0f) - 1.0f;
        bnB[ch] = ((float)(rand() % 20000) / 10000.0f) - 1.0f;
    }
}
//caffe的运算就是矩阵的乘加运算,过程还是比较好理解的,就是取数的时候需要注意一点:因为是从一维数组中取数,要注意一下下标,其实和多维数组的原理是一样的。PARALLEL_COMPUTATIONAL_NUMBER_EQUAL_8这个参数是之前调试并行计算用的,这里只有取值为1时结果才正确。
int conv2D_BN_LK_padd_execute_float32_to_float32(void)
 {
 float* p_input = input;// (float*)this->_inputData[0].data;
 float* p_output = output;// (float*)this->_outputData[0].data +
 //this->_outputData[0].depth * ((this->_outputData[0].step * this->_outputData[0].padd_top) +
 //(this->_outputData[0].padd_left));
 int padd_validX = 0;//this->_padd_type == 1 ? this->_kernel_sizeX - 1 : 0;
 int padd_validY = 0;//this->_padd_type == 1 ? this->_kernel_sizeY - 1 : 0;
 float* p_weights;
 int strideY = 1;//this->_strideY;
 int strideX = 1;//this->_strideX;
 int depth_offset = 1;
 int depth_offset_factor = FRAME_DEPTH;// this->_inputData[0].depth;
 p_weights = weight;
 float* p_output_line = p_output;// -->output
 for (int n = 0; n < OUT_FRAME_DEPTH /*this->_outputData[0].depth*/; n ++)//PARALLEL_COMPUTATIONAL_NUMBER_EQUAL_8)
 {
 float conv2D_output[1];//[PARALLEL_COMPUTATIONAL_NUMBER_EQUAL_8];
 float output[PARALLEL_COMPUTATIONAL_NUMBER_EQUAL_8];
 for (int l = 0; l < OUT_FRAME_HIGHT; l = l + strideY)
 {
 //float* p_output_line = p_output;// -->output
 p_output += OUT_STEP;// this->_outputData[0].depth * this->_outputData[0].step;
 for (int m = 0; m < OUT_FRAME_WIDTH; m = m + strideX)
 {
 //p_weights = weight;// (float*)this->_p_weights;

                 for (int ind = 0; ind < PARALLEL_COMPUTATIONAL_NUMBER_EQUAL_8; ind++)
             {
             //conv2D_output[ind] = _isBias ? this->_p_bias[ind + n] : 0;
             conv2D_output[ind] = (IS_BIAS==1) ? bias[ind + n] : 0;
             }
                 for (int k = 0; k < FRAME_DEPTH/*this->_inputData[0].depth*/; k++)
 {
      for (int i = 0; i < KERNEL_HIGHT/*this->_kernel_sizeY*/; i++)
     {
     for (int j = 0; j < KERNEL_WIDTH/*this->_kernel_sizeX*/; j++)
     {


     //for (int k = 0; k < FRAME_DEPTH/*this->_inputData[0].depth*/; k++)
     {
     // Do PARALLEL_COMPUTATIONAL_NUMBER_EQUAL_8 filters at a time
     // load the bits of the input, they should be the same for all the filters;                       FRAME_DEPTH
     float* p_inputVal = &p_input[(((k) * FRAME_HIGHT_PADD/*this->_inputData[0].step*/ + (l + i)) * FRAME_WIDTH_PADD) + (m +j * depth_offset)];
     float inPix = (float)*p_inputVal;
     {
     conv2D_output[0] += p_weights[((n * FRAME_DEPTH + k) * KERNEL_HIGHT + i ) * KERNEL_WIDTH + j]  *  inPix;//(*p_weights) * inPix;
     }
     }
     }
     }
 // Do BN, Leaky and poolMax
 #if 0
 for (int ind = 0; ind < PARALLEL_COMPUTATIONAL_NUMBER_EQUAL_8; ind++)
 {
 // BatchNorm
 float val = (float)(conv2D_output[ind]);
 float new_val = ((float)(val)) * bnA/*this->_p_bnA*/[n + ind] + bnB/*this->_p_bnB*/[n + ind];
 // Leaky
 float leaky = YI_MAX((float)(0.1) * new_val, new_val);
 // poolMax
 output[ind] = (float)leaky;
 }
 #endif
 }
 for (int ind = 0; ind < PARALLEL_COMPUTATIONAL_NUMBER_EQUAL_8; ind++)
 {
 p_output_line[ind] = conv2D_output[ind];//output[ind];//;
 //DEBUG_UPDATE_BW_OUTPUT(4)
 }
 p_output_line += PARALLEL_COMPUTATIONAL_NUMBER_EQUAL_8;
 }
 }
 }
 return 0;}
一个辅助性的函数,把数组存到txt文件,好久没用过C++编程了,都很陌生了
// Dump a planar (depth-major / CHW) float volume to a text file so it can be
// read back with numpy.loadtxt: each image row on its own line, and a blank
// line separating successive depth slices. caffe stores blob data row-major,
// channel by channel, which is exactly this layout.
// Returns 1 on success, 0 if the file could not be opened.
int OneToThreeFile(char * filename,int h,int w,int d,float *data)
{
    ofstream out;
    out.open(filename);
    if (!out.is_open())
        return 0;

    for (int slice = 0; slice < d; slice++)
    {
        for (int row = 0; row < h; row++)
        {
            for (int col = 0; col < w; col++)
            {
                out << data[(slice * h + row) * w + col] << " ";
            }
            out << endl;
        }
        out << endl; // blank line between depth slices
    }

    out.close();
    return 1;
}
//把weight保存成16个文件,因为caffe中对于权重的加载时按输出进行的,16个输出就有16个权重矩阵
// Write the O x I x KH x KW weight tensor as `num` separate text files
// (0629/weight/randweight-1.txt ... randweight-<num>.txt), one file per
// output channel, in the same planar layout OneToThreeFile uses. caffe
// loads weights per output channel, so one file maps to one
// net.params['conv'][0].data[n] slot on the python side.
// Returns 1 on success, 0 if any file could not be opened.
int OneToFourFile(int h,int w,int d,int num,float *data)
{
    ofstream out;
    // BUG FIX(review): the original buffer was char[30]; the longest name,
    // "0629/weight/randweight-16.txt", is 29 characters + NUL — an exact fit
    // with zero slack. Enlarged so future name tweaks cannot overflow.
    char fname[64];

    for (int n = 0; n < num; n++)
    {
        sprintf(fname, "0629/weight/%s%d.txt", "randweight-", n + 1);
        out.open(fname);
        if (!out.is_open())
            return 0;

        for (int k = 0; k < d; k++)
        {
            for (int i = 0; i < h; i++)
            {
                for (int j = 0; j < w; j++)
                {
                    out << data[((n * d + k) * h + i) * w + j] << " ";
                }
                out << endl;
            }
            out << endl;
        }
        out.close();
    }
    return 1;
}

// Driver: initialize buffers, dump input and weights for the python/caffe
// reference, run the convolution, then dump the output for comparison.
int _tmain(int argc, _TCHAR* argv[])
{
    init();

    // Save the (padded) input so python can feed it to the caffe net.
    if (!OneToThreeFile("0629/16randinput0629-1.txt", FRAME_HIGHT_PADD, FRAME_WIDTH_PADD, FRAME_DEPTH, input))
        cout << "Cannot open file for input writing" << endl;

    // BUG FIX(review): the original call passed KERNEL_WIDTH as `h` and
    // KERNEL_HIGHT as `w`, swapped relative to the (h, w, d, num, data)
    // signature; the mistake was masked by the kernel being square (3x3).
    if (!OneToFourFile(KERNEL_HIGHT, KERNEL_WIDTH, FRAME_DEPTH, OUT_FRAME_DEPTH, weight))
        cout << "Cannot open file for weight writing" << endl;

    conv2D_BN_LK_padd_execute_float32_to_float32();

    // Save the C result so it can be diffed against caffe's output files.
    if (!OneToThreeFile("0629/16randoutput0629-c4.txt", OUT_FRAME_HIGHT, OUT_FRAME_WIDTH, OUT_FRAME_DEPTH, output))
        cout << "Cannot open file for output writing" << endl;

    return 0;
}

2、用Python调用caffe网络进行结果验证

caffe的环境安装请自行百度吧,在Ubuntu下编译caffe还挺方便的,下边直接上代码了。一些基本知识例如修改caffemodel参数、caffe网络的参数介绍,现在做caffe的人已经很多了,只要自己喜欢做就能学到。

# Verification script: feed the same random input and weights the C program
# dumped into a one-conv-layer caffe net, and save caffe's output per channel
# so it can be diffed against the C result.
# NOTE(review): several statements in the original post were fused onto one
# line by copy/paste (e.g. "... = im_inputweight = np.loadtxt(...)"); they are
# split back apart here. print statements are written with parentheses so the
# script is valid under both Python 2 (the caffe era) and Python 3.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import sys
import os

# Make sure that caffe is on the python path.
caffe_root = '/home/caffe-master'  # this file is expected to be in {caffe_root}/examples
os.chdir(caffe_root)
sys.path.insert(0, caffe_root + 'python')
import caffe

# configure plotting
plt.rcParams['figure.figsize'] = (10, 10)
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# Load the net and list its data and params.
caffe.set_mode_cpu()
net = caffe.Net('Net_surgery_result/conv_test.prototxt', caffe.TEST)
print("blobs {}\nparams {}".format(net.blobs.keys(), net.params.keys()))

# Load the padded input written by the C program: 8 channels of
# (53+2) x (95+2), stored channel plane after channel plane.
im = np.loadtxt('Net_surgery_result/16randinput0629-1.txt')
print('im_input1 shape{} '.format(im.shape))
im_3d = np.array(im).reshape(8, 55, 97)
print('im_input2 shape{} '.format(im_3d.shape))
im_input = im_3d[np.newaxis, :]  # add the batch dimension -> (1, 8, 55, 97)
print('im_input3 shape{} '.format(im_input.shape))
net.blobs['data'].reshape(*im_input.shape)
net.blobs['data'].data[...] = im_input

# Load the 16 per-output-channel weight files written by the C program
# (the original repeated this block 16 times by hand).
for out_ch in range(16):
    weight = np.loadtxt('Net_surgery_result/weight/randweight-{}.txt'.format(out_ch + 1))
    net.params['conv'][0].data[out_ch] = np.array(weight).reshape(8, 3, 3)

# Zero the bias, matching the C side's init().
net.params['conv'][1].data[0:] = 0
print("load net done......")

net.forward()

# Save each output channel to its own file for comparison with the C output.
for out_ch in range(16):
    np.savetxt('Net_surgery_result/output/output0628-{}.txt'.format(out_ch + 1),
               net.blobs['conv'].data[0, out_ch])
 # helper show filter outputs
 def show_filters(net):
     net.forward()
     print 'net.forward'
     plt.figure()
     filt_min, filt_max = net.blobs['conv'].data.min(), net.blobs['conv'].data.max()
     print filt_max
     print filt_min
     for i in range(3):
         plt.subplot(1,4,i+2)
         plt.title("filter #{} output".format(i))
         plt.imshow(net.blobs['conv'].data[0, i])#, vmin=filt_min, vmax=filt_max)
         #S = '\n'.join(str(num)[1:-1] for num in net.blobs['conv'].data[0, i])
         #open(r'output.txt','w').write(S)
         #np.savetxt('001',net.blobs['conv'].data[0, i])
         #np.savetxt("filter#{}output.txt".format(i),net.blobs['conv'].data[0, i])
         plt.tight_layout()
         plt.axis('off')
         #plt.show()
 # filter the image with initial 
 #show_filters(net)