最近也在学习深度学习,第一步就是把卷积运算搞定了,用C代码实现多通道多输出的卷积运算非常有助于理解卷积原理,然后算法就可以跨平台运行啦。我是从嵌入式转到做深度学习的,现在的工作内容也是把别人做好的算法移植到嵌入式平台上。现在AI芯片可以直接跑caffe网络了,省去了一点一点移植caffe网络啦。自己写一遍代码对Blob数据存取也非常有帮助。
1、用c code实现8通道,16输出的卷积运算
/* Input feature map: FRAME_DEPTH channels of FRAME_WIDTH x FRAME_HIGHT values,
 * stored flattened CHW; a 1-pixel border is added on each side ("same" padding). */
#define FRAME_WIDTH 95
#define FRAME_HIGHT 53
#define FRAME_DEPTH 8
#define FRAME_WIDTH_PADD (FRAME_WIDTH+2)
#define FRAME_HIGHT_PADD (FRAME_HIGHT+2)
#define IN_STEP (FRAME_WIDTH*FRAME_DEPTH)
/* 3x3 kernel, stride 1, same padding -> output spatial size equals input size. */
#define OUT_FRAME_WIDTH FRAME_WIDTH
#define OUT_FRAME_HIGHT FRAME_HIGHT
#define OUT_FRAME_DEPTH 16
#define KERNEL_WIDTH 3
/* The three macros below are referenced later in this listing but were missing
 * from it; they are guarded so a definition elsewhere in the project still wins.
 * NOTE(review): the global buffers input/output/weight/bias/bnA/bnB are also
 * used but never declared in the listing - they must be defined elsewhere. */
#ifndef PARALLEL_COMPUTATIONAL_NUMBER_EQUAL_8
#define PARALLEL_COMPUTATIONAL_NUMBER_EQUAL_8 1 /* conv2D_output[] has exactly 1 slot */
#endif
#ifndef IS_BIAS
#define IS_BIAS 1 /* bias[] is zero-initialized, so 0 or 1 gives the same result */
#endif
#ifndef OUT_STEP
/* NOTE(review): only used in dead pointer arithmetic - value guessed, confirm. */
#define OUT_STEP (OUT_FRAME_WIDTH*OUT_FRAME_DEPTH)
#endif
#define KERNEL_HIGHT 3
首先初始化参数,我源代码做的是卷积核为3X3,8通道-16输出的卷积运算,你可以改成任意的参数。因为网上已经有很多入门教程了,一些基本的知识我就不再重复了。例如输出的尺寸是怎么确定的、卷积运算过程定义、还有多通道卷积的运算。
// Return one pseudo-random float in [-1.0, +0.9999).
// (rand() % 20000 yields 0..19999; scaled and shifted.)
static float rand_pm1(void)
{
    return ((float)(rand() % 20000) / 10000.0f) - 1.0f;
}

// Fill the global buffers with deterministic pseudo-random data:
//   output[]                 -> all zeros (accumulator for the convolution)
//   input[], weight[], bnA[], bnB[] -> uniform values in [-1.0, +1.0)
//   bias[]                   -> all zeros (bias disabled for this experiment)
// The rand() call order (input, then weight, then bnA/bnB interleaved) is kept
// so the generated tensors are reproducible across runs.
void init(void)
{
    srand(0); // fixed seed: every run produces the same tensors

    const int out_count = OUT_FRAME_WIDTH * OUT_FRAME_HIGHT * OUT_FRAME_DEPTH;
    for (int idx = 0; idx < out_count; idx++)
    {
        output[idx] = 0.0f;
    }

    // The input could instead be a fixed 0/1 pattern for easier debugging.
    const int in_count = FRAME_WIDTH_PADD * FRAME_HIGHT_PADD * FRAME_DEPTH;
    for (int idx = 0; idx < in_count; idx++)
    {
        input[idx] = rand_pm1();
    }

    const int w_count = KERNEL_WIDTH * KERNEL_HIGHT * FRAME_DEPTH * OUT_FRAME_DEPTH;
    for (int idx = 0; idx < w_count; idx++)
    {
        weight[idx] = rand_pm1();
    }

    for (int ch = 0; ch < OUT_FRAME_DEPTH; ch++)
    {
        bias[ch] = 0; // switch to rand_pm1() to exercise the bias path
        bnA[ch] = rand_pm1();
        bnB[ch] = rand_pm1();
    }
    return;
}
//caffe的运算就是矩阵的乘加运算,过程还是比较好理解的,就是取数的时候需要注意一点,因为是从一维数组中取数注意一下下标,其实是和多维数组的原理是一样的,PARALLEL_COMPUTATIONAL_NUMBER_EQUAL_8这个参数是没有用到的,之前调试用的
int conv2D_BN_LK_padd_execute_float32_to_float32(void)
{
float* p_input = input;// (float*)this->_inputData[0].data;
float* p_output = output;// (float*)this->_outputData[0].data +
//this->_outputData[0].depth * ((this->_outputData[0].step * this->_outputData[0].padd_top) +
//(this->_outputData[0].padd_left));
int padd_validX = 0;//this->_padd_type == 1 ? this->_kernel_sizeX - 1 : 0;
int padd_validY = 0;//this->_padd_type == 1 ? this->_kernel_sizeY - 1 : 0;
float* p_weights;
int strideY = 1;//this->_strideY;
int strideX = 1;//this->_strideX;
int depth_offset = 1;
int depth_offset_factor = FRAME_DEPTH;// this->_inputData[0].depth;
p_weights = weight;
float* p_output_line = p_output;// -->output
for (int n = 0; n < OUT_FRAME_DEPTH /*this->_outputData[0].depth*/; n ++)//PARALLEL_COMPUTATIONAL_NUMBER_EQUAL_8)
{
float conv2D_output[1];//[PARALLEL_COMPUTATIONAL_NUMBER_EQUAL_8];
float output[PARALLEL_COMPUTATIONAL_NUMBER_EQUAL_8];
for (int l = 0; l < OUT_FRAME_HIGHT; l = l + strideY)
{
//float* p_output_line = p_output;// -->output
p_output += OUT_STEP;// this->_outputData[0].depth * this->_outputData[0].step;
for (int m = 0; m < OUT_FRAME_WIDTH; m = m + strideX)
{
//p_weights = weight;// (float*)this->_p_weights;
for (int ind = 0; ind < PARALLEL_COMPUTATIONAL_NUMBER_EQUAL_8; ind++)
{
//conv2D_output[ind] = _isBias ? this->_p_bias[ind + n] : 0;
conv2D_output[ind] = (IS_BIAS==1) ? bias[ind + n] : 0;
}
for (int k = 0; k < FRAME_DEPTH/*this->_inputData[0].depth*/; k++)
{
for (int i = 0; i < KERNEL_HIGHT/*this->_kernel_sizeY*/; i++)
{
for (int j = 0; j < KERNEL_WIDTH/*this->_kernel_sizeX*/; j++)
{
//for (int k = 0; k < FRAME_DEPTH/*this->_inputData[0].depth*/; k++)
{
// Do PARALLEL_COMPUTATIONAL_NUMBER_EQUAL_8 filters at a time
// load the bits of the input, they should be the same for all the filters; FRAME_DEPTH
float* p_inputVal = &p_input[(((k) * FRAME_HIGHT_PADD/*this->_inputData[0].step*/ + (l + i)) * FRAME_WIDTH_PADD) + (m +j * depth_offset)];
float inPix = (float)*p_inputVal;
{
conv2D_output[0] += p_weights[((n * FRAME_DEPTH + k) * KERNEL_HIGHT + i ) * KERNEL_WIDTH + j] * inPix;//(*p_weights) * inPix;
}
}
}
}
// Do BN, Leaky and poolMax
#if 0
for (int ind = 0; ind < PARALLEL_COMPUTATIONAL_NUMBER_EQUAL_8; ind++)
{
// BatchNorm
float val = (float)(conv2D_output[ind]);
float new_val = ((float)(val)) * bnA/*this->_p_bnA*/[n + ind] + bnB/*this->_p_bnB*/[n + ind];
// Leaky
float leaky = YI_MAX((float)(0.1) * new_val, new_val);
// poolMax
output[ind] = (float)leaky;
}
#endif
}
for (int ind = 0; ind < PARALLEL_COMPUTATIONAL_NUMBER_EQUAL_8; ind++)
{
p_output_line[ind] = conv2D_output[ind];//output[ind];//;
//DEBUG_UPDATE_BW_OUTPUT(4)
}
p_output_line += PARALLEL_COMPUTATIONAL_NUMBER_EQUAL_8;
}
}
}
return 0;}
一个辅助性的函数,把数组存到txt文件,好久没用过C++编程了,都很陌生了
// Dump a (d, h, w) CHW tensor to a whitespace-separated text file:
// one feature-map row per line, plus a blank line after every channel.
// caffe stores blob data row-major per channel, which is the layout written
// here. (The original carried an unused HWC variant behind `#else`.)
// Returns 1 on success, 0 if the file cannot be opened.
int OneToThreeFile(char * filename,int h,int w,int d,float *data)
{
    ofstream out;
    out.open(filename);
    if (!out.is_open())
        return 0;

    for (int ch = 0; ch < d; ch++)
    {
        const float *plane = data + (ch * h * w); // start of channel `ch`
        for (int row = 0; row < h; row++)
        {
            for (int col = 0; col < w; col++)
            {
                out << plane[row * w + col] << " ";
            }
            out << endl; // end of one feature-map row
        }
        out << endl; // blank line separating channels
    }
    out.close();
    return 1;
}
//把weight保存成16个文件,因为caffe中对于权重的加载时按输出进行的,16个输出就有16个权重矩阵
// Save the weights as `num` text files, one per output channel, because caffe
// loads convolution weights per output: 16 outputs -> 16 (d x h x w) matrices.
// File n is "0629/weight/randweight-<n+1>.txt"; layout matches OneToThreeFile
// (one kernel row per line, blank line between input channels).
// Returns 1 on success, 0 if any file cannot be opened.
int OneToFourFile(int h,int w,int d,int num,float *data)
{
    ofstream myfile;
    // Was char[30]: "0629/weight/randweight-16.txt" is 29 chars + NUL = exactly
    // 30 bytes, zero headroom. Enlarged and switched sprintf -> snprintf so a
    // longer name can never overflow the buffer.
    char fname[64];
    for (int n = 0; n < num; n++)
    {
        snprintf(fname, sizeof fname, "0629/weight/%s%d.txt", "randweight-", n + 1);
        myfile.open(fname);
        if (!myfile.is_open())
            return 0;
        for (int k = 0; k < d; k++)
        {
            for (int i = 0; i < h; i++)
            {
                for (int j = 0; j < w; j++)
                {
                    myfile << data[((n * d + k) * h + i) * w + j] << " ";
                }
                myfile << endl;
            }
            myfile << endl; // blank line between input channels
        }
        myfile.close();
    }
    return 1;
}

// Entry point (Windows/TCHAR build): generate the data, dump the input and
// weights for the Python/caffe cross-check, run the convolution, dump the result.
int _tmain(int argc, _TCHAR* argv[])
{
    init();
    if (!OneToThreeFile("0629/16randinput0629-1.txt", FRAME_HIGHT_PADD, FRAME_WIDTH_PADD, FRAME_DEPTH, input))
        cout << "Cannot open file for input writing" << endl;
    // NOTE(review): OneToFourFile takes (h, w) but KERNEL_WIDTH is passed first;
    // harmless while the kernel is square (3x3) - confirm if that ever changes.
    if (!OneToFourFile(KERNEL_WIDTH, KERNEL_HIGHT, FRAME_DEPTH, OUT_FRAME_DEPTH, weight))
        cout << "Cannot open file for weight writing" << endl;
    conv2D_BN_LK_padd_execute_float32_to_float32();
    if (!OneToThreeFile("0629/16randoutput0629-c4.txt", OUT_FRAME_HIGHT, OUT_FRAME_WIDTH, OUT_FRAME_DEPTH, output))
        cout << "Cannot open file for output writing" << endl;
    return 0;
}
2 用Python调用caffe网络进行结果验证
caffe的环境安装请自行百度吧,在Ubuntu下编译caffe还挺方便的,下边直接上代码了。一些基本知识例如修改caffemodel参数、caffe网络的参数介绍,现在做caffe的已经很多了。只要自己喜欢做就能学到。
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
# Make sure that caffe is on the python path.
# Fixed: the path previously lacked the trailing '/', so caffe_root + 'python'
# evaluated to '/home/caffe-masterpython' and the pycaffe import could only
# work if caffe happened to be installed system-wide.
caffe_root = '/home/caffe-master/'
import sys
import os
os.chdir(caffe_root)
sys.path.insert(0, caffe_root + 'python')
import caffe

# configure plotting
plt.rcParams['figure.figsize'] = (10, 10)
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# Load the single-conv-layer test net and list its blobs and params.
caffe.set_mode_cpu()
net = caffe.Net('Net_surgery_result/conv_test.prototxt', caffe.TEST)
print("blobs {}\nparams {}".format(net.blobs.keys(), net.params.keys()))

# Load the padded input dumped by the C code: 8 channels of 55 x 97
# (FRAME_HIGHT_PADD x FRAME_WIDTH_PADD), then add a batch axis -> (1, 8, 55, 97).
im = np.loadtxt('Net_surgery_result/16randinput0629-1.txt')
print('im_input1 shape{} '.format(im.shape))
im_3d = np.array(im).reshape(8, 55, 97)
print('im_input2 shape{} '.format(im_3d.shape))
im_input = im_3d[np.newaxis, :]
print('im_input3 shape{} '.format(im_input.shape))
net.blobs['data'].reshape(*im_input.shape)
# Feed the input batch prepared above.
net.blobs['data'].data[...] = im_input

# The C code dumped one (8, 3, 3) weight matrix per output channel, because
# caffe stores convolution weights per output. Load all 16 in a loop instead
# of the original 16 copy-pasted load/reshape/assign stanzas.
for n in range(16):
    w = np.loadtxt('Net_surgery_result/weight/randweight-%d.txt' % (n + 1))
    net.params['conv'][0].data[n] = np.array(w).reshape(8, 3, 3)

# The C implementation initializes bias[] to zero, so zero it here too.
net.params['conv'][1].data[0:] = 0
print("load net done......")
net.forward()

# Save each of the 16 conv output channels to its own text file so they can be
# diffed against the C implementation's output (originally 16 copy-pasted
# np.savetxt calls).
for n in range(16):
    np.savetxt('Net_surgery_result/output/output0628-%d.txt' % (n + 1),
               net.blobs['conv'].data[0, n])
# helper show filter outputs
def show_filters(net):
    """Run a forward pass and display the first three conv output maps."""
    net.forward()
    print('net.forward')
    plt.figure()
    blob = net.blobs['conv'].data
    filt_min, filt_max = blob.min(), blob.max()
    print(filt_max)
    print(filt_min)
    # Subplots 2..4 are used, leaving slot 1 free (originally for the source image).
    # NOTE(review): original indentation was lost in extraction; tight_layout/axis
    # are kept inside the loop to match the apparent statement order - confirm.
    for idx in range(3):
        plt.subplot(1, 4, idx + 2)
        plt.title("filter #{} output".format(idx))
        plt.imshow(blob[0, idx])
        plt.tight_layout()
        plt.axis('off')
    # plt.show()
# filter the image with initial
#show_filters(net)