RNN图像识别 cnn实现图像识别

转载

mob64ca140a59b0 2024-04-08 10:38:15

文章标签 RNN图像识别 tensorflow xml 卷积 文章分类 机器学习人工智能

在此之前我已经大概说过tensorflow的简单流程，中间用到的函数我会在例子中加以注释(更详细的可以查阅tensorflow中的函数讲解)。下面应用cnn实现视频中的人物识别。本想先讲一下cnn的原理，但限于时间，而且别人都已经讲得很详细，在此就不多说，直接上例子。

1 简单图片中的人脸检测

在刚开始学时需要有兴趣，并且能快速实现结果。从结果到原因，再从原因到结果才是最好的学习方法(纯属个人观点)

## 基于haar特征
## 图片中的人物识别
import cv2
 
# Read the input picture (imread handles bmp, jpg, png, tiff and other
# common formats).
image_path = "C:/Users/test2/Desktop/1.jpg"
img = cv2.imread(image_path)
if img is None:
    # imread reports an unreadable file by returning None, not by raising.
    raise SystemExit("cannot read image: " + image_path)
# Convert the picture to grayscale before running the detector.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# OpenCV's Haar frontal-face cascade. The full location of the XML file has
# to be given, so the classifier is built directly from that path.
path = "F:/haar_like/haarcascade_frontalface_alt2.xml"
face_cascade = cv2.CascadeClassifier(path)
if face_cascade.empty():
    raise SystemExit("cannot load cascade file: " + path)

faces = face_cascade.detectMultiScale(gray, 1.3, 5)
for (x, y, w, h) in faces:
    # Draw a red (BGR) rectangle, 5 px thick, around every detection.
    img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 5)

# Show the annotated picture until a key is pressed.
cv2.imshow('img', img)
cv2.waitKey()

2 从摄像头中识别人脸

## 基于haar特征
## 通过摄像头可以识别出视频中的人物
import cv2

cap = cv2.VideoCapture(0)  # device index 0
# Raw string: the Windows path contains backslashes that must stay literal.
path = r"F:\haar_like\haarcascades\haarcascade_frontalface_default.xml"
face_cascade = cv2.CascadeClassifier(path)  # load the face cascade

try:
    while True:
        ret, frame = cap.read()  # grab one frame
        if not ret:
            # No frame delivered: stop instead of passing None to cvtColor.
            break
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # to grayscale

        # Run the detector on the grayscale frame computed above.
        faces = face_cascade.detectMultiScale(
            gray, scaleFactor=1.15, minNeighbors=5, minSize=(5, 5))
        for (x, y, w, h) in faces:
            # Mark each face with a red rectangle.
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255), 3)

        cv2.imshow('Face Recognition', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()  # free the camera even if an error occurred
    cv2.destroyAllWindows()

3 基于cnn+tensorflow实现摄像头中人脸的识别

有上述2个简单例子实现人脸检测你肯定不满足吧！对，所以我自己通过cnn+tensorflow搭建识别摄像头中人脸的算法。

#通过摄像头拍摄获取人脸训练集
import os
import random
import numpy as np
import cv2

# Directory-creation helper
def createdir(*args):
    ''' Create every directory path in ``args`` that does not exist yet. '''
    for directory in args:
        if os.path.exists(directory):
            # Already present: nothing to do for this path.
            continue
        os.makedirs(directory)

# Side length, in pixels, that captured face crops are resized to
IMGSIZE = 64

# Work out the border needed to pad a picture to a square
def getpaddingSize(shape):
    ''' Return ``[top, bottom, left, right]`` padding that squares an image.

    Args:
        shape: ``(height, width)`` pair of the image.

    Returns:
        A list of four ints. Adding ``top + bottom`` rows and
        ``left + right`` columns makes both sides as long as the longer
        original side.
    '''
    h, w = shape
    longest = max(h, w)
    pad_h = int(longest - h)
    pad_w = int(longest - w)
    # Halving both sides identically loses one pixel whenever the gap is
    # odd; give the remainder to the bottom/right edge so the result is
    # exactly square.
    top = pad_h // 2
    left = pad_w // 2
    return [top, pad_h - top, left, pad_w - left]



# Pad a picture to a square and scale it to the requested size
def dealwithimage(img, h=64, w=64):
    ''' Pad ``img`` with black borders to a square, then resize it.

    Args:
        img: image array; its first two axes are read as (height, width).
        h: height of the returned image in pixels.
        w: width of the returned image in pixels.

    Returns:
        The padded and resized image.
    '''
    # Border widths that make the picture square.
    top, bottom, left, right = getpaddingSize(img.shape[0:2])
    # Fill the added border with black.
    img = cv2.copyMakeBorder(img, top, bottom, left, right,
                             cv2.BORDER_CONSTANT, value=[0, 0, 0])
    # cv2.resize expects the target size as (width, height).
    img = cv2.resize(img, (w, h))
    return img

# Brightness / contrast augmentation
def relight(imgsrc, alpha=1, bias=0):
    ''' Return ``imgsrc * alpha + bias`` limited to 0..255 as a uint8 array.

    The computation is done on a float copy, so ``imgsrc`` is left as is.
    '''
    scaled = imgsrc.astype(float) * alpha + bias
    # Clamp into the 0..255 range before converting to uint8.
    return np.clip(scaled, 0, 255).astype(np.uint8)

# Capture face pictures from the camera and save them
def getfacefromcamera(outdir):
    ''' Grab face crops from camera 0 and write them to ``outdir``.

    Frames are processed while the image counter is at most 200. Capture
    also stops when Esc is pressed or when the camera delivers no frame.
    Every detected face is resized to IMGSIZE x IMGSIZE, randomly relit and
    saved as ``<counter>.jpg``.

    Args:
        outdir: directory receiving the pictures; created when missing.
    '''
    createdir(outdir)
    camera = cv2.VideoCapture(0)
    # Raw string: the Windows path contains backslashes that must stay literal.
    path = r"F:\haar_like\haarcascades\haarcascade_frontalface_default.xml"
    haar = cv2.CascadeClassifier(path)
    n = 1
    try:
        while n <= 200:
            print('It`s processing %s image.' % n)
            # Read one frame.
            success, img = camera.read()
            if not success:
                # No frame available: stop rather than crash in cvtColor.
                break
            # Detection runs on the grayscale version of the frame.
            gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # Each detection is an (x, y, width, height) rectangle.
            faces = haar.detectMultiScale(gray_img, 1.3, 5)
            for f_x, f_y, f_w, f_h in faces:
                face = img[f_y:f_y+f_h, f_x:f_x+f_w]
                face = cv2.resize(face, (IMGSIZE, IMGSIZE))
                # Randomly vary contrast and brightness of the sample.
                face = relight(face, random.uniform(0.5, 1.5), random.randint(-50, 50))
                # The running counter is the file name.
                cv2.imwrite(os.path.join(outdir, str(n)+'.jpg'), face)
                # Annotate the preview frame with a caption and a box.
                cv2.putText(img, 'haha', (f_x, f_y - 20), cv2.FONT_HERSHEY_SIMPLEX, 1, 255, 2)
                img = cv2.rectangle(img, (f_x, f_y), (f_x + f_w, f_y + f_h), (0, 0, 255), 3)
                n += 1
            cv2.imshow('img', img)
            key = cv2.waitKey(30) & 0xff
            if key == 27:  # Esc
                break
    finally:
        # Release the device and close the preview even after an error.
        camera.release()
        cv2.destroyAllWindows()

if __name__ == '__main__':
    # Ask for the label under which the captured pictures are stored.
    name = input('please input yourename: ')
    # The pictures go into a sub-directory named after that label.
    target_dir = os.path.join('F:/ml/image/trainfaces', name)
    getfacefromcamera(target_dir)

#tensorflow_face_conv.py文件
#coding=utf-8
import os
import logging as log
import matplotlib.pyplot as plt
import common
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import cv2

SIZE = 64  # length of the two middle dimensions of x_data
# Placeholders through which the actual samples are fed in.
# x_data: float32, shape [None, SIZE, SIZE, 3]
x_data = tf.placeholder(tf.float32, [None, SIZE, SIZE, 3])
# y_data: float32, two dimensions, both left unspecified
y_data = tf.placeholder(tf.float32, [None, None])

# float32 placeholders handed to dropout() as its ``keep`` argument
keep_prob_5 = tf.placeholder(tf.float32)
keep_prob_75 = tf.placeholder(tf.float32)

# Weight factory
def weightVariable(shape):
    ''' Create a variable of ``shape`` drawn from a normal distribution
    with standard deviation 0.01. '''
    initial_values = tf.random_normal(shape, stddev=0.01)
    weights = tf.Variable(initial_values)
    return weights

# Bias factory
def biasVariable(shape):
    ''' Create a variable of ``shape`` initialised from ``tf.random_normal``
    with its default parameters. '''
    return tf.Variable(tf.random_normal(shape))

# Convolution helper
def conv2d(x, W):
    ''' Convolve ``x`` with kernel ``W`` using stride 1 and 'SAME' padding. '''
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

# Pooling helper
def maxPool(x):
    ''' Max-pool ``x`` with a 2x2 window, stride 2 and 'SAME' padding. '''
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

# Dropout helper, used against over-fitting
def dropout(x, keep):
    ''' Apply ``tf.nn.dropout`` to ``x``; ``keep`` is passed as its second
    argument. '''
    dropped = tf.nn.dropout(x, keep)
    return dropped

# Network construction
def cnnLayer(classnum):
    ''' Build the CNN on top of ``x_data`` and return its output tensor.

    Three blocks of 3x3 convolution + ReLU + 2x2 max-pool + dropout
    (32, 64 and 64 output channels) are followed by a 512-unit dense layer
    and a linear output layer with ``classnum`` units.

    Args:
        classnum: number of units of the output layer.

    Returns:
        The output tensor; no softmax is applied here.
    '''
    features = x_data
    in_channels = 3
    # Each pooling step halves the spatial size: 64 -> 32 -> 16 -> 8.
    for out_channels in (32, 64, 64):
        # 3x3 kernels mapping in_channels to out_channels, plus one bias each.
        kernel = weightVariable([3, 3, in_channels, out_channels])
        offset = biasVariable([out_channels])
        activated = tf.nn.relu(conv2d(features, kernel) + offset)
        pooled = maxPool(activated)
        features = dropout(pooled, keep_prob_5)
        in_channels = out_channels

    # Dense layer on the flattened 8 x 8 x 64 feature map.
    flat_size = 8 * 8 * 64
    Wf = weightVariable([flat_size, 512])
    bf = biasVariable([512])
    flattened = tf.reshape(features, [-1, flat_size])
    dense = tf.nn.relu(tf.matmul(flattened, Wf) + bf)
    dropf = dropout(dense, keep_prob_75)

    # Output layer; its bias is created with weightVariable.
    Wout = weightVariable([512, classnum])
    bout = weightVariable([classnum])
    return tf.add(tf.matmul(dropf, Wout), bout)

# Training routine
def train(train_x, train_y, tfsavepath):
    ''' Train the CNN on the given samples and save a checkpoint.

    Runs 10 passes over the data in mini-batches of 10 using Adam with a
    learning rate of 0.01. It then prints the accuracy measured on the same
    training data and saves the model.

    Args:
        train_x: array of input images, fed to ``x_data``.
        train_y: 2-D array of one-hot labels; ``train_y.shape[1]`` is used
            as the number of classes.
        tfsavepath: path handed to ``tf.train.Saver.save``.
    '''
    log.debug('train')
    out = cnnLayer(train_y.shape[1])  # build the network
    # Mean softmax cross-entropy over the batch: the value to minimise.
    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=out, labels=y_data))
    # Adam optimiser, learning rate 0.01.
    train_step = tf.train.AdamOptimizer(0.01).minimize(cross_entropy)
    # Fraction of samples whose arg-max prediction equals the label.
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(out, 1), tf.argmax(y_data, 1)), tf.float32))
    # Used to save the trained model.
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())  # initialise all variables
        batch_size = 10
        # Number of full batches; a trailing partial batch is skipped.
        num_batch = len(train_x) // batch_size
        for n in range(10):
            # Shuffle samples and labels with the same permutation.
            r = np.random.permutation(len(train_x))
            train_x = train_x[r, :]
            train_y = train_y[r, :]

            for i in range(num_batch):
                batch_x = train_x[i*batch_size : (i+1)*batch_size]
                batch_y = train_y[i*batch_size : (i+1)*batch_size]
                # Both dropout placeholders are fed 0.75 during training.
                _, loss = sess.run([train_step, cross_entropy],
                                   feed_dict={x_data: batch_x, y_data: batch_y,
                                              keep_prob_5: 0.75, keep_prob_75: 0.75})

                print(n*num_batch+i, loss)

        # Accuracy on the training data itself, with dropout disabled.
        acc = accuracy.eval({x_data: train_x, y_data: train_y, keep_prob_5: 1.0, keep_prob_75: 1.0})
        print('after 10 times run: accuracy is ', acc)
        saver.save(sess, tfsavepath)

def validate(test_x, tfsavepath, classnum=2):
    ''' Restore a saved model and run it on ``test_x``.

    Args:
        test_x: input images, fed to ``x_data``.
        tfsavepath: checkpoint path handed to ``tf.train.Saver.restore``.
        classnum: number of output classes of the network to build; it has
            to match the saved model. Defaults to 2.

    Returns:
        A two-element list: the raw network outputs and the arg-max class
        index of every input.
    '''
    output = cnnLayer(classnum)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, tfsavepath)
        # Dropout is disabled (keep probability 1.0) for inference.
        res = sess.run([output, tf.argmax(output, 1)],
                       feed_dict={x_data: test_x,
                                  keep_prob_5: 1.0, keep_prob_75: 1.0})
        return res

if __name__ == '__main__':
    # Running this file directly does nothing.
    pass

#实现人脸检测的程序 defined_face.py
#coding=utf-8

import os
import logging as log
import matplotlib.pyplot as plt
import common
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import cv2
import tensorflow_face_conv as myconv

def createdir(*args):
    ''' Make sure each of the given directory paths exists. '''
    for target in args:
        already_there = os.path.exists(target)
        if not already_there:
            # Creates missing parent directories as well.
            os.makedirs(target)

IMGSIZE = 64  # side length, in pixels, that face crops are brought to


def getpaddingSize(shape):
    ''' Get the border sizes that make an image a square.

    Args:
        shape: ``(height, width)`` pair of the image.

    Returns:
        ``[top, bottom, left, right]`` as a list of ints; padding by these
        amounts makes both sides as long as the longer original side.
    '''
    h, w = shape
    longest = max(h, w)
    gap_h = int(longest - h)
    gap_w = int(longest - w)
    # With an odd gap, two equal halves would leave the image one pixel
    # short of square, so the remainder goes to the bottom/right edge.
    top, left = gap_h // 2, gap_w // 2
    bottom, right = gap_h - top, gap_w - left
    return [top, bottom, left, right]

def dealwithimage(img, h=64, w=64):
    ''' Pad ``img`` to a square with black borders and resize it.

    Args:
        img: image array; its first two axes are read as (height, width).
        h: height of the returned image in pixels.
        w: width of the returned image in pixels.

    Returns:
        The padded and resized image.
    '''
    top, bottom, left, right = getpaddingSize(img.shape[0:2])
    img = cv2.copyMakeBorder(img, top, bottom, left, right,
                             cv2.BORDER_CONSTANT, value=[0, 0, 0])
    # cv2.resize takes the target size as (width, height).
    img = cv2.resize(img, (w, h))
    return img

def relight(imgsrc, alpha=1, bias=0):
    ''' Scale pixel values by ``alpha``, add ``bias`` and return the result
    limited to 0..255 as a uint8 array; ``imgsrc`` itself is not modified. '''
    adjusted = imgsrc.astype(float)
    adjusted = adjusted * alpha + bias
    # Raise negatives to 0, then cap at 255.
    adjusted = np.minimum(np.maximum(adjusted, 0), 255)
    return adjusted.astype(np.uint8)

def getface(imgpath, outdir):
    ''' Detect the faces in one image file and save relit copies of each.

    Every detected face is cropped, squared and resized to
    IMGSIZE x IMGSIZE, then written three times with different
    (alpha, bias) relighting as ``<name>_<face>_<variant>.jpg``.

    Args:
        imgpath: path of the picture to scan.
        outdir: directory receiving the face pictures.
    '''
    filename = os.path.splitext(os.path.basename(imgpath))[0]
    img = cv2.imread(imgpath)
    if img is None:
        # imread returns None for unreadable files; skip them.
        return
    # Raw string: the Windows path contains backslashes that must stay literal.
    path = r"F:\haar_like\haarcascades\haarcascade_frontalface_default.xml"
    haar = cv2.CascadeClassifier(path)
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = haar.detectMultiScale(gray_img, 1.3, 5)
    # Faces are numbered from 1 in the output file names.
    for n, (f_x, f_y, f_w, f_h) in enumerate(faces, 1):
        face = img[f_y:f_y+f_h, f_x:f_x+f_w]
        face = dealwithimage(face, IMGSIZE, IMGSIZE)
        for inx, (alpha, bias) in enumerate([[1, 1], [1, 50], [0.5, 0]]):
            facetemp = relight(face, alpha, bias)
            cv2.imwrite(os.path.join(outdir, '%s_%d_%d.jpg' % (filename, n, inx)), facetemp)

# Yield every .jpg file below a directory
def getfilesinpath(filedir):
    ''' Yield the path of each file ending in '.jpg' under ``filedir``.

    Args:
        filedir: directory to search.

    Yields:
        The joined directory and file name of every match.
    '''
    # os.walk already descends into sub-directories, so no explicit
    # recursion is needed.
    for path, _dirnames, filenames in os.walk(filedir):
        for filename in filenames:
            if filename.endswith('.jpg'):
                yield os.path.join(path, filename)

# Extract the faces from every picture of every sub-directory
def generateface(pairdirs):
    ''' Run ``getface`` on all .jpg files of each sub-directory.

    Args:
        pairdirs: iterable of ``(inputdir, outputdir)`` pairs. For every
            sub-directory of ``inputdir`` a directory of the same name is
            created under ``outputdir`` to receive the extracted faces.
    '''
    for inputdir, outputdir in pairdirs:
        for name in os.listdir(inputdir):
            source_dir = os.path.join(inputdir, name)
            if not os.path.isdir(source_dir):
                # Plain files directly inside inputdir are ignored.
                continue
            target_dir = os.path.join(outputdir, name)
            createdir(target_dir)
            for image_file in getfilesinpath(source_dir):
                getface(image_file, target_dir)

# Load pictures and their labels into arrays
def readimage(pairpathlabel):
    ''' Read all .jpg pictures of the given directories into arrays.

    Args:
        pairpathlabel: iterable of ``(directory, label)`` pairs; every
            picture found under ``directory`` gets ``label``.

    Returns:
        ``(images, labels)`` as two numpy arrays of equal length.
    '''
    imgs = []
    labels = []
    for filepath, label in pairpathlabel:
        for fileitem in getfilesinpath(filepath):
            img = cv2.imread(fileitem)
            if img is None:
                # Unreadable file: skipping it keeps None out of the
                # image array.
                continue
            imgs.append(img)
            labels.append(label)
    return np.array(imgs), np.array(labels)

# Build a one-hot matrix
def onehot(numlist):
    ''' Return the one-hot encoding of a list of class indices.

    Args:
        numlist: sequence of non-negative integer indices.

    Returns:
        A list of rows, one per index, each ``max(numlist) + 1`` long with
        1.0 at the index position and 0.0 elsewhere. Empty input gives an
        empty list.
    '''
    if len(numlist) == 0:
        # max() of an empty sequence would raise ValueError.
        return []
    b = np.zeros([len(numlist), max(numlist)+1])
    b[np.arange(len(numlist)), numlist] = 1
    return b.tolist()

def getfileandlabel(filedir):
    ''' Map every sub-directory of ``filedir`` to a one-hot class label.

    Args:
        filedir: directory whose immediate sub-directories are the classes.

    Returns:
        ``(pairs, indextoname)``: ``pairs`` is a list of
        ``(sub-directory path, one-hot label)`` tuples and ``indextoname``
        maps each class index to its sub-directory name.
    '''
    # Sort the names: os.listdir returns entries in arbitrary order, and
    # the index of a class must be the same on every call.
    dirnamelist = sorted(name for name in os.listdir(filedir)
                         if os.path.isdir(os.path.join(filedir, name)))
    dirpathlist = [os.path.join(filedir, name) for name in dirnamelist]
    indexlist = list(range(len(dirnamelist)))

    return list(zip(dirpathlist, onehot(indexlist))), dict(zip(indexlist, dirnamelist))

def main(_):
    ''' Train when no saved model exists yet, otherwise recognise faces
    from the camera. The argument is ignored. '''
    savepath = 'F:/ml/image/checkpoint/face.ckpt'
    if os.path.exists(savepath + '.meta'):
        # A saved model is present: go straight to live recognition.
        testfromcamera(savepath)
        return

    # No saved model: extract the faces first, then train on them.
    log.debug('generateface')
    generateface([['F:/ml/image/trainfaces', 'F:/ml/image/trainfaces']])
    pathlabelpair, indextoname = getfileandlabel('F:/ml/image/trainfaces')

    train_x, train_y = readimage(pathlabelpair)
    # Scale pixel values to the 0..1 range.
    train_x = train_x.astype(np.float32) / 255.0
    log.debug('len of train_x : %s', train_x.shape)
    myconv.train(train_x, train_y, savepath)
    log.debug('training is over, please run again')

def testfromcamera(chkpoint):
    ''' Recognise faces in the live picture of camera 0.

    The network is rebuilt with one output per sub-directory of the
    training directory and its weights are restored from ``chkpoint``.
    Every detected face is classified and the frame is shown with a box and
    the class name. The loop ends on Esc, when no frame can be read, or
    once the face counter exceeds 20000.

    Args:
        chkpoint: checkpoint path handed to ``tf.train.Saver.restore``.
    '''
    camera = cv2.VideoCapture(0)
    # Raw string: the Windows path contains backslashes that must stay literal.
    path = r"F:\haar_like\haarcascades\haarcascade_frontalface_default.xml"
    haar = cv2.CascadeClassifier(path)
    pathlabelpair, indextoname = getfileandlabel('F:/ml/image/trainfaces')
    output = myconv.cnnLayer(len(pathlabelpair))
    # Create the arg-max op once; building it per face would add a new op
    # to the graph on every prediction.
    predicted_index = tf.argmax(output, 1)

    saver = tf.train.Saver()
    try:
        with tf.Session() as sess:
            saver.restore(sess, chkpoint)
            n = 1
            while n <= 20000:
                print('It`s processing %s image.' % n)
                # Read one frame.
                success, img = camera.read()
                if not success:
                    # No frame available: stop rather than crash below.
                    break

                gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                faces = haar.detectMultiScale(gray_img, 1.3, 5)
                for f_x, f_y, f_w, f_h in faces:
                    face = img[f_y:f_y+f_h, f_x:f_x+f_w]
                    face = cv2.resize(face, (IMGSIZE, IMGSIZE))
                    # Batch of one, scaled to 0..1 like the training data.
                    test_x = np.array([face])
                    test_x = test_x.astype(np.float32) / 255.0

                    # Dropout is disabled (keep probability 1.0).
                    res = sess.run([output, predicted_index],
                                   feed_dict={myconv.x_data: test_x,
                                              myconv.keep_prob_5: 1.0,
                                              myconv.keep_prob_75: 1.0})
                    print(res)

                    # Caption the face with the predicted class name.
                    cv2.putText(img, indextoname[res[1][0]], (f_x, f_y - 20), cv2.FONT_HERSHEY_SIMPLEX, 1, 255, 2)
                    img = cv2.rectangle(img, (f_x, f_y), (f_x + f_w, f_y + f_h), (0, 0, 255), 3)
                    n += 1
                cv2.imshow('img', img)
                key = cv2.waitKey(30) & 0xff
                if key == 27:  # Esc
                    break
    finally:
        # Release the device and close the preview even after an error.
        camera.release()
        cv2.destroyAllWindows()

if __name__ == '__main__':
    # main() trains when no saved model exists and otherwise starts the
    # camera recognition itself, so no further call is needed here.
    main(0)

至此，基于tensorflow框架的搭建已经完成，但我还是不满足，我想只靠代码、不借助框架实现图片的识别。后面我会继续更新不借助框架实现识别的代码。

本文章为转载内容，我们尊重原作者对文章享有的著作权。如有内容错误或侵权问题，欢迎原作者联系我们进行内容更正或删除文章。