1、什么是基于内容的图像检索
1、构建基于内容的图像检索系统步骤
(1)定义你的图像描述符:在这个阶段你需要决定你想描述的图像的哪个方面。你对图像的颜色感兴趣吗?图像中物体的形状?或者你想表征纹理?
(2)特征提取和索引您的数据集: 现在您已经定义了图像描述符,您的工作是将此图像描述符应用于数据集中的每个图像,从这些图像提取特征并将特征写入存储(例如,CSV文件,RDBMS ,Redis等),以便稍后可以比较它们的相似性。此外,您需要考虑是否将使用任何专门的数据结构来促进更快的搜索。
(3)定义您的相似性度量: 我们现在有一个(可能已建立索引的)特征向量集合。但你如何比较它们的相似性呢?常用选项包括欧几里得距离、余弦距离和 χ²(卡方)距离,
但实际选择高度依赖于(1)数据集和(2)您提取的特征类型。
(4)搜索: 最后一步是执行实际搜索。用户将向系统提交查询图像(例如从上传表单或通过移动应用程序),而您的工作是(1)从该查询图像中提取特征,然后(2)应用您的相似性函数,将查询图像的特征与已编入索引的特征进行比较。从那里,你只需根据你的相似度函数返回最相关的结果。
2、CBIR和机器学习/图像分类有何不同
(1)机器学习包括使计算机完成诸如预测,分类,识别等智能人工任务的方法。此外,机器学习管理算法,使计算机能够执行这些智能任务 而不需要明确编程。
CBIR确实利用了一些机器学习技术 - 即降维和聚类,但是CBIR系统不执行任何实际学习。
(2)主要的 区别在于CBIR不 直接试图理解和解释图像的内容。相反,CBIR系统依赖于:
- 通过提取特征向量来量化图像。
- 假设特征向量的比较 - 具有相似特征向量的图像具有相似的视觉内容。
基于这两个组件,图像搜索引擎能够将查询与图像数据集进行比较,并返回最相关的结果,而不必实际“知道”图像的内容。
(3)在机器学习和图像分类中,能够学习和理解图像的内容需要一些训练集的概念 - 一组标记数据用于教计算机数据集中每个可视对象的外观。
(4)CBIR系统不需要标记数据。他们只需拍摄图像数据集,从每幅图像中提取特征,并使数据集可以在视觉上搜索。在某些方面,您可以将CBIR系统视为一种“哑”图像分类器,它没有标签概念来使自己更加智能 - 它仅依赖于(1)从图像中提取的特征和(2)相似性函数用于给用户提供有意义的结果。
2、构建CBIR系统
1、目录结构及作用
|---pyimagesearch
||---__init__.py
||---cbir
|||----__init__.py
|||---dists.py 作用:包含我们的距离度量/相似度函数,用于比较两个图像的相似度
|||---hsvdescriptor.py 作用:实现我们的颜色描述符,用于从图像中提取特征向量
|||---resultsmontage.py 作用:用于显示搜索到我们的屏幕结果的实用工具类
|||---searcher.py 作用:将封装用于执行实际搜索的Searcher类
|---index.py 作用:用于从我们的UKBench数据集中提取特征
|---search.py 作用:将接受查询图像,调用 搜索器 ,然后将结果显示在屏幕上
疑问:1、highlight=resultID inqueryRelevant
2、对第三条运行结果存在疑问
运行命令:1、python search.py --index index.csv --dataset ../ukbench --relevant ../ukbench/relevant.json --query ../ukbench/ukbench00644.jpg
python search.py --index index.csv --dataset ../ukbench --relevant ../ukbench/relevant.json --query ../ukbench/ukbench00996.jpg
python search.py --index index.csv --dataset ../ukbench --relevant ../ukbench/relevant.json --query ../ukbench/ukbench00568.jpg
search.py
from __future__ import print_function
from pyimagesearch.cbir.resultsmontage import ResultsMontage
from pyimagesearch.cbir.hsvdescriptor import HSVDescriptor
from pyimagesearch.cbir.searcher import Searcher
import argparse
import imutils
import json
import cv2
# Driver script: describe a query image with the HSV color descriptor, rank the
# indexed images against it and show the best matches as a montage.

# Command-line interface.
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--index", required = True, help = "Path to where the features index will be stored")
ap.add_argument("-q", "--query", required = True, help = "Path to the query image")
ap.add_argument("-d", "--dataset", required = True, help = "Path to the original dataset directory")
ap.add_argument("-r", "--relevant", required = True, help = "Path to relevant dictionary")
args = vars(ap.parse_args())

# Descriptor with (4, 6, 3) histogram bins and a montage laid out for
# 20 results, 5 per row.
desc = HSVDescriptor((4, 6, 3))
montage = ResultsMontage((240, 320), 5, 20)

# Load the relevance dictionary (keyed by image filename) and look up the
# entry of the query image. The context manager closes the file handle.
with open(args["relevant"]) as relevantFile:
    relevant = json.loads(relevantFile.read())
queryFilename = args["query"][args["query"].rfind("/") + 1:]
queryRelevant = relevant[queryFilename]

# Load, display and describe the query image.
query = cv2.imread(args["query"])
print("[INFO] describing query...")
cv2.imshow("Query", imutils.resize(query, width = 320))
features = desc.describe(query)

# Run the search against the feature index.
print("[INFO] searching...")
searcher = Searcher(args["index"])
results = searcher.search(features, numResults = 20)

for (i, (score, resultID)) in enumerate(results):
    # The original format string read "score:.2f" without braces, so the
    # literal text was printed instead of the score value.
    print("[INFO] {result_num}.{result} - {score:.2f}".format(result_num = i + 1, result = resultID, score = score))
    result = cv2.imread("{}/{}".format(args["dataset"], resultID))
    # Debug trace of the result filename (kept from the original script).
    print ("resultID")
    print (resultID)
    # Highlight the tile when the result is listed as relevant to the query.
    montage.addResult(result, text = "#{}".format(i + 1), highlight = resultID in queryRelevant)

# Show the montage, save a copy to disk and wait for a key press.
cv2.imshow("Results", imutils.resize(montage.montage, height = 700))
cv2.imwrite("mo.png",montage.montage)
cv2.waitKey(0)
index.py
from __future__ import print_function
from pyimagesearch.cbir.hsvdescriptor import HSVDescriptor
from imutils import paths
import progressbar
import argparse
import cv2
# Driver script: extract an HSV color feature vector from every image in the
# dataset directory and write one "filename, f1,f2,..." line per image.

# Command-line interface.
ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", required = True, help = "Path to the directory that contains the images to be indexed")
ap.add_argument("-i", "--index", required = True, help = "Path to where the features index will be stored")
args = vars(ap.parse_args())

desc = HSVDescriptor((4, 6, 3))
imagePaths = list(paths.list_images(args["dataset"]))

# Progress bar sized to the number of images.
widgets = ["Indexing:", progressbar.Percentage(), "", progressbar.Bar(), "", progressbar.ETA()]
pbar = progressbar.ProgressBar(maxval = len(imagePaths), widgets = widgets)
pbar.start()

# The context manager guarantees the index file is flushed and closed even if
# describing one of the images raises.
with open(args["index"], "w") as output:
    for (i, imagePath) in enumerate(imagePaths):
        # Keep only the part of the path after the last "/".
        filename = imagePath[imagePath.rfind("/") + 1:]
        image = cv2.imread(imagePath)
        features = desc.describe(image)
        features = [str(x) for x in features]
        output.write("{}, {}\n".format(filename, ",".join(features)))
        pbar.update(i)

pbar.finish()
print("[INFO] indexed {} images".format(len(imagePaths)))
dists.py
import numpy as np
def chi2_distance(histA, histB, eps = 1e-10):
    """Return the chi-squared distance between two histograms.

    Computes 0.5 * sum((a - b)^2 / (a + b + eps)) over all elements.

    histA, histB: array-likes of the same (broadcastable) shape; plain
        Python lists are accepted as well as NumPy arrays.
    eps: small constant added to the denominator to avoid division by zero
        for bins that are empty in both histograms.
    """
    # Coerce to float arrays so that list inputs (e.g. rows parsed from a CSV
    # index) work even when neither argument is already an ndarray.
    histA = np.asarray(histA, dtype = "float")
    histB = np.asarray(histB, dtype = "float")
    d = 0.5 * np.sum(((histA - histB)**2)/(histA + histB + eps))
    return d
hsvdescriptor.py
import numpy as np
import cv2
import imutils
class HSVDescriptor:
    """Region-based HSV color histogram descriptor.

    The image is split into four corner regions plus one central ellipse; a
    3D HSV histogram is computed for each region and the five histograms are
    concatenated into a single feature vector.
    """

    def __init__(self, bins):
        # Number of histogram bins for the (H, S, V) channels.
        self.bins = bins

    def describe(self, image):
        """Return the concatenated region histograms of a BGR image."""
        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        height, width = hsv.shape[:2]
        centerX = int(width * 0.5)
        centerY = int(height * 0.5)

        # Central ellipse whose axes span 75% of the image width and height.
        radiusX = int(width * 0.75)//2
        radiusY = int(height * 0.75)//2
        centerMask = np.zeros(hsv.shape[:2], dtype = "uint8")
        cv2.ellipse(centerMask, (centerX, centerY), (radiusX, radiusY), 0, 0, 360, 255, - 1)

        # Quadrants as (startX, endX, startY, endY): top-left, top-right,
        # bottom-right, bottom-left.
        quadrants = [
            (0, centerX, 0, centerY),
            (centerX, width, 0, centerY),
            (centerX, width, centerY, height),
            (0, centerX, centerY, height),
        ]

        parts = []
        for (x0, x1, y0, y1) in quadrants:
            # Corner mask = quadrant rectangle minus the central ellipse.
            quadMask = np.zeros(hsv.shape[:2], dtype = "uint8")
            cv2.rectangle(quadMask, (x0, y0), (x1, y1), 255, - 1)
            quadMask = cv2.subtract(quadMask, centerMask)
            parts.extend(self.histogram(hsv, quadMask))

        # The central region goes last.
        parts.extend(self.histogram(hsv, centerMask))
        return np.array(parts)

    def histogram(self, image, mask = None):
        """Return the normalized, flattened 3D histogram of the masked area."""
        hist = cv2.calcHist([image], [0, 1, 2], mask, self.bins, [ 0 , 180, 0, 256, 0, 256])
        # cv2.normalize is called with a different signature under OpenCV 2.
        if imutils.is_cv2():
            return cv2.normalize(hist).flatten()
        return cv2.normalize(hist, hist).flatten()
resultsmontage.py
import numpy as np
import cv2
class ResultsMontage:
    """Grid canvas onto which result images are pasted one tile at a time.

    Note on naming: ``imageSize[0]`` is stored as ``imageW`` but is used as
    the tile's extent along the row (vertical) axis of the canvas, while
    ``imageSize[1]`` (``imageH``) is used along the column (horizontal) axis.
    """

    def __init__(self, imageSize, imagesPerRow, numResults):
        """Allocate a black canvas large enough for ``numResults`` tiles.

        imageSize: pair giving the tile extent along the canvas rows and
            columns respectively (see the class note).
        imagesPerRow: number of tiles placed side by side.
        numResults: total number of tiles the montage must hold.
        """
        self.imageW = imageSize[0]
        self.imageH = imageSize[1]
        self.imagesPerRow = imagesPerRow
        # Round up so a final, partially filled row still fits on the canvas
        # (floor division dropped it and made addResult fail on that row).
        numRows = -(-numResults // imagesPerRow)
        self.montage = np.zeros((numRows * self.imageW, imagesPerRow * self.imageH, 3), dtype="uint8")
        # Number of tiles added so far and the grid cell of the next tile.
        self.counter = 0
        self.row = 0
        self.col = 0

    def addResult(self, image, text = None, highlight = False):
        """Paste ``image`` into the next free cell of the montage.

        text: optional label drawn in the tile's top-left corner.
        highlight: when true, a green rectangle is drawn around the tile.
        """
        # Wrap to the start of the next row once the current row is full.
        if self.counter != 0 and self.counter % self.imagesPerRow == 0:
            self.col = 0
            self.row += 1

        # cv2.resize takes (width, height), hence (imageH, imageW) here.
        image = cv2.resize(image, (self.imageH, self.imageW))
        (startY, endY) = (self.row * self.imageW, (self.row + 1) * self.imageW)
        (startX, endX) = (self.col * self.imageH, (self.col + 1) * self.imageH)
        self.montage[startY:endY, startX:endX] = image

        if text is not None:
            cv2.putText(self.montage, text, (startX + 10, startY + 30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 3)
            # Debug trace kept from the original implementation.
            print ("text")

        if highlight:
            cv2.rectangle(self.montage, (startX + 3, startY + 3), (endX - 3, endY - 3), (0, 255, 0), 4)
            # Debug trace kept from the original implementation.
            print ("hig")

        self.col += 1
        self.counter += 1
searcher.py
from . import dists
import csv
class Searcher:
    """Linear-scan searcher over a CSV index of feature vectors."""

    def __init__(self, dbPath):
        # Path to the CSV index; each row is an image ID followed by features.
        self.dbPath = dbPath

    def search(self, queryFeatures, numResults = 10):
        """Return the ``numResults`` closest entries as (distance, imageID).

        Every row of the index is compared with ``queryFeatures`` using the
        chi-squared distance; results are sorted by ascending distance.
        """
        distances = {}
        # The with-block closes the index file on exit.
        with open(self.dbPath) as indexFile:
            for record in csv.reader(indexFile):
                imageID = record[0]
                vector = [float(value) for value in record[1:]]
                distances[imageID] = dists.chi2_distance(vector, queryFeatures)

        # Sort on (distance, imageID) so the smallest distance comes first.
        ranked = sorted([(dist, imageID) for (imageID, dist) in distances.items()])
        return ranked[:numResults]
2、特征提取和索引
1、描述图像的三个方面:
- 颜色: 表征图像颜色的图像描述符试图模拟图像每个通道中像素强度的分布。这些方法包括基本颜色统计,如平均值,标准偏差和偏度,以及颜色直方图,“平面”和多维。
- 纹理: 纹理描述符试图模拟图像中物体的感觉,外观和整体触觉质量。一些(但不是全部)纹理描述符将图像转换为灰度,然后计算灰度共生矩阵(GLCM)并计算该矩阵的统计量,包括对比度,相关性和熵等(Haralick纹理)。更先进的纹理描述符,如局部二进制模式,尝试模型 模式也是如此。甚至还有更高级的纹理描述符,例如傅立叶和小波变换也存在,但仍然使用灰度图像。
- 形状: 绝大多数形状描述符方法依靠提取图像中对象的轮廓(即轮廓)。一旦我们有了轮廓,我们就可以计算简单的统计数据来表征轮廓,这正是Hu Moments和Zernike Moments所做的。这些统计数据可用于表示图像中对象的形状(轮廓)。在机器学习和对象识别的背景下, 面向梯度的直方图 也是一个不错的选择。
2、特征提取的定义
定义: 特征提取 是通过应用图像描述符从数据集中每个图像中提取特征来量化数据集的过程。通常,这些功能存储在磁盘上供 以后使用,并 使用专门的数据结构(例如倒排索引,kd树或随机投影林)进行索引,以加快查询速度。
3、定义相似度
1、常用距离度量
欧几里德:
from scipy.spatial import distance as dists
dists.euclidean(A, B)
曼哈顿/城市街区(city block)
dists.cityblock(A, B)
直方图交点
def histogram_intersection(H1, H2):
    """Return the histogram intersection: the sum of element-wise minima."""
    overlap = np.minimum(H1, H2)
    return np.sum(overlap)
χ²(卡方)距离
def chi2_distance(histA, histB, eps=1e-10):
    """Return the chi-squared distance between two histograms.

    Computes 0.5 * sum((a - b)^2 / (a + b + eps)). ``eps`` keeps the
    denominator non-zero for bins that are empty in both histograms.
    """
    # Coerce to float arrays so plain Python lists are accepted too.
    histA = np.asarray(histA, dtype="float")
    histB = np.asarray(histB, dtype="float")
    return 0.5 * np.sum(((histA - histB) ** 2) / (histA + histB + eps))
chi2_distance(A, B)
余弦
dists.cosine(A, B)
海明
dists.hamming(A, B)
4、提取关键点和局部不变描述符
1、文件结构及作用:
|---pyimagesearch
||---__init__.py
||---descriptors 作用:包含了实现从我们的图像数据集提取关键点和本地不变描述
|||----__init__.py
|||---detectanddescribe.py 作用:以便使用任意检测器和描述符轻松检测关键点并提取功能
||---indexer 作用:包含我们面向HDF5数据集的面向对象的接口来存储特征
|||----__init__.py
|||---baseindexer.py
|||---featureindexer.py
|---index_features.py 作用:驱动程序脚本,用于将所有碎片粘合在一起
疑问:1、@staticmethod静态函数定义的方法?
运行命令:python index_features.py --dataset ../ukbench_sample --features-db output/features.hdf5
index_features.py
#coding=utf-8
from __future__ import print_function
from pyimagesearch.descriptors.detectanddescribe import DetectAndDescribe
from pyimagesearch.indexer.featureindexer import FeatureIndexer
from imutils.feature import FeatureDetector_create, DescriptorExtractor_create
from imutils import paths
import argparse
import imutils
import cv2
# Driver script: detect keypoints and extract local invariant descriptors for
# every image in the dataset and store them in an HDF5 features database.

ap = argparse.ArgumentParser()
# Path to the directory of images.
ap.add_argument("-d", "--dataset", required=True, help="Path to the directory that contains the images to be indexed")
# Path where the HDF5 database is stored on disk.
ap.add_argument("-f", "--features-db", required=True, help="Path to where the features database will be stored")
# Optional: approximate number of images in the dataset.
ap.add_argument("-a", "--approx-images", type=int, default=500, help="Approximate # of images in the dataset")
# Writing feature vectors to HDF5 one at a time is very inefficient; they are
# collected in memory and dumped when the buffer is full. This switch sets how
# many feature vectors may be held in memory before the buffer is flushed.
ap.add_argument("-b", "--max-buffer-size", type=int, default=50000, help="Maximum buffer size for # of features to be stored in memory")
args = vars(ap.parse_args())

# Keypoint detector, descriptor extractor and the helper combining both.
detector = FeatureDetector_create("SURF")
descriptor = DescriptorExtractor_create("RootSIFT")
dad = DetectAndDescribe(detector, descriptor)

# Indexer that buffers features and writes them to the HDF5 database.
fi = FeatureIndexer(
    args["features_db"],
    estNumImages=args["approx_images"],
    maxBufferSize=args["max_buffer_size"],
    verbose=True,
)

for (i, imagePath) in enumerate(paths.list_images(args["dataset"])):
    # Report progress every 10 images (but not before the first one).
    if i > 0 and i % 10 == 0:
        fi._debug("processed {} images".format(i), msgType = "[PROGRESS]")

    # Keep only the part of the path after the last "/".
    filename = imagePath.rsplit("/", 1)[-1]

    # Load, shrink to a width of 320 pixels and convert to grayscale.
    gray = cv2.imread(imagePath)
    gray = imutils.resize(gray, width = 320)
    gray = cv2.cvtColor(gray, cv2.COLOR_BGR2GRAY)

    # Index the image only when keypoints and descriptors were found.
    (kps, descs) = dad.describe(gray)
    if kps is not None and descs is not None:
        fi.add(filename, kps, descs)

# Flush whatever is still buffered and close the database.
fi.finish()
detectanddescribe.py
import numpy as np
class DetectAndDescribe:
    """Pair a keypoint detector with a descriptor extractor."""

    def __init__(self, detector, descriptor):
        # detector must provide detect(image); descriptor must provide
        # compute(image, keypoints) returning (keypoints, descriptors).
        self.detector = detector
        self.descriptor = descriptor

    def describe(self, image, useKpList = True):
        """Detect keypoints in ``image`` and compute their descriptors.

        Returns ``(kps, descs)``, or ``(None, None)`` when no keypoints
        remain after the descriptor step. When ``useKpList`` is true the
        keypoints are converted to an integer array built from each
        keypoint's ``pt`` attribute; otherwise the keypoint objects are
        returned unchanged.
        """
        kps = self.detector.detect(image)
        (kps, descs) = self.descriptor.compute(image, kps)

        if len(kps) == 0:
            return (None, None)

        if useKpList:
            # np.int0 (an alias of np.intp) was removed in NumPy 2.0; cast
            # explicitly to np.intp, which truncates the same way.
            kps = np.array([kp.pt for kp in kps]).astype(np.intp)

        return (kps, descs)
baseindexer.py
from __future__ import print_function
import numpy as np
import datetime
class BaseIndexer(object):
    """Shared buffering and resizing logic for dataset-backed indexers.

    Subclasses own the actual datasets and buffers; this base class provides
    helpers to write a buffer into a resizable dataset, grow or compact a
    dataset, stack feature arrays and print timestamped debug messages.
    """

    def __init__(self, dbPath, estNumImages = 500, maxBufferSize = 50000, dbResizeFactor = 2, verbose = True):
        """Store the indexer configuration.

        dbPath: path of the database file (opened by subclasses).
        estNumImages: estimated number of images, stored for subclasses.
        maxBufferSize: buffer size threshold, stored for subclasses.
        dbResizeFactor: multiplier applied when a dataset must grow.
        verbose: when true, ``_debug`` prints its messages.
        """
        self.dbPath = dbPath
        self.estNumImages = estNumImages
        self.maxBufferSize = maxBufferSize
        self.dbResizeFactor = dbResizeFactor
        self.verbose = verbose
        # Next write position per index name; populated by subclasses.
        self.idxs = {}

    def _writeBuffers(self):
        """Hook for subclasses: flush all in-memory buffers. No-op here."""
        pass

    # The hook was originally misspelled; keep the old name as an alias so
    # any existing reference to it still resolves.
    _wrieBuffers = _writeBuffers

    def _writeBuffer(self, dataset, datasetName, buf, idxName, sparse = False):
        """Write ``buf`` into ``dataset`` starting at ``self.idxs[idxName]``.

        The dataset is grown first when the buffer would not fit. The write
        position itself is NOT advanced here; callers update ``self.idxs``.
        When ``sparse`` is true the buffer is densified with ``toarray()``.
        """
        # Lists have no .shape, so measure them with len().
        if isinstance(buf, list):
            end = self.idxs[idxName] + len(buf)
        else:
            end = self.idxs[idxName] + buf.shape[0]

        if end > dataset.shape[0]:
            self._debug("triggering '{}' db resize".format(datasetName))
            self._resizeDataset(dataset, datasetName, baseSize = end)

        if sparse:
            buf = buf.toarray()

        self._debug("writing '{}' buffer".format(datasetName))
        dataset[self.idxs[idxName]:end] = buf

    def _resizeDataset(self, dataset, dbName, baseSize = 0, finished = 0):
        """Resize the first dimension of ``dataset``.

        With ``finished > 0`` the dataset is set to exactly that many rows;
        otherwise it is set to ``baseSize * dbResizeFactor`` rows.
        """
        origSize = dataset.shape[0]

        if finished > 0:
            newSize = finished
        else:
            newSize = baseSize * self.dbResizeFactor

        shape = list(dataset.shape)
        shape[0] = newSize
        dataset.resize(tuple(shape))
        self._debug("old size of '{}':{:,};new size:{:,}".format(dbName, origSize, newSize))

    def _debug(self, msg, msgType = "[INFO]"):
        """Print a timestamped message when ``verbose`` is enabled."""
        if self.verbose:
            print("{} {} - {}".format(msgType, msg, datetime.datetime.now()))

    @staticmethod
    def featureStack(array, accum = None, stackMethod = np.vstack):
        """Append ``array`` to ``accum`` using ``stackMethod``.

        Returns ``array`` itself when there is no accumulator yet.
        """
        if accum is None:
            accum = array
        else:
            accum = stackMethod([accum, array])
        return accum
featureindexer.py
#coding=utf-8
from .baseindexer import BaseIndexer
import numpy as np
import h5py
import sys
class FeatureIndexer(BaseIndexer):
    """Buffered writer of keypoints and descriptors into an HDF5 file.

    Three datasets are maintained: ``image_ids`` (one ID per image),
    ``index`` (the [start, end) row range of each image's features) and
    ``features`` (one row per keypoint: keypoint columns followed by the
    descriptor columns).
    """

    def __init__(self, dbPath, estNumImages=500, maxBufferSize=50000, dbResizeFactor=2,verbose=True):
        super(FeatureIndexer, self).__init__(dbPath, estNumImages=estNumImages,
            maxBufferSize=maxBufferSize, dbResizeFactor=dbResizeFactor,
            verbose=verbose)

        # Open (and truncate) the output HDF5 file; the datasets are created
        # lazily once the first buffer is full or finish() is called.
        self.db = h5py.File(self.dbPath, mode="w")
        self.imageIDDB = None
        self.indexDB = None
        self.featuresDB = None

        # In-memory buffers and the number of feature rows currently buffered.
        self.imageIDBuffer = []
        self.indexBuffer = []
        self.featuresBuffer = None
        self.totalFeatures = 0

        # Next write positions in the index-sized and feature-sized datasets.
        self.idxs = {"index": 0, "features": 0}

    def _datasetsMissing(self):
        """Return True while any of the three datasets is not yet created."""
        return (self.imageIDDB is None or self.indexDB is None
            or self.featuresDB is None)

    def add(self, imageID, kps, features):
        """Buffer the keypoints and descriptors of one image.

        The buffers are flushed to disk once the number of buffered feature
        rows reaches ``maxBufferSize``.
        """
        # Row range this image will occupy in the "features" dataset.
        start = self.idxs["features"] + self.totalFeatures
        end = start + len(features)

        self.imageIDBuffer.append(imageID)
        self.featuresBuffer = BaseIndexer.featureStack(np.hstack([kps, features]),self.featuresBuffer)
        self.indexBuffer.append((start, end))
        self.totalFeatures += len(features)

        if self.totalFeatures >= self.maxBufferSize:
            if self._datasetsMissing():
                self._debug("initial buffer full")
                self._createDatasets()
            self._writeBuffers()

    def _createDatasets(self):
        """Create the HDF5 datasets, sized from what has been buffered so far."""
        # Estimate the total number of feature rows from the buffered average.
        avgFeatures = self.totalFeatures/float(len(self.imageIDBuffer))
        approxFeatures = int(avgFeatures * self.estNumImages)
        fvectorSize = self.featuresBuffer.shape[1]

        # Variable-length string dtype for the image IDs (Python 2 vs 3).
        if sys.version_info[0] < 3:
            dt = h5py.special_dtype(vlen = unicode)
        else:
            dt = h5py.special_dtype(vlen = str)

        self._debug("creating datasets...")
        self.imageIDDB = self.db.create_dataset("image_ids", (self.estNumImages, ), maxshape = (None, ), dtype = dt)
        self.indexDB = self.db.create_dataset("index", (self.estNumImages, 2), maxshape = (None, 2), dtype = "int")
        self.featuresDB = self.db.create_dataset("features", (approxFeatures, fvectorSize), maxshape = (None, fvectorSize), dtype = "float")

    def _writeBuffers(self):
        """Flush the buffers to the datasets and reset them."""
        # Nothing buffered (e.g. the last add() already flushed): writing a
        # None feature buffer would fail, so there is nothing to do.
        if not self.imageIDBuffer:
            return

        self._writeBuffer(self.imageIDDB, "image_ids", self.imageIDBuffer,"index")
        self._writeBuffer(self.indexDB, "index", self.indexBuffer, "index")
        self._writeBuffer(self.featuresDB, "features", self.featuresBuffer,"features")

        # Advance the write positions, then reset the buffers.
        self.idxs["index"] += len(self.imageIDBuffer)
        self.idxs["features"] += self.totalFeatures
        self.imageIDBuffer = []
        self.indexBuffer = []
        self.featuresBuffer = None
        self.totalFeatures = 0

    def finish(self):
        """Flush remaining buffers, trim the datasets and close the file."""
        if self._datasetsMissing():
            if not self.imageIDBuffer:
                # No image was ever added: the datasets cannot be sized, so
                # just close the (empty) file instead of dividing by zero.
                self._debug("no features were added; nothing to write", msgType = "[WARN]")
                self.db.close()
                return
            self._debug("minimum init buffer not reached", msgType = "[WARN]")
            self._createDatasets()

        self._debug("writing un - empty buffers...")
        self._writeBuffers()

        # Shrink each dataset to the number of rows actually written.
        self._debug("compacting datasets...")
        self._resizeDataset(self.imageIDDB, "image_ids", finished = self.idxs["index"])
        self._resizeDataset(self.indexDB, "index", finished = self.idxs["index"])
        self._resizeDataset(self.featuresDB, "features", finished = self.idxs["features"])

        self.db.close()
5、集群功能组成一个码本
1、文件结构及作用
多添加俩个新文件:cluster_features.py和vocabulary.py
|---pyimagesearch
||---__init__.py
||---descriptors
|||----__init__.py
|||---detectanddescribe.py
||---indexer
|||----__init__.py
|||---baseindexer.py
|||---featureindexer.py
||---ir
|||----__init__.py
|||---vocabulary.py 作用:用于摄取的功能HDF5数据集,然后返回一个字典的视觉(即聚类中心)话
|---cluster_features.py 作用:驱动脚本将启动 词汇表 制定过程
|---index_features.py
运行命令:python cluster_features.py --features-db output/features.hdf5 --codebook output/vocab.cpickle --clusters 1536 --percentage 0.25
cluster_features.py
#coding=utf-8
from __future__ import print_function
from pyimagesearch.ir.vocabulary import Vocabulary
import argparse
import pickle
# Driver script: sample features from the HDF5 features database, cluster
# them into a vocabulary of visual words and pickle the cluster centers.

ap = argparse.ArgumentParser()
ap.add_argument("-f", "--features-db", required = True, help = "Path to where the features database will be stored")
ap.add_argument("-c", "--codebook", required = True, help = "Path to the output codebook")
# Number of clusters (i.e. visual words) that k-means will generate.
ap.add_argument("-k", "--clusters", type = int, default = 64, help = "# of clusters to generate")
# Controls the size of the feature-vector sample used for clustering.
ap.add_argument("-p", "--percentage",type = float, default = 0.25, help = "Percentage of total features to use when clustering" )
args = vars(ap.parse_args())

# The whole sampling and clustering process is wrapped by Vocabulary.
voc = Vocabulary(args["features_db"])
vocab = voc.fit(args["clusters"], args["percentage"])

# Persist the cluster centers; the context manager closes the file even if
# pickling or writing fails.
print ("[INFO] storing cluster centers...")
with open(args["codebook"], "wb") as f:
    f.write(pickle.dumps(vocab))
vocabulary.py
from __future__ import print_function
from sklearn.cluster import MiniBatchKMeans
import numpy as np
import datetime
import h5py
class Vocabulary:
    """Build a visual vocabulary by clustering sampled feature vectors."""

    def __init__(self, dbPath, verbose = True):
        # Path to the HDF5 features database and the debug-output switch.
        self.dbPath = dbPath
        self.verbose = verbose

    def fit(self, numClusters, samplePercent, randomState = None):
        """Cluster a random sample of features and return the cluster centers.

        numClusters: number of clusters passed to MiniBatchKMeans.
        samplePercent: fraction of the rows in the "features" dataset to
            sample (without replacement).
        randomState: forwarded to MiniBatchKMeans only; the row sampling
            uses the global NumPy random state.
        """
        # Open read-only: the database is only sampled here, never modified.
        db = h5py.File(self.dbPath, mode = "r")
        try:
            totalFeatures = db["features"].shape[0]
            sampleSize = int(np.ceil(samplePercent * totalFeatures))
            # Routed through _debug so that verbose=False silences it.
            self._debug("sample size: {:,}".format(sampleSize))

            # Pick the row indexes to sample and visit them in ascending order.
            idxs = np.random.choice(np.arange(0, totalFeatures), (sampleSize), replace = False)
            idxs.sort()
            data = []
            self._debug("starting sampling...")

            for i in idxs:
                # Skip the first two columns of each row and keep the rest
                # as the feature vector.
                data.append(db["features"][i][2:])

            self._debug("sampled {:,} features from a population of {:,}".format(len(idxs), totalFeatures))
            self._debug("clustering with k = {:,}".format(numClusters))
            clt = MiniBatchKMeans(n_clusters = numClusters, random_state = randomState)
            clt.fit(data)
            self._debug("cluster shape:{}".format(clt.cluster_centers_.shape))
        finally:
            # Close the database even when sampling or clustering fails.
            db.close()

        return clt.cluster_centers_

    def _debug(self, msg, msgType = "[INFO]"):
        """Print a timestamped message when ``verbose`` is enabled."""
        if self.verbose:
            print("{} {} - {}".format(msgType, msg, datetime.datetime.now()))
6、可视化码本中的单词
注:在前面构建出关键点和局部不变特征描述符、建立集群码本后方可实现
作用:将码本中K-means分类后的直方图,转化为可视化图片
visualize_centers.py
#coding=utf-8
from __future__ import print_function
from pyimagesearch.resultsmontage import ResultsMontage
from sklearn.metrics import pairwise
import numpy as np
import progressbar
import argparse
import pickle
import h5py
import cv2
# Driver script: for every visual word in the codebook, find the 16 keypoints
# whose descriptors are closest to it and save a montage of the image patches
# around those keypoints.

ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", required = True, help = "Path to the directory of indexed images")
# Path to the database of extracted keypoints and local invariant descriptors.
ap.add_argument("-f", "--features-db", required = True, help = "Path to the features database")
# Path to the vocabulary of visual words to visualize.
ap.add_argument("-c", "--codebook", required = True, help = "Path to the codebook")
ap.add_argument("-o", "--output", required = True, help = "Path to output directory")
args = vars(ap.parse_args())

# NOTE: pickle.loads executes arbitrary code from the file; only load
# codebooks produced by a trusted source.
with open(args["codebook"], "rb") as codebookFile:
    vocab = pickle.loads(codebookFile.read())
featuresDB = h5py.File(args["features_db"], mode = "r")
print("[INFO] starting distance distance computations...")

# vis maps each visual-word index to its current list of closest matches,
# stored as (distance, keypoint, imageID) tuples.
vis = {i:[] for i in np.arange(0, len(vocab))}
widgets = ["Comparing:", progressbar.Percentage(), "", progressbar.Bar(), "", progressbar.ETA()]
pbar = progressbar.ProgressBar(maxval = featuresDB["image_ids"].shape[0], widgets = widgets).start()

for (i, imageID) in enumerate(featuresDB["image_ids"]):
    # Rows of this image: the first two columns are the keypoint, the
    # remaining columns are the descriptor.
    (start, end) = featuresDB["index"][i]
    rows = featuresDB["features"][start:end]
    (kps, descs) = (rows[:, :2], rows[:, 2:])

    for (kp, features) in zip(kps, descs):
        # Distance from this descriptor to every visual word.
        features = features.reshape(1, - 1)
        D = pairwise.euclidean_distances(features, Y = vocab)[0]

        for j in np.arange(0, len(vocab)):
            # Keep only the 16 smallest distances per visual word.
            topResults = vis.get(j)
            topResults.append((D[j], kp, imageID))
            topResults = sorted(topResults, key = lambda r:r[0])[:16]
            vis[j] = topResults

    pbar.update(i)

pbar.finish()
featuresDB.close()
print("[INFO] writing visualizations to file...")

for (vwID, results) in vis.items():
    montage = ResultsMontage((64, 64), 4, 16)

    for (_, (x, y), imageID) in results:
        p = "{}/{}".format(args["dataset"], imageID)
        image = cv2.imread(p)
        (h, w) = image.shape[:2]

        # 64x64 window centered on the keypoint, clipped to the image bounds.
        (startX, endX) = (max(0, int(x) - 32), min(w, int(x) + 32))
        (startY, endY) = (max(0, int(y) - 32), min(h, int(y) + 32))
        # The column range must be a slice; the original indexed with
        # "startX, endX" (a comma), which does not extract the patch.
        roi = image[startY:endY, startX:endX]
        montage.addResult(roi)

    # One grayscale montage per visual word.
    p = "{}/vis_{}.jpg".format(args["output"], vwID)
    cv2.imwrite(p, cv2.cvtColor(montage.montage, cv2.COLOR_BGR2GRAY))
7、矢量量化
7.1、从多个特征到单个直方图
bagofvisualwords.py
from sklearn.metrics import pairwise
from scipy.sparse import csr_matrix
import numpy as np
class BagOfVisualWords:
    """Quantize a set of feature vectors into a visual-word histogram."""

    def __init__(self, codebook, sparse = True):
        # codebook: one row per visual word; sparse: return a CSR matrix
        # instead of a dense 1D array.
        self.codebook = codebook
        self.sparse = sparse

    def describe(self, features):
        """Return the histogram of nearest visual words for ``features``.

        Each feature vector is assigned to the codebook row at the smallest
        Euclidean distance; the histogram counts assignments per word.
        """
        distances = pairwise.euclidean_distances(features, Y = self.codebook)
        nearest = np.argmin(distances, axis = 1)
        (words, counts) = np.unique(nearest, return_counts = True)
        vocabSize = len(self.codebook)

        if not self.sparse:
            # Dense variant: a 1D float array with one bin per visual word.
            hist = np.zeros((vocabSize, ), dtype = "float")
            hist[words] = counts
            return hist

        # Sparse variant: a single-row CSR matrix (all row indexes are zero).
        rowIdxs = np.zeros((len(words), ))
        hist = csr_matrix((counts, (rowIdxs, words)), shape = (1, vocabSize), dtype = "float")
        return hist
quantize_example.py
from __future__ import print_function
from pyimagesearch.ir.bagofvisualwords import BagOfVisualWords
from sklearn.metrics import pairwise
import numpy as np
# Example script: quantize 10 random feature vectors against a 3-word random
# vocabulary, first by hand and then with BagOfVisualWords.

# Fixed seed so the generated vocabulary and features are reproducible.
np.random.seed(42)
vocab = np.random.uniform(size = (3, 6))
features = np.random.uniform(size = (10, 6))
print("[INFO] vocabulary:\n{}\n".format(vocab))
print("[INFO] features:\n{}\n".format(features))

# Manual quantization: one histogram bin per visual word.
hist = np.zeros((3,), dtype = "int32")

for (featureIdx, vector) in enumerate(features):
    # Distances from this feature vector to every visual word.
    D = pairwise.euclidean_distances(vector.reshape(1, -1), Y = vocab)
    closest = np.argmin(D)
    print("[INFO] Closest visual word to feature #{}:{}".format(featureIdx, closest))
    hist[closest] += 1
    print("[INFO] Updated histogram:{}".format(hist))

# The same quantization through the BagOfVisualWords class (dense output).
bovw = BagOfVisualWords(vocab, sparse = False)
hist = bovw.describe(features)
print("[INFO] BOVW histogram:{}".format(hist))
7.2、形成BOVW
运行命令:python extract_bovw.py --features-db output/features.hdf5 --codebook output/vocab.cpickle --bovw-db output/bovw.hdf5 --idf output/idf.cpickle
extract_bovw.py
from pyimagesearch.ir.bagofvisualwords import BagOfVisualWords
from pyimagesearch.indexer.bovwindexer import BOVWIndexer
import argparse
import pickle
import h5py
# Driver script: turn the raw features of every indexed image into a
# bag-of-visual-words histogram, store the histograms in a separate HDF5
# database and pickle the inverse document frequency values.

ap = argparse.ArgumentParser()
# Path to the HDF5 features database (image IDs, index and raw
# keypoints/feature vectors of every image).
ap.add_argument("-f", "--features-db", required = True, help = "Path to the features database")
# Path to the codebook of visual words.
ap.add_argument("-c", "--codebook", required = True, help = "Path to the codebook")
# Path where the BOVW histograms are stored, in a separate HDF5 database.
ap.add_argument("-b", "--bovw-db", required = True, help = "Path to where the bag-of-visual-words database will be stored")
ap.add_argument("-d", "--idf", required = True, help = "Path to inverse document frequency counts will be stored")
# Size of the in-memory buffer of BOVW histograms kept before writing to HDF5.
ap.add_argument("-s", "--max-buffer-size", type = int, default = 500, help = "Maximum buffer size for # of features to be stored in memory")
args = vars(ap.parse_args())

# Load the codebook and wrap it in the quantizer.
vocab = pickle.loads(open(args["codebook"], "rb").read())
bovw = BagOfVisualWords(vocab)

# Open the features database and create the BOVW indexer; the histogram
# length equals the number of visual words.
featuresDB = h5py.File(args["features_db"], mode = "r")
bi = BOVWIndexer(
    bovw.codebook.shape[0],
    args["bovw_db"],
    estNumImages = featuresDB["image_ids"].shape[0],
    maxBufferSize = args["max_buffer_size"],
)

entries = zip(featuresDB["image_ids"], featuresDB["index"])
for (i, (imageID, offset)) in enumerate(entries):
    # Report progress every 10 images (but not before the first one).
    if i > 0 and i % 10 == 0:
        bi._debug("processed {} images".format(i), msgType = "[PROGRESS]")

    # Rows of this image, without the two leading keypoint columns.
    rows = featuresDB["features"][offset[0]:offset[1]]
    descs = rows[:, 2:]
    bi.add(bovw.describe(descs))

featuresDB.close()
bi.finish()

# Pickle the inverse document frequency values.
idfFile = open(args["idf"], "wb")
idfFile.write(pickle.dumps(bi.df(method = "idf")))
idfFile.close()
bovwindexer.py
from .baseindexer import BaseIndexer
from scipy import sparse
import numpy as np
import h5py
class BOVWIndexer(BaseIndexer):
    """Buffered writer of bag-of-visual-words histograms into an HDF5 file.

    Also counts, per visual word, in how many histograms the word occurs
    (document frequency) so that idf values can be derived afterwards.
    """

    def __init__(self, fvectorSize, dbPath, estNumImages = 500, maxBufferSize = 500, dbResizeFactor = 2, verbose = True):
        super(BOVWIndexer, self).__init__(dbPath, estNumImages = estNumImages, maxBufferSize = maxBufferSize, dbResizeFactor = dbResizeFactor, verbose = verbose)

        # Open (and truncate) the output file; the dataset is created lazily.
        self.db = h5py.File(self.dbPath, mode = "w")
        self.bovwDB = None
        self.bovwBuffer = None
        self.idxs = {"bovw":0}

        # Histogram length, per-word document frequency counts and the number
        # of images written (set by finish()).
        self.fvectorSize = fvectorSize
        self._df = np.zeros((fvectorSize, ), dtype = "float")
        self.totalImages = 0

    def add(self, hist):
        """Buffer one sparse histogram and update the document frequencies.

        ``hist`` must provide ``toarray()`` (it is stacked with
        ``scipy.sparse.vstack``). The buffer is flushed once it holds
        ``maxBufferSize`` histograms.
        """
        self.bovwBuffer = BaseIndexer.featureStack(hist, self.bovwBuffer, stackMethod = sparse.vstack)
        # Every visual word present in this histogram counts once.
        self._df[np.where(hist.toarray()[0] > 0)] += 1

        if self.bovwBuffer.shape[0] >= self.maxBufferSize:
            if self.bovwDB is None:
                self._debug("initial buffer full")
                self._createDatasets()
            self._writeBuffers()

    def _writeBuffers(self):
        """Flush the histogram buffer to the dataset, if it is non-empty."""
        if self.bovwBuffer is not None and self.bovwBuffer.shape[0] > 0:
            self._writeBuffer(self.bovwDB, "bovw", self.bovwBuffer, "bovw", sparse = True)
            self.idxs["bovw"] += self.bovwBuffer.shape[0]
            self.bovwBuffer = None

    def _createDatasets(self):
        """Create the resizable "bovw" dataset."""
        self._debug("creating datasets...")
        self.bovwDB = self.db.create_dataset("bovw", (self.estNumImages, self.fvectorSize), maxshape = (None, self.fvectorSize), dtype = "float")

    def finish(self):
        """Flush remaining histograms, trim the dataset and close the file."""
        if self.bovwDB is None:
            self._debug("minimum init buffer not reached", msgType = "[WARN]")
            self._createDatasets()

        self._debug("writing un-empty buffers...")
        self._writeBuffers()

        self._debug("compacting datasets...")
        self._resizeDataset(self.bovwDB, "bovw", finished = self.idxs["bovw"])

        # Record the image count used by df(method="idf"). The original
        # assigned to a misspelled attribute, leaving totalImages at 0.
        self.totalImages = self.bovwDB.shape[0]
        self.db.close()

    def df(self, method = None):
        """Return the document frequency counts.

        With ``method == "idf"`` returns ``log(totalImages / (1 + df))``;
        otherwise returns the raw per-word counts.
        """
        if method == "idf":
            return np.log(self.totalImages/(1.0 + self._df))
        return self._df
8、反转索引和搜索
8.1、建立倒排索引
1、文件结构
|---pyimagesearch
||----__init__.py
||---db
|||----__init__.py
|||---redisqueue.py
||---descriptors
|||----__init__.py
|||---detectanddescribe.py
||---indexer
|||----__init__.py
|||----baseindexer.py
|||----bovwindexer.py
|||----featureindexer.py
||---ir
|||----__init__.py
|||----bagofvisualwords.py
|||----vocabulary.py
|---build_redis_index.py
|---cluster_features.py
|---extract_bovw.py
|---index_features.py
|---visualize_centers.py
运行命令:
redisqueue.py
import numpy as np
class RedisQueue:
    """Build an inverted index (visual word -> image indexes) in Redis."""

    def __init__(self, redisDB):
        # Redis connection used for all operations.
        self.redisDB = redisDB

    def add(self, imageIdx, hist):
        """Append ``imageIdx`` to the list of every visual word it contains.

        imageIdx: index of the image in the image_ids HDF5 dataset.
        hist: BOVW histogram extracted from the image; every bin with a
            value greater than zero counts as a contained visual word.
        """
        # Queue all pushes on one pipeline and send them together.
        pipe = self.redisDB.pipeline()
        occupied = np.nonzero(hist > 0)[0]

        for wordIdx in occupied:
            key = "vw:{}".format(wordIdx)
            pipe.rpush(key, imageIdx)

        pipe.execute()

    def finish(self):
        """Ask Redis to save the database."""
        self.redisDB.save()
build_redis_index.py
from __future__ import print_function
from pyimagesearch.db.redisqueue import RedisQueue
from redis import Redis
import h5py
import argparse
# Driver script: read every BOVW histogram from the HDF5 database and add it
# to the Redis inverted index.

ap = argparse.ArgumentParser()
ap.add_argument("-b", "--bovw-db", required = True, help = "Path to where the bag-of-visual-words database")
args = vars(ap.parse_args())

# Connect to the local Redis server and wrap the connection in the queue.
redisDB = Redis(host = "localhost", port = 6379, db = 0)
rq = RedisQueue(redisDB)

bovwDB = h5py.File(args["bovw_db"], mode = "r")
histograms = bovwDB["bovw"]

for (i, hist) in enumerate(histograms):
    # Report progress every 10 entries (but not before the first one).
    if i > 0 and i % 10 == 0:
        print("[PROGRESS] processed {} entries".format(i))

    # The position of the histogram in the dataset is used as the image index.
    rq.add(i, hist)

bovwDB.close()
rq.finish()
8.2 执行搜索
文件目录结构:
|---pyimagesearch
||----__init__.py
||---db
|||----__init__.py
|||---redisqueue.py
||---descriptors
|||----__init__.py
|||---detectanddescribe.py
||---indexer
|||----__init__.py
|||----baseindexer.py
|||----bovwindexer.py
|||----featureindexer.py
||---ir
|||----__init__.py
|||----bagofvisualwords.py
|||----vocabulary.py
|||----dists.py
|||----searcher.py
|||----searchresult.py
|---build_redis_index.py
|---cluster_features.py
|---extract_bovw.py
|---index_features.py
|---visualize_centers.py
|---search.py
运行命令:python search.py --dataset ../ukbench --features-db output/features.hdf5 --bovw-db output/bovw.hdf5 --codebook output/vocab.cpickle --relevant ../ukbench/relevant.json --query ../ukbench/ukbench00258.jpg
search.py
from __future__ import print_function
from pyimagesearch.descriptors.detectanddescribe import DetectAndDescribe
from pyimagesearch.ir.bagofvisualwords import BagOfVisualWords
from pyimagesearch.ir.searcher import Searcher
from pyimagesearch.ir.dists import chi2_distance
from pyimagesearch.resultsmontage import ResultsMontage
from scipy.spatial import distance
from redis import Redis
from imutils.feature import FeatureDetector_create, DescriptorExtractor_create
import argparse
import pickle
import imutils
import json
import cv2
# Driver script: quantize a query image into a BOVW histogram, search the
# Redis-backed index and show the top results in a montage.

ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", required = True, help = "Path to the directory of indexed images")
ap.add_argument("-f", "--features-db", required = True, help = "Path to the features database")
ap.add_argument("-b", "--bovw-db", required = True, help = "Path to the bag-of-visual-words database")
# The help text used to be a copy of the --relevant one.
ap.add_argument("-c", "--codebook", required = True, help = "Path to the codebook")
ap.add_argument("-i", "--idf", type = str, help = "Path to inverted document frequencies array")
ap.add_argument("-r", "--relevant", required = True, help = "Path to relevant dictionary")
ap.add_argument("-q", "--query", required = True, help = "Path to the query image")
args = vars(ap.parse_args())

# Keypoint detector, descriptor extractor and the helper combining both.
detector = FeatureDetector_create("SURF")
descriptor = DescriptorExtractor_create("RootSIFT")
dad = DetectAndDescribe(detector, descriptor)
distanceMetric = chi2_distance
idf = None

# Optional idf weighting.
# NOTE(review): evaluate.py switches distanceMetric to distance.cosine when
# an idf file is given; this script keeps chi-squared - confirm whether the
# same switch is intended here.
# NOTE: pickle.loads executes arbitrary code from the file; only load files
# produced by a trusted source.
if args["idf"] is not None:
    with open(args["idf"], "rb") as idfFile:
        idf = pickle.loads(idfFile.read())

with open(args["codebook"], "rb") as codebookFile:
    vocab = pickle.loads(codebookFile.read())
bovw = BagOfVisualWords(vocab)

# Relevance dictionary keyed by image filename; look up the query's entry.
with open(args["relevant"]) as relevantFile:
    relevant = json.loads(relevantFile.read())
queryFilename = args["query"][args["query"].rfind("/") + 1:]
queryRelevant = relevant[queryFilename]

# Load the query, shrink it to a width of 320 pixels, convert to grayscale.
queryImage = cv2.imread(args["query"])
queryImage = imutils.resize(queryImage, width = 320)
queryImage = cv2.cvtColor(queryImage, cv2.COLOR_BGR2GRAY)

# Describe the query and quantize it; the searcher reads the occupied
# columns of the COO histogram.
(_, descs) = dad.describe(queryImage)
hist = bovw.describe(descs).tocoo()

redisDB = Redis(host = "localhost", port = 6379, db = 0)
searcher = Searcher(redisDB, args["bovw_db"], args["features_db"], idf = idf, distanceMetric = distanceMetric)
sr = searcher.search(hist, numResults = 20)
print("[INFO] search took:{:.2f}s".format(sr.search_time))

montage = ResultsMontage((240, 320), 5, 20)

for (i, (score, resultID, resultIdx)) in enumerate(sr.results):
    print("[RESULT] {result_num}.{result} -{score:.2f}".format(result_num = i + 1, result = resultID, score = score))
    result = cv2.imread("{}/{}".format(args["dataset"], resultID))
    # Highlight the tile when the result is listed as relevant to the query.
    montage.addResult(result, text = "#{}".format(i + 1), highlight = resultID in queryRelevant)

cv2.imshow("Result", imutils.resize(montage.montage, height = 700))
cv2.waitKey(0)
searcher.finish()
searcher.py
from .searchresult import SearchResult
from .dists import chi2_distance
import numpy as np
import datetime
import h5py
class Searcher:
    """Search BOVW histograms using a Redis inverted index for candidates."""

    def __init__(self, redisDB, bovwDBPath, featuresDBPath, idf = None, distanceMetric = chi2_distance):
        """Open both HDF5 databases read-only and store the configuration.

        redisDB: Redis connection holding the "vw:<word>" lists.
        idf: optional per-word weights multiplied into every histogram.
        distanceMetric: callable(candidateHist, queryHist) -> distance.
        """
        self.redisDB = redisDB
        self.idf = idf
        self.distanceMetric = distanceMetric
        self.bovwDB = h5py.File(bovwDBPath, mode = "r")
        self.featuresDB = h5py.File(featuresDBPath, "r")

    def search(self, queryHist, numResults = 10, maxCandidates = 200):
        """Return the best matches for ``queryHist`` as a SearchResult.

        queryHist: sparse COO histogram of the query image.
        The result list holds (distance, imageID, imageIdx) tuples sorted by
        ascending distance, truncated to ``numResults`` entries.
        """
        startTime = datetime.datetime.now()

        candidateIdxs = self.buildCandidates(queryHist, maxCandidates)
        if len(candidateIdxs) == 0:
            # No indexed image shares a visual word with the query.
            return SearchResult([], (datetime.datetime.now() - startTime).total_seconds())

        # Read the candidate histograms in ascending index order.
        candidateIdxs.sort()
        hists = self.bovwDB["bovw"][candidateIdxs]

        # Flatten to 1D so the query has the same shape as each candidate
        # row; some metrics only accept 1D vectors.
        queryHist = queryHist.toarray().ravel()
        results = {}

        if self.idf is not None:
            queryHist *= self.idf

        for (candidate, hist) in zip(candidateIdxs, hists):
            if self.idf is not None:
                hist *= self.idf
            d = self.distanceMetric(hist, queryHist)
            results[candidate] = d

        # Smallest distance first; attach the image ID of every candidate.
        results = sorted([(v, self.featuresDB["image_ids"][k], k)
            for (k, v) in results.items()])
        results = results[:numResults]

        return SearchResult(results, (datetime.datetime.now() - startTime).total_seconds())

    def buildCandidates(self, hist, maxCandidates):
        """Return up to ``maxCandidates`` image indexes sharing words with ``hist``.

        Candidates are ordered by how many of the query's visual words they
        contain, most shared words first.
        """
        # Fetch the image list of every visual word present in the query.
        p = self.redisDB.pipeline()
        for i in hist.col:
            p.lrange("vw:{}".format(i), 0, -1)
        pipelineResults = p.execute()

        candidates = []
        for results in pipelineResults:
            results = [int(r) for r in results]
            candidates.extend(results)

        # Count occurrences per image and rank by that count, descending.
        (imageIdxs, counts) = np.unique(candidates, return_counts = True)
        imageIdxs = [i for (c, i) in sorted(zip(counts, imageIdxs), reverse = True)]

        return imageIdxs[:maxCandidates]

    def finish(self):
        """Close both HDF5 databases."""
        self.bovwDB.close()
        self.featuresDB.close()
dists.py
import numpy as np
def chi2_distance(histA, histB, eps = 1e-10):
    """Return the chi-squared distance between two histograms.

    Computes 0.5 * sum((a - b)^2 / (a + b + eps)) over all elements.

    histA, histB: array-likes of the same (broadcastable) shape; plain
        Python lists are accepted as well as NumPy arrays.
    eps: small constant added to the denominator to avoid division by zero
        for bins that are empty in both histograms.
    """
    # Coerce to float arrays so list inputs work too.
    histA = np.asarray(histA, dtype = "float")
    histB = np.asarray(histB, dtype = "float")
    d = 0.5 * np.sum(((histA - histB)**2)/(histA + histB + eps))
    return d
searchresult.py
from collections import namedtuple
# Record returned by a search: `results` holds the ranked matches and
# `search_time` the elapsed duration of the search.
SearchResult = namedtuple("SearchResult", ["results", "search_time"])
9、评估
evaluate.py
from __future__ import print_function
from pyimagesearch.descriptors.detectanddescribe import DetectAndDescribe
from pyimagesearch.ir.bagofvisualwords import BagOfVisualWords
from pyimagesearch.ir.searcher import Searcher
from pyimagesearch.ir.dists import chi2_distance
from scipy.spatial import distance
from redis import Redis
from imutils.feature import FeatureDetector_create, DescriptorExtractor_create
import numpy as np
import progressbar
import argparse
import pickle
import imutils
import json
import cv2
# Driver script: run every image listed in the relevance dictionary as a
# query and report how many of its top-4 results are relevant, together with
# the search timings.

ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", required = True, help = "Path to the directory of indexed images")
ap.add_argument("-f", "--features-db", required = True, help = "Path to the features database")
ap.add_argument("-b", "--bovw-db", required=True, help = "Path to the bag-of-visual-words database")
ap.add_argument("-c", "--codebook", required = True, help = "Path to the codebook")
ap.add_argument("-i", "--idf", type = str, help = "Path to inverted document frequencies array")
ap.add_argument("-r", "--relevant", required = True, help = "Path to relevant dictionary")
args = vars(ap.parse_args())

# Keypoint detector, descriptor extractor and the helper combining both.
detector = FeatureDetector_create("SURF")
descriptor = DescriptorExtractor_create("RootSIFT")
dad = DetectAndDescribe(detector, descriptor)
distanceMetric = chi2_distance
idf = None

# With idf weighting the cosine distance is used instead of chi-squared.
# NOTE: pickle.loads executes arbitrary code from the file; only load files
# produced by a trusted source.
if args["idf"] is not None:
    with open(args["idf"], "rb") as idfFile:
        idf = pickle.loads(idfFile.read())
    distanceMetric = distance.cosine

with open(args["codebook"], "rb") as codebookFile:
    vocab = pickle.loads(codebookFile.read())
bovw = BagOfVisualWords(vocab)

redisDB = Redis(host = "localhost", port = 6379, db = 0)
searcher = Searcher(redisDB, args["bovw_db"], args["features_db"], idf = idf, distanceMetric = distanceMetric)

# Every key of the relevance dictionary is used as a query image.
with open(args["relevant"]) as relevantFile:
    relevant = json.loads(relevantFile.read())
queryIDs = relevant.keys()

accuracies = []
timings = []

widgets = ["Evaluating:", progressbar.Percentage(), "", progressbar.Bar(), "", progressbar.ETA()]
pbar = progressbar.ProgressBar(maxval = len(queryIDs), widgets = widgets).start()

for (i, queryID) in enumerate(sorted(queryIDs)):
    queryRelevant = relevant[queryID]

    # Load the query, shrink it to a width of 320 pixels and convert it to
    # grayscale. The resized image used to be stored under a misspelled
    # name, so the resize was silently discarded.
    p = "{}/{}".format(args["dataset"], queryID)
    queryImage = cv2.imread(p)
    queryImage = imutils.resize(queryImage, width = 320)
    queryImage = cv2.cvtColor(queryImage, cv2.COLOR_BGR2GRAY)

    # Describe, quantize and search for the top 4 results.
    (_, descs) = dad.describe(queryImage)
    hist = bovw.describe(descs).tocoo()
    sr = searcher.search(hist, numResults = 4)

    # Score = number of returned image IDs that are relevant to the query.
    results = set([r[1] for r in sr.results])
    inter = results.intersection(queryRelevant)
    accuracies.append(len(inter))
    timings.append(sr.search_time)
    pbar.update(i)

searcher.finish()
pbar.finish()

# Mean and standard deviation of the scores and of the search times.
accuracies = np.array(accuracies)
timings = np.array(timings)
print("[INFO] ACCURACY:u = {:.2f}, o = {:.2f}".format(accuracies.mean(), accuracies.std()))
print("[INFO] TIMINGS:u = {:.2f}, o = {:.2f}".format(timings.mean(), timings.std()))