媒体计算上机之BOF+K-means
BOF+kmeans
实现BOF+kmeans的以图搜图
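main:查询脚本,在此指定待检索的图片,并与图库比较得出最相似的图片(需先运行SIFT_feature,使工作区中已有K、Ctrs、BOF、img_paths1)
SIFT_feature:提取图库中所有图片的SIFT特征,用K-means聚类得到聚类中心,并生成每张图片的BOF直方图
get_sifts:提取图库中所有图片的SIFT特征
get_countVectors:根据聚类结果统计每张图片的BOF直方图
cos_simp:计算余弦相似度,返回最相似的行及其相似度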
图库
image
搜图
search_image
%main.m
% Query script: looks up the gallery image whose bag-of-features histogram
% is most similar to that of one query image and displays it.
% Reads the workspace variables K, Ctrs, BOF and img_paths1, none of which
% are assigned in this script (SIFT_feature.m assigns them).
addpath(genpath('D:\Documents\MATLAB\siftDemoV4\image'))
addpath(genpath('D:\Documents\MATLAB\siftDemoV4\search_image'))

% SIFT features of the query image; each row of X is treated as one feature.
[image, X, locs] = sift('1.jpg');

% Histogram of the query image: every feature votes for the row of Ctrs
% that cos_simp reports as most similar.
bofX = zeros(1,K);
for i = 1:size(X,1)
    [ans0, t0] = cos_simp(X(i,:),Ctrs);
    bofX(t0) = bofX(t0) + 1;
end

% Compare the query histogram against every row of BOF and show the image
% whose row scored highest.
[ans1,t1] = cos_simp(bofX,BOF);
imshow(char(img_paths1(t1)))
%SIFT_feature.m
% Indexing script: extracts the SIFT features of every gallery image,
% clusters them with k-means and builds one bag-of-features histogram per
% image. Leaves K, Ctrs, BOF and img_paths1 in the workspace.
addpath(genpath('D:\Documents\MATLAB\siftDemoV4\image'))
K = 10; % number of clusters

% Features of all gallery images stacked row-wise; sum_sift(n) is the number
% of rows contributed by the n-th image.
[img_paths1,Feats, sum_sift] = get_sifts('./image_path.txt');

% Options for kmeans. This variable was previously used without ever being
% defined in this script, which made the kmeans call fail.
opts = statset('Display','final');

% Cluster all features. Ctrs holds the cluster centres (one per row) and
% Id holds the cluster number assigned to each row of Feats.
[Id,Ctrs,SumD,D] = kmeans(Feats,K,'Replicates',K,'Options',opts);

% Build the histogram (feature vector) of every gallery image.
[img_paths2,BOF] = get_countVectors('./image_path.txt',Ctrs,K,Id,sum_sift);

% ---- Disabled code kept for reference ------------------------------------
% Query image
%[image, X, locs] = sift('dangongqiao1.jpg'); % SIFT features of the query image
%% Histogram of the query image
% bofX = zeros(1,K);
% for i = 1:size(X,1)
% [ans0, t0] = cos_simp(X(i,:),Ctrs);
% bofX(1,t0) = bofX(1,t0)+1;
% end
% % Compare against the histograms of all images
% [ans1,t1] = cos_simp(bofX,BOF);
% imshow(char(img_paths1(t1)))
% % Euclidean distance alternative
% ans1 = 10000000;
% index1 = 1;
% T = [];
% hist(bofX)
% for m = 2:size(BOF,1)
% t1 = 0;
% for h = 1:size(BOF,2)
% t1 = t1 + (BOF(m,h)-bofX(1,h))^2;
% end
% T = [T,t1];
% if ans1 > t1
% ans1 = t1;
% index1 = m;
% end
% end
%get_sifts.m
function [ img_paths,Feats, sum_sift] = get_sifts( FullFilePaths )
% GET_SIFTS Extract the SIFT features of every image listed in a text file.
% INPUT
%   FullFilePaths --- text file listing the image paths (read with
%                     textread using the '%s' format)
% OUTPUT
%   img_paths --- cell array of the path strings read from the file
%   Feats     --- features of all images, concatenated vertically in the
%                 order the images are listed
%   sum_sift  --- sum_sift(N) is the number of feature rows that image N
%                 contributed to Feats

img_paths = textread(FullFilePaths,'%s');
numImages = size(img_paths,1);

% Nothing listed: return the same empty values the loop-free path produced.
if numImages == 0
    Feats = [];
    sum_sift = [];
    return
end

sum_sift = zeros(1, numImages);
% Collect per-image results in a cell array and concatenate once at the end
% instead of re-copying the growing matrix on every iteration.
featCells = cell(numImages, 1);

for N = 1:numImages
    str = char(img_paths(N));
    % Keep only the part after the last backslash; sift is called with the
    % bare file name, so the image is expected to be found via the MATLAB path.
    parts = regexp(str, '\\', 'split');
    name = char(parts(end));

    [~, Feat, ~] = sift(name);
    sum_sift(N) = size(Feat, 1);
    featCells{N} = Feat;
end

Feats = vertcat(featCells{:});
end
%get_countVectors.m
function [ img_paths,BOF] = get_countVectors( FullFilePaths,Ctrs,K,Id,sum_sift)
% GET_COUNTVECTORS Build one bag-of-features histogram per listed image.
% written by guochuan
% INPUT
%   FullFilePaths --- text file listing the image paths (read with
%                     textread using the '%s' format)
%   Ctrs          --- cluster centres; not used by this function, kept so
%                     that existing callers keep working
%   K             --- number of clusters (number of histogram bins)
%   Id            --- cluster number of every feature, in the same order in
%                     which the features of the listed images were stacked
%   sum_sift      --- sum_sift(N) is the number of features of image N
% OUTPUT
%   img_paths --- cell array of the path strings read from the file
%   BOF       --- numImages-by-K matrix; BOF(N,k) counts the features of
%                 image N that were assigned to cluster k

img_paths = textread(FullFilePaths,'%s');
numImages = size(img_paths,1);

% Exactly one row per image. An extra trailing row would stay all-zero and
% would not correspond to any entry of img_paths.
BOF = zeros(numImages,K);

ind = 0; % running position inside Id across all images
for N = 1:numImages              % current image
    for i = 1:sum_sift(N)        % one feature of that image
        ind = ind+1;
        BOF(N,Id(ind)) = BOF(N,Id(ind))+1;
    end
end
end
%cos_simp.m
function [best, index] = cos_simp(X,Y)
% COS_SIMP Find the row of Y with the highest cosine similarity to X.
% INPUT
%   X --- row vector (1-by-K)
%   Y --- matrix (n-by-K); every row is compared against X
% OUTPUT
%   best  --- highest similarity found
%   index --- row number of Y that produced it
%
% The search starts from best = 0 and index = 1 and only accepts strictly
% larger values, so ties keep the earliest row. If no row has a similarity
% greater than 0 the result is best = 0, index = 1. A zero-norm X or row of
% Y yields NaN, which never passes the comparison and is therefore skipped.

best = 0;
index = 1;
normX = norm(X); % does not depend on the row, so compute it once

for i = 1:size(Y,1)
    t = (X*Y(i,:)')/(normX*norm(Y(i,:)));
    if best < t
        best = t;
        index = i;
    end
end
end
效果: