% ---------------------------------------------------------------------
% Stage 1: reset the session, fix the tunable properties and derive
% every file/directory name that the later stages read from the
% workspace.
% ---------------------------------------------------------------------
close all;
clear all;
tic;
fprintf('-->> Set parameters... ');

% Tunable properties
MinQSize=100;                  % a query is kept only if it has at least this many samples
Proportion=0.5;                % fraction of each concept's samples assigned to the training split
Desc=sprintf('%d',MinQSize);   % textual tag appended to the generated file names
rng('default');                % reset the random generator so the split is repeatable

% Collections and their root directories
Root='VisualSearch';
TrainCollec='msr2013devtrain';
TestCollec='msr2013devval';
TrainDir=fullfile(Root,TrainCollec);
TestDir=fullfile(Root,TestCollec);

% Names of the concept list files: 'concepts<collection><Desc>.txt'
TrainAnnoName=[TrainCollec,Desc];
TestAnnoName=[TestCollec,Desc];
TrainConceptName=['concepts',TrainAnnoName,'.txt'];
TestConceptName=['concepts',TestAnnoName,'.txt'];

% 'Annotations' tree of the training collection
TrainAnnoDir=fullfile(TrainDir,'Annotations');
TrainConceptFile=fullfile(TrainAnnoDir,TrainConceptName);
% The per-concept directory carries the same name as the concept list
% file, including its '.txt' suffix.
TrainConceptDir=fullfile(TrainAnnoDir,'Image',TrainConceptName);

% 'Annotations' tree of the test collection
TestAnnoDir=fullfile(TestDir,'Annotations');
TestConceptFile=fullfile(TestAnnoDir,TestConceptName);
TestConceptDir=fullfile(TestAnnoDir,'Image',TestConceptName);

% 'ImageSets' file of the test collection
TestImageSetDir=fullfile(TestDir,'ImageSets');
TestImageSetName=[TestCollec,Desc,'.txt'];
TestImageSetFile=fullfile(TestImageSetDir,TestImageSetName);

fprintf('Done!\n ');
toc;
% ---------------------------------------------------------------------
% Stage 2: load the raw label table.
% 'importfile' is defined outside this script; its three outputs are
% stored as Query, Image and Rate and consumed by the following stages.
% ---------------------------------------------------------------------
tic;
fprintf('-->> Import raw data... ');
[Query,Image,Rate]=importfile('DevSetLabel.tsv');
fprintf('Done!\n ');
toc;
% ---------------------------------------------------------------------
% Stage 3: keep only the samples whose query occurs at least 'MinQSize'
% times, rename every kept query to 'Concept<k>' and strip the 'img'
% prefix from the image labels.
%
% Reads : Query, Image, Rate, MinQSize
% Writes: Query, Image, Rate (filtered, same row correspondence)
% ---------------------------------------------------------------------
tic;
fprintf('-->> Select queries with number of samples >= ''MinQSize''... ');

% Group the samples by query text. grp2idx reports missing (e.g. empty)
% queries as NaN; those rows belong to no group, so they are excluded
% before counting and before the index is used as a subscript.
[QIndex,~,QLevels]=grp2idx(Query);
NQueries=numel(QLevels);
HasGroup=~isnan(QIndex);

% Number of samples per query level
QSize=accumarray(QIndex(HasGroup),1,[NQueries 1]);

% Row mask of the samples that belong to a sufficiently large query
Keep=HasGroup;
Keep(HasGroup)=QSize(QIndex(HasGroup))>=MinQSize;

% Rename <Query> as <Concept#> to avoid weird file names. The number is
% the position of the query among ALL levels, not only the kept ones.
ConceptNames=cell(NQueries,1);
for k=1:NQueries
    ConceptNames{k}=sprintf('Concept%d',k);
end

Query=ConceptNames(QIndex(Keep));
Image=Image(Keep);
Rate=Rate(Keep);
clear QIndex QLevels NQueries QSize HasGroup Keep ConceptNames k

% Clear 'img' prefix: the first three characters of every label are
% removed unconditionally.
for i=1:length(Image)
    Image{i}(1:3)=[];
end
clear i

fprintf('Done!\n ');
toc;
% ---------------------------------------------------------------------
% Stage 4: turn the textual ratings into numeric annotations.
%   'Excellent' or 'Good' -> +1
%   'Bad'                 -> -1
%   anything else         ->  0
% 'Rate' is removed from the workspace afterwards.
% ---------------------------------------------------------------------
tic;
fprintf('-->> Obtain annotations... ');
IsPositive=strcmp(Rate,'Excellent')|strcmp(Rate,'Good');
IsNegative=strcmp(Rate,'Bad');
Anno=double(IsPositive)-double(IsNegative);
clear Rate IsPositive IsNegative
fprintf('Done!\n ');
toc;
% ---------------------------------------------------------------------
% Stage 5: write the 'Annotations' trees.
%
% For every concept (query) this stage
%   * appends the concept name to the train and test concept list files,
%   * splits its positive (Anno==1) and negative (Anno==-1) samples
%     into a random training part ('Proportion' of them) and a test part,
%   * appends '<image> <label>' lines to '<Concept#>.txt' inside the
%     train/test concept directories (positives first, then negatives),
%   * appends the test image labels to the temporary log
%     'TestImage.tmp', which the 'ImageSets' stage reads afterwards.
% Samples with Anno==0 are not written anywhere.
%
% Reads : Query, Image, Anno, Proportion and the path variables set in
%         the parameter stage.
% ---------------------------------------------------------------------
tic;
fprintf('-->> Obtain ''Annotations''...\n');

% Remove the outputs of any previous run: every file below is opened in
% append mode, so stale content would otherwise be mixed into the new
% results. This includes the temporary test image log, which survives
% when an earlier run aborted before it was deleted.
if exist(TrainConceptFile,'file')
    delete(TrainConceptFile);
end
if exist(TestConceptFile,'file')
    delete(TestConceptFile);
end
if exist(TrainConceptDir,'dir')
    rmdir(TrainConceptDir,'s');
end
if exist(TestConceptDir,'dir')
    rmdir(TestConceptDir,'s');
end
if exist('TestImage.tmp','file')
    delete('TestImage.tmp');
end

% Create the output directories once instead of re-checking them for
% every concept.
OutDirs={TrainAnnoDir,TestAnnoDir,TrainConceptDir,TestConceptDir};
for d=1:numel(OutDirs)
    if ~exist(OutDirs{d},'dir')
        mkdir(OutDirs{d});
    end
end

% Concept list files: {name shown on screen, full path}
ListJobs={TrainConceptName,TrainConceptFile; ...
          TestConceptName, TestConceptFile};

[QIndex,QNames,QLevels]=grp2idx(Query);
NQueries=length(QLevels);
for i=1:NQueries
    iQuery=QNames{i};
    iMask=(QIndex==i);
    % Force column shape so the [image,label] pairing below is well
    % defined whatever the orientation of the inputs.
    iImage=reshape(Image(iMask),[],1);
    iAnno=reshape(Anno(iMask),[],1);
    iFileName=[iQuery,'.txt'];

    % Write 'concepts(...).txt' files
    for j=1:size(ListJobs,1)
        % Names are passed as data, never as the format string.
        fprintf('---> Write ''%s'' to ''%s''... ',iQuery,ListJobs{j,1});
        [FID,Msg]=fopen(ListJobs{j,2},'a');
        if FID<0
            error('Cannot open ''%s'' for writing: %s',ListJobs{j,2},Msg);
        end
        fprintf(FID,'%s\n',iQuery);
        fclose(FID);
        fprintf('Done!\n');
    end

    % Divide positive samples. The two randsample calls are issued in
    % the same order as before (positives, then negatives) so that the
    % split obtained from the seeded generator is unchanged.
    PosiIndex=find(iAnno==1);
    NPosi=length(PosiIndex);
    TrainPosiIndex=reshape(PosiIndex(randsample(NPosi,round(NPosi*Proportion))),[],1);
    TestPosiIndex=reshape(setdiff(PosiIndex,TrainPosiIndex),[],1);

    % Divide negative samples
    NegaIndex=find(iAnno==-1);
    NNega=length(NegaIndex);
    TrainNegaIndex=reshape(NegaIndex(randsample(NNega,round(NNega*Proportion))),[],1);
    TestNegaIndex=reshape(setdiff(NegaIndex,TrainNegaIndex),[],1);

    % Write the samples to '(Concept#).txt' files:
    % {kind shown on screen, target directory, sample indices}
    SampleJobs={'positive',TrainConceptDir,TrainPosiIndex; ...
                'positive',TestConceptDir, TestPosiIndex; ...
                'negative',TrainConceptDir,TrainNegaIndex; ...
                'negative',TestConceptDir, TestNegaIndex};
    for j=1:size(SampleJobs,1)
        [Kind,OutDir,Index]=SampleJobs{j,:};
        fprintf('---> Write %s samples to ''%s''... ',Kind,iFileName);
        OutFile=fullfile(OutDir,iFileName);
        [FID,Msg]=fopen(OutFile,'a');
        if FID<0
            error('Cannot open ''%s'' for writing: %s',OutFile,Msg);
        end
        % With no data arguments fprintf still emits the literal part of
        % the format, which would add a bogus line; skip empty sets.
        if ~isempty(Index)
            Temp=[iImage(Index),num2cell(iAnno(Index))]';
            fprintf(FID,'%s %d\n',Temp{:});
        end
        fclose(FID);
        fprintf('Done!\n');
    end

    % Write test image label log. The file is opened for every concept
    % so that it exists even when a concept contributes no test sample.
    [FID,Msg]=fopen('TestImage.tmp','a');
    if FID<0
        error('Cannot open ''%s'' for writing: %s','TestImage.tmp',Msg);
    end
    TestIndex=[TestPosiIndex;TestNegaIndex];
    if ~isempty(TestIndex)
        fprintf(FID,'%s\n',iImage{TestIndex});
    end
    fclose(FID);
end
fprintf(' ');
toc;
% ---------------------------------------------------------------------
% Stage 6: write the 'ImageSets' file of the test collection.
%
% The temporary log 'TestImage.tmp' is read back through
% 'importtestimage' (defined outside this script), reduced to its
% distinct values and written one per line to 'TestImageSetFile'.
% NOTE(review): the '%d' format below implies that importtestimage
% returns numeric image ids - confirm against that helper.
% ---------------------------------------------------------------------
tic;
fprintf('-->> Obtain ''ImageSets''...\n');

% unique() already returns its result in sorted order, so no separate
% sort is required.
TestImage=unique(importtestimage('TestImage.tmp'));
delete('TestImage.tmp');

% Start from a clean file: it is opened in append mode below.
if exist(TestImageSetFile,'file')
    delete(TestImageSetFile);
end

% The file name is passed as data, never as the format string.
fprintf('---> Write image labels to ''%s''... ',TestImageSetName);
if ~exist(TestImageSetDir,'dir')
    mkdir(TestImageSetDir);
end
[FID,Msg]=fopen(TestImageSetFile,'a');
if FID<0
    error('Cannot open ''%s'' for writing: %s',TestImageSetFile,Msg);
end
% With no data fprintf would still emit the literal newline of the
% format; leave the file empty instead when there is nothing to write.
if ~isempty(TestImage)
    fprintf(FID,'%d\n',TestImage);
end
fclose(FID);
fprintf('Done!\n');
fprintf(' ');
toc;

% Mission accomplished
fprintf('-->> Mission accomplished!\n');
beep();