function p = simvsgdtruth(sim, gdtruth)
% p = simvsgdtruth(sim, gdtruth)
%   Compare a similarity metric against (survey) ground truth.
%
%   Inputs:
%     sim      NxN matrix of artist similarities; sim(a,b) larger means
%              artist b is ranked as more similar to artist a.
%     gdtruth  ground truth from the musicseer data, one row per triplet:
%                  trial target chosen notchosen
%              A single trial consists of several consecutive rows
%              differing only in the <notchosen> field.  Rows belonging
%              to the same trial MUST be contiguous (e.g. gdtruth sorted
%              by trial); a new trial starts wherever the trial column
%              changes value.  Artist IDs <= 0 mean "not in the set":
%              trials whose target or chosen is <= 0 are skipped, and
%              notchosen entries <= 0 are dropped.
%
%   Output:
%     p = [firstplace, avrank] where
%       firstplace  proportion of valid trials for which sim ranks the
%                   user's choice above every notchosen alternative
%                   (ties get fractional credit 1/(1+number of tied
%                   alternatives)),
%       avrank      average rank of the user's choice under sim, rescaled
%                   so that best = 1 and worst = 10.  Ties are broken at
%                   random, so avrank can vary between calls when sim
%                   contains ties.
%     Trials with no usable notchosen entries count as a first-place
%     agreement with rank 5.5.  If there are no valid trials, p is
%     [NaN, NaN].
%
%   Two summary lines are printed with disp.
%
%   How to make gdtruth:
%     >> [name, sqlid] = textread('aset400.3-canon-musicseer.ids','%s %d');
%     >> sql2topset = zeros(1,7000);
%     >> sql2topset(sqlid+2) = 1:400;  % +2 because some sqlids are -1
%     >> [tr, sg, uid, trg, cho, nch] = textread('filtered-game.txt','%d %c %s %d %d %d');
%     >> gdtrG = [tr,sql2topset(trg+2)',sql2topset(cho+2)',sql2topset(nch+2)'];
%   i.e. artist IDs of 0 are ignored.
%
% 2003-08-12 dpwe@ee.columbia.edu

% Rank scale: ranks are mapped linearly onto [RANKLO, RANKLO+RANKSPAN]
RANKLO = 1;
RANKSPAN = 9;
% Rank credited to a trial with no alternatives: midpoint of the scale
RANKMID = RANKLO + RANKSPAN/2;

% Locate the first row of every trial.  Row 1 always starts a trial;
% after that a trial starts wherever the trial id differs from the row
% above.  (No sentinel value is used, so any trial id, including -1,
% is handled.)
nrows = size(gdtruth, 1);
if nrows == 0
  trialstart = zeros(1, 0);
else
  trialid = gdtruth(:, 1);
  trialstart = find([true; trialid(2:end) ~= trialid(1:end-1)])';
end
ntrials = length(trialstart);
% Dummy at the end so trialstart(k+1)-1 is always the last row of trial k
trialstart = [trialstart, nrows + 1];

disp([num2str(ntrials), ' trials, ', num2str(size(gdtruth,1)),' triplets']);

ngood = 0;      % accumulated first-place agreement (fractional on ties)
ngtri = 0;      % number of valid trials
avrank = 0;     % accumulated rescaled rank of the chosen artist
emptynch = 0;   % valid trials that had no usable notchosen entries

% OK, consider each trial
for tri = 1:ntrials
  data = gdtruth(trialstart(tri):(trialstart(tri+1)-1), :);
  % target and chosen are taken from the trial's first row
  target = data(1, 2);
  chosen = data(1, 3);
  nchosen = data(:, 4);

  % Skip this trial if target or chosen is outside the topset
  if target > 0 && chosen > 0

    ngtri = ngtri + 1;

    % Remove unknown nchosens
    nchosen = nchosen(nchosen > 0);

    if isempty(nchosen)
      % Special case if there are no unchosen alternates
      ngood = ngood + 1;
      avrank = avrank + RANKMID;
      emptynch = emptynch + 1;
    else
      % Chosen artist is deliberately kept at position 1 of the list
      list = [chosen, nchosen'];
      nlist = length(list);

      % Examine the SIM: is the chosen the most similar?
      simcho = sim(target, chosen);
      simnch = sim(target, nchosen);
      maxnch = max(simnch);
      if simcho > maxnch
        ngood = ngood + 1;
      elseif simcho == maxnch
        % Partial credit for a tie
        ngood = ngood + 1/(1 + sum(maxnch == simnch));
      end

      % Randomize list order so that ties average out (slow)
      perm = randperm(nlist);
      % Calculate the sims in the shuffled order
      sims = sim(target, list(perm));
      % Rank the sims
      [unused, simorder] = sort(-sims);  % so sim in descending order...
      % perm(simorder) gives original list positions in rank order; the
      % chosen artist is original position 1.  Tracking the position
      % rather than matching on artist id keeps the rank a scalar even
      % if the chosen id also occurs among the notchosen ids.
      chorank = find(perm(simorder) == 1);
      avrank = avrank + (RANKLO + (chorank-1)/(nlist-1)*RANKSPAN);
    end

  end
end

p = [ngood/ngtri, avrank/ngtri];

disp([num2str(ngtri),' valid trials (', num2str(emptynch), ' with empty notchosen), ', sprintf('%.2f',100*p(1)),'% first place agreement, avrank=', num2str(p(2))]);
