%
% t-CWT Copyright (C) 2003-2015 Vladimir Bostanov. Type "tcwt_help".
%

% This file is part of t-CWT.
%
% t-CWT is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published
% by the Free Software Foundation, either version 3 of the License,
% or (at your option) any later version.
%
% t-CWT is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty
% of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
% See the GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with t-CWT; see the file COPYING or the file tcwt_gpl.m
% If not, see <http://www.gnu.org/licenses/>.

%: tcwt_f2pc(DSNL,Npc,Nstd,MinGood) Principal Component Transform (PCT),
%: Principal Component Analysis (PCA), and Multivariate Outlier Detection.
%:
%: The function iterates over DSNL reading and processing data from
%: the INPUT FILES and saving results to the corresponding OUTPUT FILES.
%:
%: FUNCTION ARGUMENTS
%:
%:   DSNL     DataSet Name List (DSN List) <tcwt_help dir2dsnl>
%:
%:   Npc      VALUE      defines   PCA CRITERION <tcwt_help math>
%:
%:             Npc > 1              the first Npc eigenvalues will be retained
%:             Npc = 1              all eigenvalues will be retained
%:             0 < Npc < 1          percentage of variance to be explained
%:             Npc = 0              average eigenvalue criterion
%:
%:   Nstd > 2 Number of standard deviations for the single-trial outlier
%:            detection criterion: D > Mean(D) + Nstd * StdDev(D), where
%:            D = single-trial Mahalanobis Distance from mean <tcwt_help math>
%:
%:   MinGood  Defines the dataset outlier detection criterion <tcwt_help math>:
%:            number_of_good_trials < MinGood * number_of_trials_in_dataset
%:
%: INPUT FILES      INPUT VARIABLES <tcwt_help t2f>
%:
%:   ./DSN.f.mat     Verp,CIerp
%:   ./DSN.ri0.mat   RIerp, DSNLpool <tcwt_help f2pool>
%:
%: OUTPUT FILES     OUTPUT VARIABLES
%:
%:   ./DSN.pc.mat    pcTerp,pcEV
%:   ./DSN.ri1.mat   RIerp, DSNLpool
%:   ./DSN.ri1.txt   Text output
%:
%: OUTPUT VARIABLES
%:
%:   pcTerp   Reduced PCT matrix. <tcwt_help math>. The principal components
%:            pcVerp of the ERP sample Verp are: pcVerp = Verp * pcTerp
%:
%:   pcEV     Reduced PCT eigenvalues <tcwt_help math>
%:
%:   RIerp    ERP Row Index; 1st row contains outlier marks <tcwt_help t2f>

%       10        20        30        40        50        60        70        80


function tcwt_f2pc(dsnl,Npc,Nstd,MinGood);
% TCWT_F2PC  Iterative PCT/PCA with multivariate outlier detection.
%
% For every row DSN of dsnl the function loads Verp from DSN.f.mat and
% RIerp (and DSNLpool) from DSN.ri0.mat, repeatedly computes a PCT on the
% trials not marked as outliers, marks new outliers by their Mahalanobis
% distance, and finally saves pcTerp,pcEV to DSN.pc.mat, RIerp,DSNLpool to
% DSN.ri1.mat and the text log to DSN.ri1.txt.
%
%   dsnl     char matrix, one dataset name per row
%   Npc      PCA criterion, passed to tcwt_pct
%   Nstd     real scalar >= 2 (see NOTE below)
%   MinGood  real scalar in [0,1]
%
% Errors are raised for invalid Nstd or MinGood.

rin1='0'; % suffix of the input  row-index file (.ri0.mat)
rin2='1'; % suffix of the output row-index files (.ri1.mat, .ri1.txt)

% NOTE(review): the help text states "Nstd > 2" but this check accepts
% Nstd == 2 as well; confirm which one is intended.
if (~isscalar(Nstd) || ~isnumeric(Nstd) || ~isreal(Nstd) || Nstd<2 )
 error('Invalid single-trial outlier detection criterion <tcwt_help f2pc>.');
end

if (~isscalar(MinGood) || ~isnumeric(MinGood) || ~isreal(MinGood) || MinGood<0 || MinGood>1 )
 error('Invalid dataset outlier detection criterion <tcwt_help f2pc>.');
end

tI = clock();

% Padding strings as wide as a dataset name, used to align the table
% columns under the dataset name printed at the start of each table row.
spaceS = repmat(' ',1,size(dsnl,2));
minusS = repmat('-',1,size(dsnl,2));
equalS = repmat('=',1,size(dsnl,2));
ulineS = repmat('_',1,size(dsnl,2));

if (Npc==round(Npc)) NpcStr = sprintf('%u',Npc); else NpcStr = sprintf('%f',Npc); end

% Common header; it is prepended to the text log of every dataset.
O=tcwt_txt('\n');
O=[O,tcwt_txt('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n')];
O=[O,tcwt_txt('tcwt_f2pc: Principal Component Transform (PCT) & Multivariate Outlier Detection\n')];
O=[O,tcwt_txt('-------------------------------------------------------------------------------\n')];
O=[O,tcwt_txt('PCA criterion = %s, Nstd = %f, MinGood = %f \n', NpcStr,Nstd,MinGood)];
O=[O,tcwt_txt('D = Single Trial Mahalanobis Distance from Mean\n')];
O=[O,tcwt_txt('Single-trial outlier criterion: D > Mean(D) + Nstd * StdDev(D)\n')];
O=[O,tcwt_txt('Dataset outlier criterion: Good Trials < MinGood*100%% of All Trials\n')];
O=[O,tcwt_txt('-------------------------------------------------------------------------------\n')];

% Criterion of the optional preliminary PCT; 1 disables it (identity below).
Npc0=1;

for n=1:size(dsnl,1)
 tIn = clock();
 dsn=dsnl(n,:);
 % Load only the variables that are used or re-saved below, so that other
 % variables stored in the files cannot overwrite locals of this function.
 load([dsn '.f.mat'],'Verp');
 load([dsn '.ri' rin1 '.mat'],'RIerp','DSNLpool');

 % Every dataset starts from the PCA criterion given by the caller.
 % The loop below may freeze the criterion to a fixed number of
 % components; that must not carry over to the next dataset.
 NpcCrit = Npc;

 On=O;
 On=[On,tcwt_txt('%s______________________________________________\n', ulineS)];
 On=[On,tcwt_txt('%s            SINGLE TRIALS        DATASETS\n', spaceS)];
 On=[On,tcwt_txt('%s Itr  PC   Left   Outliers     Left  Outliers\n', spaceS)];
 On=[On,tcwt_txt('%s----------------------------------------------\n', minusS)];
 %                   123 123  12345 12345 (12.1%)   123 123 (12.1%)
 formatStr =     '%s %3u %3u  %5u %5u (%4.1f%%)  %3u %3u (%4.1f%%)\n';

 pSout = logical(RIerp(:,1)); % Previously found Outliers
 nSout = pSout;               % outlier mask of the current iteration
 Ntr=length(nSout);
 nNout=sum(nSout);

 Sout=~nSout; % differs from nSout, so the while loop is entered

 if (Npc0==1)
  pcTerp0 = 1;
 else
  pcTerp0 = tcwt_pct(cov( Verp(~pSout,:) ),Npc0);
 end
 Verp = Verp*pcTerp0;

 nNpc=size(Verp,2);
 Nds=NaN; Ndsout=NaN; % stay NaN unless the dataset exclusion step runs
 N=0;
 while ( ~all(Sout==nSout) ) % iterate until the outlier mask is stable
  N=N+1;
  Sout=nSout; Nout=nNout;
  [pcTerp,pcEV]=tcwt_pct(cov( Verp(~Sout,:) ),NpcCrit);
  pcVerp=Verp*pcTerp;
  if ( nNpc==length(pcEV) )
   % Same number of components as in the previous iteration:
   % use that number as the criterion from now on.
   % NOTE(review): if that number is 1, the help text says a criterion
   % of 1 means "retain all eigenvalues" - confirm against tcwt_pct.
   NpcCrit=nNpc;
  else
   nNpc=length(pcEV);
  end
  % Centre on the mean of the non-outlier trials.
  pcVerp = pcVerp - repmat( mean(pcVerp(~Sout,:)), size(pcVerp,1), 1 );
  % Distance of every trial: squared components weighted by 1./pcEV.
  D = sqrt( pcVerp.^2 * (1./pcEV)' );
  nSout = D > mean(D(~Sout)) + Nstd*std(D(~Sout)); % Outlier Criterion
  nSout = nSout | pSout;
  nNout=sum(nSout);
  if ( nNout<=Nout && size(RIerp,2)>2 ) % Exclude whole individual datasets
   Nds=0; Ndsout=0;
   % Individual datasets are the combinations of the values in columns
   % 2 and 3 of RIerp (presumably condition and subject - TODO confirm).
   for nCnd=unique(RIerp(:,2))'
    Scnd = RIerp(:,2)==nCnd;
    for nSbj=unique(RIerp(Scnd,3))'
     Nds=Nds+1;
     Ssbj = RIerp(:,3)==nSbj;
     if ( sum(~nSout & Scnd & Ssbj)/sum(Scnd & Ssbj) < MinGood )
      Ndsout=Ndsout+1;
      nSout = nSout | ( Scnd & Ssbj ); % Exclude individual dataset
     end
    end
   end
   nNout=sum(nSout);
  end
  if ( nNout<=Nout )      % Secure convergence by
   nSout = nSout | Sout; % retaining outliers detected at previous iteration
   nNout=sum(nSout);
  end
  nNoutPc=100*nNout/Ntr; NdsoutPc=100*Ndsout/Nds;
  On=[On,tcwt_txt(formatStr,dsn,N,length(pcEV),Ntr-nNout,nNout,nNoutPc,Nds-Ndsout,Ndsout,NdsoutPc)];
 end
 RIerp(:,1) = nSout+0; % store the outlier marks as numbers, not logicals
 pcTerp = pcTerp0*pcTerp; % combine preliminary and final transform
 On=[On,tcwt_txt('%s==============================================\n', equalS)];
 On=[On,tcwt_txt('tcwt_f2pc: %s: Elapsed time  =  %f s\n', dsn, etime(clock(),tIn) )];

 save([dsn '.pc.mat'],'pcTerp','pcEV');
 save([dsn '.ri' rin2 '.mat'],'RIerp','DSNLpool');
 tcwt_txtsave([dsn '.ri' rin2 '.txt'],On);
end
tE = etime(clock(),tI);
O=[O,tcwt_txt('-------------------------------------------------------------------------------\n')];
O=[O,tcwt_txt('tcwt_f2pc: Datasets processed: %u\n',n)];
O=[O,tcwt_txt('ELAPSED TIME: %s\n', tcwt_hms(tE))];
O=[O,tcwt_txt('  Total: %f seconds\n', tE )];
O=[O,tcwt_txt('    per dataset: %f seconds\n', tE/n)];
O=[O,tcwt_txt('_______________________________________________________________________________\n')];


