function [strout, rgrsout, statistics] = straux1(L, d, nu, L0, d0, nu0, belief, nbest, max_nrep, lambda, order_k); % structure estimation based on LD decomposition % % This m/mex file is internally called by facstr, IT IS NOT TO BE CALLED % BY USER!! Documentation guiven for reference. % % % [strout, rgrsout, statistics] = straux1(L, d, nu, L0, d0, nu0, belief, nbest, max_nrep, lambda, order_k); % % L : Actual LD decomposition based on data % d : Actual LD decomposition based on data % nu : Actual data amount % L0 : prior information % d0 : prior information % nu0 : prior data amount % belief: user's belief on maximum structure items % (1 items must be present, 2 items are probably present % 4 items must not be present, 3 items are probably not present) % 2 and 3 is the same % nbest : how many "best" regressors are maintained % strout : structure estimated (of the regressor, richest is 2:length(d) % max_nrep : maximal number of random starts in search for the best % structure % lambda : stooping rule threshold % order_k : order of k % % Design : L. Tesar % Updated : Feb-Apr 2003 % Project : post-ProDaCTool % References: (only local inline functions) % % Todo: in add_new, we need to implement structure comparison, instead of % loglikelihood comparison: ~any(logliks == new.loglik) % randun seed stuff: %global SEED %SEED = randn('seed'); % Argument's checking: if nargin<8; if nargout>=2; nbest = 2; else % If we don't need the second parameter it is better to avoid % calculating it at all, because it is very costly (5x slowdown). nbest = 1; end; end; if nargin< 6, error('Incorrect number of input parameters in straux1'); end; if nargin< 7, belief = []; end; % Don't belive anybody. if nargin< 9, max_nrep = 3; end; if nargin<10, lambda = 0.75; end; if nargin<11, order_k = 2; end; % Arguments were just checked. n_data = length(d); belief_out = find(belief==4)+1; % we are avoiding to put this into regressor belief_in = find(belief==1)+1; % we are instantly keeping this in regressor full.d0 = d0; full.nu0 = nu0; full.L0 = L0; full.L = L; full.d = d; full.nu = nu; full.strL = 1:n_data; % Current structure of L and d full.strRgr = 2:n_data; % Structure elements currently inside regressor (after regressand) full.strMis = []; % structure elements, that are currently outside regressor (before regressand) full.posit1 = 1; % regressand position full.nbits = floor(log2(bitmax))-1; % number of bits available in double full.bitstr = str_bitset(zeros(1,floor(n_data/full.nbits)+1),full.strRgr,full.nbits); full.loglik = seloglik1(full); % loglikelihood % construct full and empty structure full = sestrremove(full,belief_out); empty = sestrremove(full,setdiff(full.strRgr,belief_in)); % stopping rule calculation: local_max = []; muto = 0; % statistics: cputime0 = cputime; if nargout>=3; mutos = zeros(1,max_nrep+2); maxmutos = zeros(1,max_nrep+2); end; % ---------------------- % For stopping-rule calculation %so = 2^(n_data -1-length(belief_in)- length(belief_out)); % do we use this ? % ---------------------- all_str = 1:n_data; global_best = full; % MAIN LOOP is here. for n_start = -1:max_nrep; to = n_start+2; if n_start == -1; % start from the full structure last = full; elseif n_start == 0; % start from the empty structure last = empty; else % start from random structure last_str = find([ 0 floor(2*randun(1,n_data-1))]); % this creates random vector consisting of indexes, and sorted last = sestrremove(full,setdiff(all_str,[1 last_str empty.strRgr])); end; % DEBUGging print: %fprintf('STRUCTURE generated in loop %2i was %s\n', n_start, strPrintstr(last)); % The loop is repeated until likelihood stops growing (break condition % used at the end; while 1; % This structure is going to hold the best elements best = last; % Nesting by removing elements (enpoorment) for removed_item = setdiff(last.strRgr,belief_in); new = sestrremove(last,removed_item); if nbest>1; global_best = add_new(global_best,new,nbest); end; if new.loglik>best.loglik; best = new; end; end; % Nesting by adding elements (enrichment) for added_item = setdiff(last.strMis,belief_out); new = sestrinsert(last,added_item); if nbest>1; global_best = add_new(global_best,new,nbest); end; if new.loglik>best.loglik; best = new; end; end; % Break condition if likelihood does not change. if best.loglik <= last.loglik; break; else % Making best structure last structure. last = best; end; end; % DEBUGging print: %fprintf('STRUCTURE found (local maxima) in loop %2i was %s randun_seed=%11lu randun_counter=%4lu\n', n_start, strPrintstr(best), randn('seed'), RANDUN_COUNTER); % Collecting of the best structure in case we don't need the second parameter if nbest<=1; if best.loglik>global_best.loglik; global_best = best; end; end; % uniqueness of the structure found if ~ismember(best.bitstr,local_max,'rows'); local_max = [local_max ; best.bitstr]; muto = muto + 1; end; % stopping rule: maxmuto = (to-order_k-1)/lambda-to+1; if to>2; if maxmuto>=muto; % fprintf('*'); break; end; end; % do statistics if necessary: if nargout>=3; mutos(to) = muto; maxmutos(to) = maxmuto; end; end; % Aftermath: The best structure was in: global_best % Updating loglikelihoods: we have to add the constant stuff for f=1:length(global_best); global_best(f).loglik = global_best(f).loglik + seloglik2(global_best(f)); end; % Making first output parameter: [lik i] = max([global_best.loglik]); best = global_best(i); strout = best.strRgr; % Making the second output parameter [lik i] = sort([global_best.loglik]); rgrsout = global_best(i(length(i):-1:1)); if (nargout>=3); statistics.allstrs = 2^(n_data -1-length(belief_in) - length(belief_out)); statistics.nrand = to-2; statistics.unique = muto; statistics.to = to; statistics.cputime_seconds = cputime - cputime0; statistics.itemspeed = statistics.to / statistics.cputime_seconds; statistics.muto = muto; statistics.mutos = mutos; statistics.maxmutos = maxmutos; end; % randun seed stuff: %randn('seed',SEED); % --------------------- END of MAIN program -------------------- % This is needed for bitstr manipulations function out = str_bitset(in,ns,nbits) out = in; for n = ns; index = 1+floor((n-2)/nbits); bitindex = 1+rem(n-2,nbits); out(index) = bitset(out(index),bitindex); end; function out = str_bitres(in,ns,nbits) out = in; for n = ns; index = 1+floor((n-2)/nbits); bitindex = 1+rem(n-2,nbits); mask = bitset(0,bitindex); out(index) = bitxor(bitor(out(index),mask),mask); end; function out = strPrintstr(in) out = '0'; nbits = in.nbits; for f = 2:length(in.d0); index = 1+floor((f-2)/nbits); bitindex = 1+rem(f-2,nbits); if bitget(in.bitstr(index),bitindex); out(f) = '1'; else; out(f) = '0'; end; end; function global_best_out = add_new(global_best,new,nbest) % Eventually add to global best, but do not go over nbest values % Also avoids repeating things, which makes this function awfully slow if length(global_best)>=nbest; logliks = [global_best.loglik]; [loglik i] = min(logliks); global_best_out = global_best; if loglik mzero; kD = kD / Dfout; kr = kr / Dfout; else; kD = 1; kr = 0; end; Drout = Dr * kD; % Try to uncomment marked stuff (*) if in numerical problems, but I don't % think it can make any difference for normal healthy floating-point unit if update_whole; rout = r - r0*f; % rout(R) = 0; % * could be needed for some nonsense cases(or numeric reasons?), normally not fout = f + kr*rout; % fout(R) = 1; % * could be needed for some nonsense cases(or numeric reasons?), normally not else; rout = r; fout = f; rout(jl:jh) = r(jl:jh) - r0 * f(jl:jh); rout(R) = 0; fout(jl:jh) = f(jl:jh) + kr * rout(jl:jh); end;