#include "arx.h"


namespace bdm {



	
void str_bitset ( bvec &out, ivec ns, int nbits ) {
	
for ( int i = 0; i < ns.length(); i++ ) {
		out ( ns ( i ) - 2 ) = 1;
	}
}


double seloglik1 ( str_aux in ) {
// This is the loglikelihood (non-constant part) - this should be used in
// frequent computation
	int len = length ( in.d );
	int p1  = in.posit1 - 1;

	double i1 = -0.5 * in.nu * log ( in.d ( p1 ) ) - 0.5 * sum ( log ( in.d.right ( len - p1 - 1 ) ) );
	double i0 = -0.5 * in.nu0 * log ( in.d0 ( p1 ) ) - 0.5 * sum ( log ( in.d0.right ( len - p1 - 1 ) ) );
	return i1 - i0;
//DEBUGGing print:
//fprintf('SELOGLIK1: str=%s loglik=%g\n', strPrintstr(in), l);*/
}


void sedydr ( mat &r, mat &f, double &Dr, double &Df, int R/*,int jl,int jh ,mat &rout, mat &fout, double &Drout, double &Dfoutint &kr*/ ) {
	/*SEDYDR dyadic reduction, performs transformation of sum of 2 dyads
	%
	% [rout, fout, Drout, Dfout, kr] = sedydr(r,f,Dr,Df,R,jl,jh);
	% [rout, fout, Drout, Dfout] = sedydr(r,f,Dr,Df,R);
	%
	% Description: dyadic reduction, performs transformation of sum of
	%  2 dyads r*Dr*r'+ f*Df*f' so that the element of r pointed by R is zeroed
	%
	%     r    : column vector of reduced dyad
	%     f    : column vector of reducing dyad
	%     Dr   : scalar with weight of reduced dyad
	%     Df   : scalar with weight of reducing dyad
	%     R    : scalar number giving 1 based index to the element of r,
	%            which is to be reduced to
	%            zero; the corresponding element of f is assumed to be 1.
	%     jl   : lower index of the range within which the dyads are
	%            modified (can be omitted, then everything is updated)
	%     jh   : upper index of the range within which the dyads are
	%            modified (can be omitted then everything is updated)
	%     rout,fout,Drout,dfout : resulting two dyads
	%     kr   : coefficient used in the transformation of r
	%            rnew = r + kr*f
	%
	% Description: dyadic reduction, performs transformation of sum of
	%            2 dyads r*Dr*r'+ f*Df*f' so that the element of r indexed by R is zeroed
	% Remark1: Constant mzero means machine zero and should be modified
	%           according to the precision of particular machine
	% Remark2: jl and jh are, in fact, obsolete. It takes longer time to
	%           compute them compared to plain version. The reason is that we
	%           are doing vector operations in m-file. Other reason is that
	%           we need to copy whole vector anyway. It can save half of time for
	%           c-file, if you use it correctly. (please do tests)
	%
	% Note: naming:
	%       se = structure estimation
	%       dydr = dyadic reduction
	%
	% Original Fortran design: V. Peterka 17-7-89
	% Modified for c-language: probably R. Kulhavy
	% Modified for m-language: L. Tesar 2/2003
	% Updated: Feb 2003
	% Project: post-ProDaCTool
	% Reference: none*/

	/*if nargin<6;
	   update_whole=1;
	else
	   update_whole=0;
	end;*/

	double mzero = 1e-32;

	if ( Dr < mzero ) {
		Dr = 0;
	}

	double r0   = r ( R, 0 );
	double kD   = Df;
	double kr   = r0 * Dr;


	Df   = kD + r0 * kr;

	if ( Df > mzero ) {
		kD = kD / Df;
		kr = kr / Df;
	} else {
		kD = 1;
		kr = 0;
	}

	Dr = Dr * kD;

// Try to uncomment marked stuff (*) if in numerical problems, but I don't
// think it can make any difference for normal healthy floating-point unit
//if update_whole;
	r = r - r0 * f;
//   rout(R) = 0;   // * could be needed for some nonsense cases(or numeric reasons?), normally not
	f = f + kr * r;
//   fout(R) = 1;   // * could be needed for some nonsense cases(or numeric reasons?), normally not
	/*else;
	   rout = r;
	   fout = f;
	   rout(jl:jh) = r(jl:jh) - r0 * f(jl:jh);
	   rout(R) = 0;
	   fout(jl:jh) = f(jl:jh) + kr * rout(jl:jh);
	end;*/
}



/*mat*/ void seswapudl ( mat &L, vec &d , int i/*, vec &dout*/ ) {
	/*%SESWAPUDL swaps information matrix in decomposition V=L^T diag(d) L
	%
	%   [Lout, dout] = seswapudl(L,d,i);
	%
	% L : lower triangular matrix with 1's on diagonal of the decomposistion
	% d : diagonal vector of diagonal matrix of the decomposition
	% i : index of line to be swapped with the next one
	% Lout : output lower triangular matrix
	% dout : output diagional vector of diagonal matrix D
	%
	% Description:
	%  Lout' * diag(dout) * Lout = P(i,i+1) * L' * diag(d) * L * P(i,i+1);
	%
	%  Where permutation matrix P(i,j) permutates columns if applied from the
	%  right and line if applied from the left.
	%
	% Note: naming:
	%       se = structure estimation
	%       lite = light, simple
	%       udl = U*D*L, or more precisely, L'*D*L, also called as ld
	%
	% Design  : L. Tesar
	% Updated : Feb 2003
	% Project : post-ProDaCTool
	% Reference: sedydr*/

	int j = i + 1;

	double pomd = d ( i );
	d ( i ) = d ( j );
	d ( j ) = pomd;

	L.swap_rows ( i, j );
	L.swap_cols ( i, j );

	
//% We must be working with LINES of matrix L !

    mat r = L.get_row ( i );
	r.transpose();
	mat f = L.get_row ( j );
	f.transpose();

    double Dr = d ( i );
	double Df = d ( j );

	sedydr ( r, f, Dr, Df, j );

    double r0 = r ( i, 0 );
	Dr = Dr * r0 * r0;
	r  = r / r0;

    mat pom_mat = r.transpose();
	L.set_row ( i, pom_mat.get_row ( 0 ) );
	pom_mat = f.transpose();
	L.set_row ( j, pom_mat.get_row ( 0 ) );

	d ( i )   = Dr;
	d ( j )   = Df;

	L ( i, i ) = 1;
	L ( j, j ) = 1;
}


void str_bitres ( bvec &out, ivec ns, int nbits ) {

  for ( int i = 0; i < ns.length(); i++ ) {
		out ( ns ( i ) - 2 ) = 0;
	}
}

str_aux sestrremove ( str_aux in, ivec removed_elements ) {
//% Removes elements from regressor
	
	int n_strL = length ( in.strL );
	str_aux out = in;
	
    for ( int i = 0; i < removed_elements.length(); i++ ) {

		int f = removed_elements ( i );
		int posit1 = ( find ( out.strL == 1 ) ) ( 0 );
		int positf = ( find ( out.strL == f ) ) ( 0 );
		for ( int g = positf - 1; g > posit1 - 1; g-- ) {
			//% BEGIN: We are swapping g and g+1 NOW!!!!
			seswapudl ( out.L, out.d, g );
			seswapudl ( out.L0, out.d0, g );

			int pom_strL = out.strL ( g );
			out.strL ( g ) = out.strL ( g + 1 );
			out.strL ( g + 1 ) = pom_strL;
			//% END
		}
	
	
	}
	out.posit1 = ( find ( out.strL == 1 ) ) ( 0 ) + 1;
	out.strRgr = out.strL.right ( n_strL - out.posit1 );
	out.strMis = out.strL.left ( out.posit1 - 1 );
	str_bitres ( out.bitstr, removed_elements, out.nbits );
	out.loglik = seloglik1 ( out );

	return out;
}


ivec setdiff ( ivec a, ivec b ) {
	ivec pos;

	for ( int i = 0; i < b.length(); i++ ) {
		pos = find ( a == b ( i ) );
		for ( int j = 0; j < pos.length(); j++ ) {
			a.del ( pos ( j ) - j );
		}
	}
	return a;
}




void add_new ( Array<str_aux> &global_best, str_aux newone, int nbest ) {
// Eventually add to global best, but do not go over nbest values
// Also avoids repeating things, which makes this function awfully slow

	int  addit, i = 0;
	if ( global_best.length() >= nbest ) {
		//logliks = [global_best.loglik];

	for ( int j = 1; j < global_best.length(); j++ ) {
			if ( global_best ( j ).loglik < global_best ( i ).loglik ) {
				i = j;
			}
		}
	if ( global_best ( i ).loglik < newone.loglik ) {
			//         if ~any(logliks == new.loglik);
			addit = 1;


	for (int j = 0; j < global_best.length(); j++){
		if (newone.bitstr == global_best(j).bitstr){
         addit = 0;
         break;
		}
	}
	if ( addit ) {
		global_best ( i ) = newone;
				// DEBUGging print:
				// fprintf('ADDED structure, add_new: %s, loglik=%g\n', strPrintstr(new), new.loglik);
		}
	 }
	} else
		global_best = concat ( global_best, newone );

}






str_aux sestrinsert ( str_aux in, ivec inserted_elements ) {
// Moves elements into regressor
	int n_strL = in.strL.length();
	str_aux out = in;
	for ( int j = 0; j < inserted_elements.length(); j++ ) {
		int f = inserted_elements ( j );
		int posit1 = ( find ( out.strL == 1 ) ) ( 0 );
		int positf = ( find ( out.strL == f ) ) ( 0 );
		for ( int g = positf; g <= posit1 - 1; g++ ) {

			// BEGIN: We are swapping g and g+1 NOW!!!!
			seswapudl ( out.L,  out.d,  g );
			seswapudl ( out.L0, out.d0, g );

			int pom_strL = out.strL ( g );
			out.strL ( g ) = out.strL ( g + 1 );
			out.strL ( g + 1 ) = pom_strL;

			// END
		}
	}

	out.posit1 = ( find ( out.strL == 1 ) ) ( 0 ) + 1;
	out.strRgr = out.strL.right ( n_strL - out.posit1 );
	out.strMis = out.strL.left ( out.posit1 - 1 );
	str_bitset ( out.bitstr, inserted_elements, out.nbits );
	out.loglik = seloglik1 ( out );

	return out;
}

double seloglik2 ( str_aux in ) {
// This is the loglikelihood (constant part) - this should be added to
// everything at the end. It needs some computation, so it is useless to
// make it for all the stuff
	double logpi = log ( pi );

	double i1 = lgamma ( in.nu / 2 ) - 0.5 * in.nu * logpi;
	double i0 = lgamma ( in.nu0 / 2 ) - 0.5 * in.nu0 * logpi;
	return i1 - i0;
}




ivec straux1 ( ldmat Ld, double nu, ldmat Ld0, double nu0, ivec belief, int nbest, int max_nrep, double lambda, int order_k, Array<str_aux> &rgrsout/*, stat &statistics*/ ) {
// see utia_legacy/ticket_12/ implementation and str_test.m

	const mat &L = Ld._L();
	const vec &d = Ld._D();

	const mat &L0 = Ld0._L();
	const vec &d0 = Ld0._D();

	int n_data = d.length();

	ivec belief_out = find ( belief == 4 ) + 2; // we are avoiding to put this into regressor
	ivec belief_in  = find ( belief == 1 ) + 2; // we are instantly keeping this in regressor

	str_aux full;

	full.d0  = d0;
	full.nu0 = nu0;
	full.L0  = L0;
	full.L   = L;
	full.d   = d;
	full.nu0 = nu0;
	full.nu  = nu;
	full.strL = linspace ( 1, n_data );
	full.strRgr = linspace ( 2, n_data );
	full.strMis = ivec ( 0 );
	full.posit1 = 1;
	full.bitstr.set_size ( n_data - 1 );
	full.bitstr.clear();
	str_bitset ( full.bitstr, full.strRgr, full.nbits );
	full.loglik = seloglik1 ( full );    // % loglikelihood


//% construct full and empty structure
	full = sestrremove ( full, belief_out );
	str_aux empty = sestrremove ( full, setdiff ( full.strRgr, belief_in ) );

//% stopping rule calculation:

	bmat local_max ( 0, 0 );
	int to, muto = 0;

//% statistics:
//double cputime0 = cputime;
//if nargout>=3;

	CPU_Timer timer;
	timer.start();
	ivec mutos ( max_nrep + 2 );
	vec maxmutos ( max_nrep + 2 );
	mutos.zeros();
	maxmutos.zeros();


//end;
//% ----------------------

//% For stopping-rule calculation
//%so       = 2^(n_data -1-length(belief_in)- length(belief_out)); % do we use this ?
//% ----------------------

	ivec all_str = linspace ( 1, n_data );
	Array<str_aux> global_best ( 1 );
	global_best ( 0 ) = full;


//% MAIN LOOP is here.

	str_aux   best;
	for ( int n_start = -1; n_start <= max_nrep; n_start++ ) {
		str_aux  last, best;

		to = n_start + 2;

		if ( n_start == -1 ) {
			//% start from the full structure
			last = full;
		} else {
			if ( n_start == 0 )
				//% start from the empty structure
				last = empty;

			else {
				//% start from random structure
            
		
		
			  ivec last_str = find ( to_bvec( concat( 0, floor_i ( 2 * randun ( n_data - 1 )) ) ) ) + 1;// this creates random vector consisting of indexes, and sorted
			  last = sestrremove ( full, setdiff ( all_str, concat( concat( 1 , last_str ), empty.strRgr ) ) );

			}
		}
		//% DEBUGging print:
		//%fprintf('STRUCTURE generated            in loop %2i was %s\n', n_start, strPrintstr(last));

		//% The loop is repeated until likelihood stops growing (break condition
		//% used at the end;


		while ( 1 ) {
			//% This structure is going to hold the best elements
			best = last;
			//% Nesting by removing elements (enpoorment)
			ivec  removed_items = setdiff ( last.strRgr, belief_in );

			ivec removed_item;
			str_aux newone;

			for ( int i = 0; i < removed_items.length(); i++ ) {
				removed_item = vec_1 ( removed_items ( i ) );
				newone = sestrremove ( last, removed_item );
				if ( nbest > 1 ) {
					add_new ( global_best, newone, nbest );
				}
				if ( newone.loglik > best.loglik ) {
					best = newone;
				}
			}
			//% Nesting by adding elements (enrichment)
			ivec added_items = setdiff( last.strMis, belief_out );
			ivec added_item;

			for ( int j = 0; j < added_items.length(); j++ ) {
				added_item = vec_1 ( added_items ( j ) );
				newone = sestrinsert ( last, added_item );
				if ( nbest > 1 ) {
					add_new ( global_best, newone, nbest );
				}
				if ( newone.loglik > best.loglik ) {
					best = newone;
				}
			}


			//% Break condition if likelihood does not change.
			if ( best.loglik <= last.loglik )
				break;
			else
				//% Making best structure last structure.
				last = best;
			}


		// % DEBUGging print:
		//%fprintf('STRUCTURE found (local maxima) in loop %2i was %s randun_seed=%11lu randun_counter=%4lu\n', n_start, strPrintstr(best), randn('seed'), RANDUN_COUNTER);

		//% Collecting of the best structure in case we don't need the second parameter
		if ( nbest <= 1 ) {
			if ( best.loglik > global_best ( 0 ).loglik ) {
				global_best = best;
			}
		}

		//% uniqueness of the structure found
		int append = 1;

		for ( int j = 0; j  < local_max.rows() ; j++ ) {
			if ( best.bitstr == local_max.get_row ( j ) ) {
				append = 0;
				break;
			}
		}


		if ( append ) {
			local_max.append_row ( best.bitstr );
			muto = muto + 1;
		}

		//% stopping rule:
		double maxmuto = ( to - order_k - 1 ) / lambda - to + 1;
		if ( to > 2 ) {
			if ( maxmuto >= muto ) {
				//% fprintf('*');
				break;
			}
		}

		// do statistics if necessary:
		//if (nargout>=3){
		mutos ( to - 1 )    = muto;
		maxmutos ( to - 1 ) = maxmuto;
		//}
	}

//% Aftermath: The best structure was in: global_best

//% Updating loglikelihoods: we have to add the constant stuff

	for ( int f = 0 ; f < global_best.length(); f++ ) {
		global_best ( f ).loglik = global_best ( f ).loglik + seloglik2 ( global_best ( f ) );
	}

//% Making first output parameter:

	int max_i = 0;
	for ( int j = 1; j < global_best.length(); j++ )
		if ( global_best ( max_i ).loglik < ( global_best ( j ).loglik ) ) max_i = j;

	best = global_best ( max_i );

//% Making the second output parameter

	vec logliks ( global_best.length() );
	for ( int j = 0; j < logliks.length(); j++ )
		logliks ( j ) = global_best ( j ).loglik;

	ivec i = sort_index ( logliks );
	rgrsout.set_length ( global_best.length() );

	for ( int j = global_best.length() - 1; j >= 0; j-- )
		rgrsout ( j ) = global_best ( i ( j ) );

//if (nargout>=3);

	str_statistics statistics;

	statistics.allstrs = 2 ^ ( n_data - 1 - length ( belief_in ) - length ( belief_out ) );
	statistics.nrand   = to - 2;
	statistics.unique  = muto;
	statistics.to  = to;
	statistics.cputime_seconds = timer.get_time();
	statistics.itemspeed       = statistics.to / statistics.cputime_seconds;
	statistics.muto = muto;
	statistics.mutos = mutos;
	statistics.maxmutos = maxmutos;
//end;

	return best.strRgr;

}






}