/*!
  \file
  \brief Common DataSources.
  \author Vaclav Smidl.

  -----------------------------------
  BDM++ - C++ library for Bayesian Decision Making under Uncertainty

  Using IT++ for numerical operations
  -----------------------------------
*/

#ifndef DATASOURCE_H
#define DATASOURCE_H


#include "../base/bdmbase.h"
#include "../stat/exp_family.h"
#include "../base/user_info.h"

namespace bdm {
/*!
* \brief Memory storage of off-line data column-wise

The data are stored in an internal matrix \c Data . Each column of Data corresponds to one discrete time observation \f$t\f$. Access to this matrix is via indices \c rowid and \c delays.

The data can be loaded from a file.
*/
class MemDS : public DS {
	protected:
		//! internal matrix of data, one column per discrete time instant
		mat Data;
		//! index of the active column in the Data matrix
		int time;
		//!  vector of rows that are presented in Dt
		ivec rowid;
		//! vector of delays that are presented in Dt
		ivec delays;

	public:
		//! Read the current data record into \c dt (implemented outside this header)
		void getdata ( vec &dt );
		//! Read the selected part of the current data record into \c dt (implemented outside this header)
		void getdata ( vec &dt, const ivec &indeces );
		//! Assign random variables of the data and of the inputs (implemented outside this header)
		void set_rvs ( RV &drv, RV &urv );

		//! Writing is not available for stored data; always reports an error
		void write ( vec &ut ) {
			bdm_error ( "MemDS::write is not supported" );
		}

		//! Writing is not available for stored data; always reports an error
		void write ( vec &ut, ivec &indices ) {
			bdm_error ( "MemDS::write is not supported" );
		}

		//! Move the data source by one step (implemented outside this header)
		void step();
		//! Default constructor: no data; the active column index starts at 0 so that it is never read uninitialized
		MemDS () : time ( 0 ) {}
		//! Construct from a data matrix and the row/delay selection (implemented outside this header)
		MemDS ( mat &Dat, ivec &rowid0, ivec &delays0 );
};

/*!  \brief Simulate data from a static pdf
Trivial example of a data source, could be used for tests of some estimation algorithms. For example, simulating data from a mixture model and feeding them to mixture model estimators.
*/

class EpdfDS: public DS {
	protected:
		//! internal pointer to epdf from which we samplecond
		shared_ptr<epdf> iepdf;
		//! internal storage of data sample
		vec dt;
	public:
		void step() {
			dt=iepdf->sample();
		}
		void getdata ( vec &dt_out ) {
			dt_out = dt;
		}
		void getdata ( vec &dt_out, const ivec &ids ) {
			dt_out = dt ( ids );
		}
		const RV& _drv() {
			return iepdf->_rv();
		}

		/*!
		\code
		class = "PdfDS";
		epdf = {class="epdf_offspring", ...}// list of points
		\endcode

		*/
		void from_setting ( const Setting &set ) {
			iepdf=UI::build<epdf> ( set,"epdf",UI::compulsory );
			dt = zeros ( iepdf->dimension() );
		}
};
UIREGISTER ( EpdfDS );

/*!  \brief Simulate data from conditional density
Still having only one density but allowing conditioning on either input or delayed values.
*/
class MpdfDS :public DS {
	protected:
		//! internal pointer to epdf from which we samplecond
		shared_ptr<mpdf> impdf;
		//! internal storage of data sample
		vec yt;
		//! input vector
		vec ut;
		//! datalink between ut and regressor
		datalink_buffered ut2rgr;
		//! datalink between yt and regressor
		datalink_buffered yt2rgr;
		//! numeric values of regressor
		vec rgr;
		
	public:
		void step() {
			yt2rgr.step(yt); // y is now history
			ut2rgr.filldown ( ut,rgr );
			yt2rgr.filldown ( yt,rgr );
			yt=impdf->samplecond ( rgr );
			ut2rgr.step(ut); //u is now history
		}
		void getdata ( vec &dt_out ) {
			bdm_assert_debug(dt_out.length()>=utsize+ytsize,"Short output vector");
			dt_out.set_subvector(0, yt);
			dt_out.set_subvector(ytsize, ut);
		}
		void write(const vec &ut0){ut=ut0;}

		/*!
		\code
		class = "MpdfDS";
		mpdf = {class="mpdf_offspring", ...}// mpdf to simulate
		\endcode

		*/
		void from_setting ( const Setting &set ) {
			impdf=UI::build<mpdf> ( set,"mpdf",UI::compulsory );
			
			Yrv = impdf->_rv();
			// get unique rvs form rvc
			RV rgrv0=impdf->_rvc().remove_time();
			// input is what in not in Yrv
			Urv=rgrv0.subt(Yrv); 
			set_drv(Yrv, Urv);
			// connect input and output to rvc
			ut2rgr.set_connection(impdf->_rvc(), Urv); 
			yt2rgr.set_connection(impdf->_rvc(), Yrv); 

			yt = zeros ( impdf->dimension() );
			rgr = zeros ( impdf->dimensionc() );
			ut = zeros(Urv._dsize());

			ytsize=yt.length();
			utsize=ut.length();
			dtsize = ytsize+utsize;
		}
};
UIREGISTER ( MpdfDS );

/*! Pseudovirtual class for reading data from files

*/
class FileDS: public MemDS {

	public:
		//! Copy the active column of the data matrix into \c dt
		void getdata ( vec &dt ) {
			dt = Data.get_col ( time );
		}

		//! Copy the elements of the active column selected by \c indices into \c dt
		void getdata ( vec &dt, const ivec &indices ) {
			dt = Data.get_col ( time ) ( indices );
		}

		//! number of data records (columns of the data matrix) loaded from the file
		int ndat() {
			return Data.cols();
		}
		//! intentionally empty: no sense to log this type
		void log_add ( logger &L ) {}
		//! intentionally empty: no sense to log this type
		void logit ( logger &L ) {}
};

/*!
* \brief Read Data Matrix from an IT file

The constructor creates an internal matrix \c Data from an IT++ file. The file is binary and can be made using the IT++ library or the Matlab/Octave function itsave. NB: the data are stored columnwise, i.e. each column contains the data for time \f$t\f$!

*/
class ITppFileDS: public FileDS {

	public:
		/*! Load the matrix stored under the name \c varname in the IT++ file \c fname.

		\param fname   name of the IT++ binary file
		\param varname name of the matrix variable inside the file

		The active column is set to 0.
		*/
		ITppFileDS ( const string &fname, const string &varname ) : FileDS() {
			it_file it ( fname );
			// Reading a named variable requires seeking to it with operator>>;
			// operator<< with a Name only sets the name of the next variable to be
			// written, so the first variable of the file would be read regardless of varname.
			it >> Name ( varname ) >> Data;
			time = 0;
			//rowid and delays are ignored
		}

		//! Default constructor; no data are loaded here
		ITppFileDS () : FileDS() {
		}

		//! Configure the data source from a setting (implemented outside this header)
		void from_setting ( const Setting &set );

		// TODO finish: void to_setting( Setting &set ) const;

};

UIREGISTER ( ITppFileDS );
SHAREDPTR ( ITppFileDS );

/*!
* \brief CSV file data storage
The constructor creates \c Data matrix from the records in a CSV file \c fname. The orientation can be of two types:
1. \c BY_COL which is default - the data are stored in columns; one column per time \f$t\f$, one row per data item.
2. \c BY_ROW if the data are stored the classical CSV style. Then each column stores the values for data item, for ex. \f$[y_{t} y_{t-1} ...]\f$, one row for each discrete time instant.

*/
class CsvFileDS: public FileDS {

	public:
		/*! Constructor - create DS from a CSV file.

		\param fname       name of the CSV file to read
		\param orientation layout of the records in the file, \c "BY_COL" (default) or \c "BY_ROW"

		The constructor is implemented outside this header.
		*/
		CsvFileDS ( const string& fname, const string& orientation = "BY_COL" );
};



/*!
\brief Generator of ARX data

*/
class ArxDS : public DS {
	protected:
		//! Rv of the regressor
		RV Rrv;
		//! History, ordered as \f$[y_t, u_t, y_{t-1 }, u_{t-1}, \ldots]\f$
		vec H;
		//! (future) input
		vec U;
		//! temporary variable for regressor
		vec rgr;
		//! data link: H -> rgr
		datalink rgrlnk;
		//! model of Y - linear Gaussian
		mlnorm<chmat> model;
		//! option: when true, parameters of the model (theta and R) are logged
		bool opt_L_theta;
		//! logger identifier of theta
		int L_theta;
		//! logger identifier of R
		int L_R;
		//! size of the undelayed data record, i.e. of \f$[y_t, u_t]\f$
		int dt_size;
	public:
		//! Copy the full history into \c dt
		void getdata ( vec &dt ) {
			dt = H;
		}

		//! Copy the elements of the history selected by \c indices into \c dt
		void getdata ( vec &dt, const ivec &indices ) {
			dt = H ( indices );
		}

		//! Set the whole (future) input
		void write ( vec &ut ) {
			U = ut;
		}

		//! Set the elements of the (future) input selected by \c indices; lengths of both arguments must agree
		void write ( vec &ut, const ivec &indices ) {
			bdm_assert_debug ( ut.length() == indices.length(), "ArxDS" );
			set_subvector ( U, indices, ut );
		}

		//! Simulate one step (implemented outside this header)
		void step();

		//! Default constructor; logging of the parameters is off until set_options() enables it
		ArxDS ( ) : opt_L_theta ( false ), L_theta ( 0 ), L_R ( 0 ), dt_size ( 0 ) {}
		//! Set parameters of the internal model; the arguments are passed unchanged to the model
		void set_parameters ( const mat &Th0, const vec &mu0, const chmat &sqR0 ) {
			model.set_parameters ( Th0, mu0, sqR0 );
		}
		/*! Set random variables of the outputs, the inputs and the regressor.

		\param yrv description of the outputs
		\param urv description of the inputs
		\param rrv description of the regressor; its minimal time determines the length of the history

		Drv is built as \c [yrv,urv] followed by its copies shifted back in time,
		one for each delay down to the minimal time of \c rrv.
		*/
		void set_drv ( const RV &yrv, const RV &urv, const RV &rrv ) {
			Rrv = rrv;
			Urv = urv;
			dt_size = yrv._dsize() + urv._dsize();

			RV drv = concat ( yrv, urv );
			Drv = drv;
			int td = rrv.mint();
			// NOTE(review): set_size presumably does not initialize the elements - confirm
			// that H and U are filled before the first getdata()/step()
			H.set_size ( drv._dsize() * ( -td + 1 ) );
			U.set_size ( Urv._dsize() );
			for ( int i = -1; i >= td; i-- ) {
				drv.t ( -1 );
				Drv.add ( drv ); //shift u1
			}
			rgrlnk.set_connection ( rrv, Drv );

			dtsize = Drv._dsize();
			utsize = Urv._dsize();
		}
		//! set options from a string; the substring "L_theta" switches logging of the model parameters on
		void set_options ( const string &s ) {
			opt_L_theta = ( s.find ( "L_theta" ) != string::npos );
		}
		//! Register the logged quantities: the undelayed data, the inputs and, optionally, theta and R
		virtual void log_add ( logger &L ) {
			//DS::log_add ( L ); too long!!
			L_dt = L.add ( Drv ( 0, dt_size ), "" );
			L_ut = L.add ( Urv, "" );

			if ( opt_L_theta ) {
				// the model is inspected only when its parameters are logged
				mat &A = model._A();
				mat R = model._R();
				L_theta = L.add ( RV ( "{th }", vec_1 ( A.rows() * A.cols() ) ), "t" );
				L_R = L.add ( RV ( "{R }", vec_1 ( R.rows() * R.cols() ) ), "r" );
			}
		}
		//! Log the quantities registered in log_add()
		virtual void logit ( logger &L ) {
			//DS::logit ( L );
			L.logit ( L_dt, H.left ( dt_size ) );
			L.logit ( L_ut, U );

			if ( opt_L_theta ) {
				mat &A = model._A();
				mat R = model._R();
				L.logit ( L_theta, vec ( A._data(), A.rows() * A.cols() ) );
				// the number of elements must match the size registered in log_add()
				L.logit ( L_R, vec ( R._data(), R.rows() * R.cols() ) );
			}
		}

		// TODO update this documentation
		/*! UI for ArxDS using factorized description!

		The ArxDS is constructed from a structure with fields:
		\code
		system = {
			type = "ArxDS";
			// description of y variables
			y = {type="rv"; names=["y", "u"];};
			// description of u variable
			u = {type="rv"; names=[];}
			// description of regressor
			rgr = {type="rv";
				names = ["y","y","y","u"];
				times = [-1, -2, -3, -1];
			}

			// theta
			theta = [0.8, -0.3, 0.4, 1.0,
					 0.0, 0.0, 0.0, 0.0];
			// offset (optional)
			offset = [0.0, 0.0];
			//variance
			r = [0.1, 0.0,
				 0.0, 1.0];
			//options: L_theta = log value of theta,
			opt = "L_theta";
		};
		\endcode

		Result is ARX data source offering with full history as Drv.
		*/
		void from_setting ( const Setting &set );

		// TODO finish: void to_setting( Setting &set ) const;
};

UIREGISTER ( ArxDS );
SHAREDPTR ( ArxDS );

class stateDS : public DS {
	private:
		//!conditional pdf of the state evolution \f$ f(x_t|x_{t-1}) \f$
		shared_ptr<mpdf> IM;

		//!conditional pdf of the observations \f$ f(d_t|x_t) \f$
		shared_ptr<mpdf> OM;

	protected:
		//! result storage
		vec dt;
		//! state storage
		vec xt;
		//! input storage
		vec ut;
		//! Logger
		int L_xt;

	public:
		void getdata ( vec &dt0 ) {
			dt0 = dt;
		}

		void getdata ( vec &dt0, const ivec &indices ) {
			dt0 = dt ( indices );
		}

		stateDS ( const shared_ptr<mpdf> &IM0, const shared_ptr<mpdf> &OM0, int usize ) : IM ( IM0 ), OM ( OM0 ),
				dt ( OM0->dimension() ), xt ( IM0->dimension() ),
				ut ( usize ), L_xt ( 0 ) { }

		stateDS() : L_xt ( 0 ) { }

		virtual void step() {
			xt = IM->samplecond ( concat ( xt, ut ) );
			dt = OM->samplecond ( concat ( xt, ut ) );
		}

		virtual void log_add ( logger &L ) {
			DS::log_add ( L );
			L_xt = L.add ( IM->_rv(), "true" );
		}
		virtual void logit ( logger &L ) {
			DS::logit ( L );
			L.logit ( L_xt, xt );
		}

		/*! UI for stateDS

		The DS is constructed from a structure with fields:
		\code
		system = {
			type = "stateDS";
			//Internal model
			IM = { type = "mpdf"; //<-- valid offspring! e.g. "mlnorm"
				rv = { //description of x_t
					names=["name1",...];
					sizes=[2,1]; // optional default=[1,1...];
					times=[0,0]; // optional default=[0,0...];
					}
				rvu= { //description of  u_t
					//optional default=empty
					}

				// remaining fields depending on the chosen type
				};
			//Observation model
			OM = { type = "mpdf-offspring";
				rv = {}; //description of d_t
				rvu = {type="internal", path="system.IM.rvu"}; //description of u_t

				//remaining fields
			}
		};
		\endcode
		*/
		void from_setting ( const Setting &set );

		// TODO dodelat void to_setting( Setting &set ) const;

};

UIREGISTER ( stateDS );
SHAREDPTR ( stateDS );

}; //namespace

#endif // DATASOURCE_H
