/*!
  \file
  \brief Common DataSources.
  \author Vaclav Smidl.

  -----------------------------------
  BDM++ - C++ library for Bayesian Decision Making under Uncertainty

  Using IT++ for numerical operations
  -----------------------------------
*/

#ifndef DATASOURCE_H
#define DATASOURCE_H


#include "../base/bdmbase.h"
#include "../stat/exp_family.h"
#include "../base/user_info.h"

namespace bdm {
/*!
* \brief Memory storage of off-line data column-wise

The data are stored in an internal matrix \c Data . Each column of \c Data corresponds to one discrete time observation \f$t\f$. Access to this matrix is via indices \c rowid.

The data can be loaded from a file.
*/
class MemDS : public DS {
	protected:
		//! internal matrix of data
		mat Data;
		//! active column in the Data matrix
		int time;
		//! vector of rows that are presented in Dt
		ivec rowid;

	public:
		//! Number of columns stored in \c Data
		int max_length() {return Data.cols();}
		//! Copy the current data record into \c dt (defined out of line)
		void getdata ( vec &dt ) const;
		//! Copy the selected entries of the current data record into \c dt (defined out of line)
		void getdata ( vec &dt, const ivec &indeces );
		//! Assign the random-variable descriptions of data and inputs (defined out of line)
		void set_drv (const RV &drv,const  RV &urv );

		//! Writing is not supported by a memory data source; always reports an error
		void write ( const vec &ut ) {
			bdm_error ( "MemDS::write is not supported" );
		}

		//! Writing is not supported by a memory data source; always reports an error
		void write ( const vec &ut, const ivec &indices ) {
			bdm_error ( "MemDS::write is not supported" );
		}

		//! Move to the next data record (defined out of line)
		void step();
		//! Default constructor; \c time starts at column 0 so it is never read uninitialized
		MemDS () : time ( 0 ) {}
		//! Convenience constructor
		MemDS ( mat &Dat, ivec &rowid0);
		/*! Create object from the following structure
		\code
		{ class = "MemDS";
		   Data = (...);            // Data matrix or data vector
		   --- optional ---
		   drv = {class="RV"; ...} // Identification how rows of the matrix Data will be known to others
		   time = 0;               // Index of the first column to use
		   rowid = [1,2,3...];     // ids of rows to be used
		}
		\endcode

		If the optional fields are not given, they will be filled as follows:
		\code
		rowid = linspace(0, number_of_rows_of_Data-1);
		drv = {names=("ch<rowid(0)>", "ch<rowid(1)>", ...);   // one scalar channel per entry of rowid
		      sizes=( 1    1    1 ...);
			  times=( 0    0    0 ...);
			  };
		time = 0;
		\endcode
		If \c rowid is given and \c drv is not, the channels of \c drv are named after the indices in \c rowid.

		Hence the data provided by method \c getdata() will be full column of matrix Data starting from the first record.
		*/
		void from_setting(const Setting &set){
			UI::get(Data, set, "Data", UI::compulsory);
			if(!UI::get(time, set,"time", UI::optional)) {time =0;}
			if(!UI::get(rowid, set, "rowid",UI::optional)) {rowid =linspace(0,Data.rows()-1);}
			shared_ptr<RV> r=UI::build<RV>(set,"drv",UI::optional);
			if (!r) {
				// no description supplied: build one scalar channel "ch<row>" per selected row
				r=new RV();
				for (int i=0; i<rowid.length(); i++){ r->add(RV("ch"+num2str(rowid(i)), 1, 0));}
			}
			set_drv(*r,RV()); //empty urv
			// a memory source has outputs only, no inputs
			dtsize=r->_dsize();
			ytsize = dtsize;
			utsize=0;
		}
};
UIREGISTER(MemDS);

/*!  \brief Simulate data from a static pdf (epdf)

A trivial data source that can be used to test estimation algorithms, e.g. by simulating data from a mixture model and feeding them to mixture model estimators.
*/
class EpdfDS: public DS {
	protected:
		//! unconditional density the samples are drawn from
		shared_ptr<epdf> iepdf;
		//! storage of the most recently drawn sample
		vec dt;

	public:
		//! Draw a fresh sample and keep it as the current data record
		void step() {
			dt = iepdf->sample();
		}

		//! Copy the whole current sample into \c dt_out
		void getdata ( vec &dt_out ) const {
			dt_out = dt;
		}

		//! Copy the entries of the current sample selected by \c ids into \c dt_out
		void getdata ( vec &dt_out, const ivec &ids ) {
			dt_out = dt ( ids );
		}

		//! Description of the data is the random variable of the underlying epdf
		const RV& _drv() const {
			return iepdf->_rv();
		}

		//! Draw the initial sample so that \c getdata() has something to return before the first \c step()
		void validate() {
			dt = iepdf->sample();
		}

		/*!
		\code
		class = "EpdfDS";
		epdf = {class="epdf_offspring", ...}// unconditional density to sample from
		\endcode

		*/
		void from_setting ( const Setting &set ) {
			iepdf = UI::build<epdf> ( set, "epdf", UI::compulsory );
			bdm_assert ( iepdf->isnamed(), "Input epdf must be named, check if RV is given correctly" );

			// size the storage by the dimension of the density; all data are outputs
			dt = zeros ( iepdf->dimension() );
			const int n = dt.length();
			dtsize = n;
			ytsize = n;
			set_drv ( iepdf->_rv(), RV() );
			utsize = 0;

			validate();
		}
};
UIREGISTER ( EpdfDS );

/*!  \brief Simulate data from conditional density
Still having only one density but allowing conditioning on either input or delayed values.
*/
class PdfDS :public DS {
	protected:
		//! internal pointer to epdf from which we samplecond
		shared_ptr<pdf> ipdf;
		//! internal storage of data sample
		vec yt;
		//! input vector
		vec ut;
		//! datalink between ut and regressor
		datalink_buffered ut2rgr;
		//! datalink between yt and regressor
		datalink_buffered yt2rgr;
		//! numeric values of regressor
		vec rgr;
		
	public:
		void step() {
			yt2rgr.step(yt); // y is now history
			ut2rgr.filldown ( ut,rgr );
			yt2rgr.filldown ( yt,rgr );
			yt=ipdf->samplecond ( rgr );
			ut2rgr.step(ut); //u is now history
		}
		void getdata ( vec &dt_out ) const {
			bdm_assert_debug(dt_out.length()>=utsize+ytsize,"Short output vector");
			dt_out.set_subvector(0, yt);
			dt_out.set_subvector(ytsize, ut);
		}
		void write(const vec &ut0){ut=ut0;}

		/*!
		\code
		class = "PdfDS";
		pdf = {class="pdf_offspring", ...};  // pdf to simulate
		--- optional ---
		init_rv = {class="RV",names=...};      // define what rv to initialize - typically delayed values!
		init_values = [...];                   // vector of initial values corresponding to init_rv
		\endcode

		If init_rv is not given, init_values are set to zero.
		*/
		void from_setting ( const Setting &set ) {
			ipdf=UI::build<pdf> ( set,"pdf",UI::compulsory );
			
			Yrv = ipdf->_rv();
			// get unique rvs form rvc
			RV rgrv0=ipdf->_rvc().remove_time();
			// input is what in not in Yrv
			Urv=rgrv0.subt(Yrv); 
			set_drv(Yrv, Urv);
			// connect input and output to rvc
			ut2rgr.set_connection(ipdf->_rvc(), Urv); 
			yt2rgr.set_connection(ipdf->_rvc(), Yrv); 
			
			//set history - if given
			shared_ptr<RV> rv_ini=UI::build<RV>(set,"init_rv",UI::optional);
			if(rv_ini){ // check if 
				vec val;
				UI::get(val, set, "init_values", UI::optional);
				if (val.length()!=rv_ini->_dsize()){
					bdm_error("init_rv and init_values fields have incompatible sizes");
				} else {
					ut2rgr.set_history(*rv_ini, val);
					yt2rgr.set_history(*rv_ini, val);
				}
			}

			yt = zeros ( ipdf->dimension() );
			rgr = zeros ( ipdf->dimensionc() );
			ut = zeros(Urv._dsize());

			ytsize=yt.length();
			utsize=ut.length();
			dtsize = ytsize+utsize;
			validate();
		}
		void validate() {
			//taken from sample() - shift of history is not done here
			ut2rgr.filldown ( ut,rgr );
			yt2rgr.filldown ( yt,rgr );
			yt=ipdf->samplecond ( rgr );
		}
};
UIREGISTER ( PdfDS );

/*! \brief Pseudovirtual class for reading data from files

Descendants fill the inherited matrix \c Data; this class serves whole columns of it and ignores \c rowid.
*/
class FileDS: public MemDS {

	public:
		/*! Copy the active column of \c Data into \c dt.

		Declared \c const so that its signature matches \c MemDS::getdata(vec&) \c const;
		without the qualifier this method only hides the inherited one and is not
		selected when the object is used through a base-class reference.
		*/
		void getdata ( vec &dt ) const {
			dt = Data.get_col ( time );
		}

		//! Copy the entries of the active column selected by \c indices into \c dt
		void getdata ( vec &dt, const ivec &indices ) {
			vec tmp = Data.get_col ( time );
			dt = tmp ( indices );
		}

		//! returns number of data in the file;
		int ndat() {
			return Data.cols();
		}
		//! no sense to log this type
		void log_register(logger &L, const string &prefix){}
		//! no sense to log this type
		void log_write ( ) const {}
};

/*!
* \brief Read Data Matrix from an IT file

The constructor creates an internal matrix \c Data from an IT++ file. The file is binary and can be made using the IT++ library or the Matlab/Octave function itsave. NB: the data are stored columnwise, i.e. each column contains the data for time \f$t\f$!

*/
class ITppFileDS: public FileDS {

	public:
		/*! Convenience constructor

		\param fname   name of the IT++ file to open
		\param varname name of the matrix variable inside the file to load into \c Data
		*/
		ITppFileDS ( const string &fname, const string &varname ) : FileDS() {
			it_file it ( fname );
			// Select the variable to read: operator>> with Name seeks to the named variable,
			// whereas operator<< with Name only names the next variable to be written.
			it >> Name ( varname );
			it >> Data;
			time = 0;
			//rowid and delays are ignored
		}

		//! Default constructor; the object is expected to be filled by \c from_setting()
		ITppFileDS () : FileDS() {
			// start at the first column so that time is never left uninitialized
			time = 0;
		}

		//! Load the data source from a setting (defined out of line)
		void from_setting ( const Setting &set );

		// TODO implement void to_setting( Setting &set ) const;

};

UIREGISTER ( ITppFileDS );
SHAREDPTR ( ITppFileDS );

/*!
* \brief CSV file data storage

The constructor creates the \c Data matrix from the records in a CSV file \c fname. Two orientations are supported:
1. \c BY_COL (default) - the data are stored in columns; one column per time \f$t\f$, one row per data item.
2. \c BY_ROW - the data are stored in the classical CSV style; each column holds the values of one data item, e.g. \f$[y_{t} y_{t-1} ...]\f$, and each row holds one discrete time instant.

*/
class CsvFileDS : public FileDS {
	public:
		/*! Create the data source from a CSV file (defined out of line).

		\param fname       name of the CSV file
		\param orientation layout of the file, \c "BY_COL" unless given otherwise
		*/
		CsvFileDS ( const string &fname, const string &orientation = "BY_COL" );
};



// ARXDs - DELETED

//! State-space data source simulating two densities
class StateDS : public DS {
	protected:
		//!conditional pdf of the state evolution \f$ f(x_t|x_{t-1}) \f$
		shared_ptr<pdf> IM;

		//!conditional pdf of the observations \f$ f(d_t|x_t) \f$
		shared_ptr<pdf> OM;

		//! result storage
		vec dt;
		//! state storage
		vec xt;
		//! input storage
		vec ut;

		//! datalink from ut to IM.rvc
		datalink_part u2imc;
		//! datalink from ut to OM.rvc
		datalink_part u2omc;
	public:
		void getdata ( vec &dt0 ) const {
			dt0 = dt;
		}
		void write (const vec &ut0 ) {
			ut = ut0;
		}
		
		void getdata ( vec &dt0, const ivec &indices ) {
			dt0 = dt ( indices );
		}

		virtual void step() {
			vec imc(IM->dimensionc());
			imc.set_subvector(0,xt);
			u2imc.filldown(ut,imc);
			xt = IM->samplecond ( imc );
			
			vec omc(OM->dimensionc());
			omc.set_subvector(0,xt);
			u2omc.filldown(ut,omc);
			vec yt;
			yt = OM->samplecond ( omc );
			//fill all data
			dt.set_subvector(0,yt);
			dt.set_subvector(yt.length(),xt);
			dt.set_subvector(ytsize,ut);
		}

		//! set parameters
		void set_parameters(shared_ptr<pdf> IM0, shared_ptr<pdf> OM0){
			IM=IM0;
			OM = OM0;
		}
		void set_initx(const vec &x0){xt=x0;}

		/*! UI for stateDS

		The DS is constructed from a structure with fields:
		\code
		class = "stateDS";
		//Internal model
		IM = { type = "pdf-offspring"; };
		//Observation model
		OM = { type = "pdf-offspring"; }
		//initial state
		x0 = [...]; //vector of data
		\endcode
		Both models must have defined \c rv. and \c rvc
		Random variables not found in any rv are considered to be inputs.
		*/
		void from_setting ( const Setting &set );

		// TODO dodelat void to_setting( Setting &set ) const;

		void validate();
};

UIREGISTER ( StateDS );
SHAREDPTR ( StateDS );

}; //namespace

#endif // DATASOURCE_H
