/*! \file \brief Common DataSources. \author Vaclav Smidl. ----------------------------------- BDM++ - C++ library for Bayesian Decision Making under Uncertainty Using IT++ for numerical operations ----------------------------------- */ #ifndef DATASOURCE_H #define DATASOURCE_H #include "../base/bdmbase.h" #include "../stat/exp_family.h" #include "../base/user_info.h" namespace bdm { /*! * \brief Memory storage of off-line data column-wise The data are stored in an internal matrix \c Data . Each column of Data corresponds to one discrete time observation \f$t\f$. Access to this matrix is via indices \c rowid. The data can be loaded from a file. */ class MemDS : public DS { protected: //! internal matrix of data mat Data; //! active column in the Data matrix int time; //! vector of rows that are presented in Dt ivec rowid; public: int max_length() {return Data.cols();} void getdata ( vec &dt ) const; void getdata ( vec &dt, const ivec &indeces ); void set_drv (const RV &drv,const RV &urv ); void write ( const vec &ut ) { bdm_error ( "MemDS::write is not supported" ); } void write ( const vec &ut, const ivec &indices ) { bdm_error ( "MemDS::write is not supported" ); } void step(); //!Default constructor MemDS () {}; //! Convenience constructor MemDS ( mat &Dat, ivec &rowid0); /*! Create object from the following structure \code { class = "MemDS"; Data = (...); // Data matrix or data vector --- optional --- drv = {class="RV"; ...} // Identification how rows of the matrix Data will be known to others time = 0; // Index of the first column to user_info, rowid = [1,2,3...]; // ids of rows to be used } \endcode If the optional fields are not given, they will be filled as follows: \code rowid= [0, 1, 2, ...number_of_rows_of_Data]; drv = {names=("ch0", "ch1", "ch2", ..."number_of_rows_of_Data"); sizes=( 1 1 1 ...); times=( 0 0 0 ...); }; time = 0; \endcode If \c rowid is given, \c drv will be named after indeces in rowids. Hence the data provided by method \c getdata() will be full column of matrix Data starting from the first record. */ void from_setting(const Setting &set){ UI::get(Data, set, "Data", UI::compulsory); if(!UI::get(time, set,"time", UI::optional)) {time =0;} if(!UI::get(rowid, set, "rowid",UI::optional)) {rowid =linspace(0,Data.rows()-1);} shared_ptr r=UI::build(set,"drv",UI::optional); if (!r) {r=new RV(); for (int i=0; iadd(RV("ch"+num2str(rowid(i)), 1, 0));} } set_drv(*r,RV()); //empty urv dtsize=r->_dsize(); ytsize = dtsize; utsize=0; } }; UIREGISTER(MemDS); /*! \brief Simulate data from a static pdf (epdf) Trivial example of a data source, could be used for tests of some estimation algorithms. For example, simulating data from a mixture model and feeding them to mixture model estimators. */ class EpdfDS: public DS { protected: //! internal pointer to epdf from which we samplecond shared_ptr iepdf; //! internal storage of data sample vec dt; public: void step() { dt=iepdf->sample(); } void getdata ( vec &dt_out ) const { dt_out = dt; } void getdata ( vec &dt_out, const ivec &ids ) { dt_out = dt ( ids ); } const RV& _drv() const { return iepdf->_rv(); } /*! \code class = "EpdfDS"; epdf = {class="epdf_offspring", ...}// uncondtitional density to sample from \endcode */ void from_setting ( const Setting &set ) { iepdf=UI::build ( set,"epdf",UI::compulsory ); bdm_assert(iepdf->isnamed(), "Input epdf must be named, check if RV is given correctly"); dt = zeros(iepdf->dimension()); dtsize=dt.length(); set_drv(iepdf->_rv(),RV()); utsize =0; validate(); } void validate() { dt = iepdf->sample(); } }; UIREGISTER ( EpdfDS ); /*! \brief Simulate data from conditional density Still having only one density but allowing conditioning on either input or delayed values. */ class MpdfDS :public DS { protected: //! internal pointer to epdf from which we samplecond shared_ptr impdf; //! internal storage of data sample vec yt; //! input vector vec ut; //! datalink between ut and regressor datalink_buffered ut2rgr; //! datalink between yt and regressor datalink_buffered yt2rgr; //! numeric values of regressor vec rgr; public: void step() { yt2rgr.step(yt); // y is now history ut2rgr.filldown ( ut,rgr ); yt2rgr.filldown ( yt,rgr ); yt=impdf->samplecond ( rgr ); ut2rgr.step(ut); //u is now history } void getdata ( vec &dt_out ) const { bdm_assert_debug(dt_out.length()>=utsize+ytsize,"Short output vector"); dt_out.set_subvector(0, yt); dt_out.set_subvector(ytsize, ut); } void write(const vec &ut0){ut=ut0;} /*! \code class = "MpdfDS"; mpdf = {class="mpdf_offspring", ...}; // mpdf to simulate --- optional --- init_rv = {class="RV",names=...}; // define what rv to initialize - typically delayed values! init_values = [...]; // vector of initial values corresponding to init_rv \endcode If init_rv is not given, init_values are set to zero. */ void from_setting ( const Setting &set ) { impdf=UI::build ( set,"mpdf",UI::compulsory ); Yrv = impdf->_rv(); // get unique rvs form rvc RV rgrv0=impdf->_rvc().remove_time(); // input is what in not in Yrv Urv=rgrv0.subt(Yrv); set_drv(Yrv, Urv); // connect input and output to rvc ut2rgr.set_connection(impdf->_rvc(), Urv); yt2rgr.set_connection(impdf->_rvc(), Yrv); //set history - if given shared_ptr rv_ini=UI::build(set,"init_rv",UI::optional); if(rv_ini){ // check if vec val; UI::get(val, set, "init_values", UI::optional); if (val.length()!=rv_ini->_dsize()){ bdm_error("init_rv and init_values fields have incompatible sizes"); } else { ut2rgr.set_history(*rv_ini, val); yt2rgr.set_history(*rv_ini, val); } } yt = zeros ( impdf->dimension() ); rgr = zeros ( impdf->dimensionc() ); ut = zeros(Urv._dsize()); ytsize=yt.length(); utsize=ut.length(); dtsize = ytsize+utsize; validate(); } void validate() { //taken from sample() - shift of history is not done here ut2rgr.filldown ( ut,rgr ); yt2rgr.filldown ( yt,rgr ); yt=impdf->samplecond ( rgr ); } }; UIREGISTER ( MpdfDS ); /*! Pseudovirtual class for reading data from files */ class FileDS: public MemDS { public: void getdata ( vec &dt ) { dt = Data.get_col ( time ); } void getdata ( vec &dt, const ivec &indices ) { vec tmp = Data.get_col ( time ); dt = tmp ( indices ); } //! returns number of data in the file; int ndat() { return Data.cols(); } //! no sense to log this type void log_register(logger &L, const string &prefix){}; //! no sense to log this type void log_write ( ) const {}; }; /*! * \brief Read Data Matrix from an IT file The constructor creates an internal matrix \c Data from an IT++ file. The file is binary and can be made using the IT++ library or the Matlab/Octave function itsave. NB: the data are stored columnwise, i.e. each column contains the data for time \f$t\f$! */ class ITppFileDS: public FileDS { public: //! Convenience constructor ITppFileDS ( const string &fname, const string &varname ) : FileDS() { it_file it ( fname ); it << Name ( varname ); it >> Data; time = 0; //rowid and delays are ignored }; ITppFileDS () : FileDS() { }; void from_setting ( const Setting &set ); // TODO dodelat void to_setting( Setting &set ) const; }; UIREGISTER ( ITppFileDS ); SHAREDPTR ( ITppFileDS ); /*! * \brief CSV file data storage The constructor creates \c Data matrix from the records in a CSV file \c fname. The orientation can be of two types: 1. \c BY_COL which is default - the data are stored in columns; one column per time \f$t\f$, one row per data item. 2. \c BY_ROW if the data are stored the classical CSV style. Then each column stores the values for data item, for ex. \f$[y_{t} y_{t-1} ...]\f$, one row for each discrete time instant. */ class CsvFileDS: public FileDS { public: //! Constructor - create DS from a CSV file. CsvFileDS ( const string& fname, const string& orientation = "BY_COL" ); }; // ARXDs - DELETED //! State-space data source simulating two densities class stateDS : public DS { private: //!conditional pdf of the state evolution \f$ f(x_t|x_{t-1}) \f$ shared_ptr IM; //!conditional pdf of the observations \f$ f(d_t|x_t) \f$ shared_ptr OM; protected: //! result storage vec dt; //! state storage vec xt; //! input storage vec ut; //! Logger int L_xt; public: void getdata ( vec &dt0 ) { dt0 = dt; } void getdata ( vec &dt0, const ivec &indices ) { dt0 = dt ( indices ); } //! convenience constructor stateDS ( const shared_ptr &IM0, const shared_ptr &OM0, int usize ) : IM ( IM0 ), OM ( OM0 ), dt ( OM0->dimension() ), xt ( IM0->dimension() ), ut ( usize ), L_xt ( 0 ) { } stateDS() : L_xt ( 0 ) { } virtual void step() { xt = IM->samplecond ( concat ( xt, ut ) ); dt = OM->samplecond ( concat ( xt, ut ) ); } virtual void log_register(logger &L, const string &prefix){ DS::log_register ( L, prefix ); //ids 0 and 1 logrec->ids.set_size(3,true);//copy logrec->ids(2)=logrec->L.add ( IM->_rv(), "true" ); } virtual void log_write () { DS::log_write ( ); logrec->L.logit ( logrec->ids(2), xt ); } /*! UI for stateDS The DS is constructed from a structure with fields: \code system = { type = "stateDS"; //Internal model IM = { type = "mpdf"; //<-- valid offspring! e.g. "mlnorm" rv = { //description of x_t names=["name1",...]; sizes=[2,1]; // optional default=[1,1...]; times=[0,0]; // optional default=[0,0...]; } rvu= { //description of u_t //optional default=empty } // remaining fields depending on the chosen type }; //Observation model OM = { type = "mpdf-offspring"; rv = {}; //description of d_t rvu = {type="internal", path="system.IM.rvu"}; //description of u_t //remaining fields } }; \endcode */ void from_setting ( const Setting &set ); // TODO dodelat void to_setting( Setting &set ) const; }; UIREGISTER ( stateDS ); SHAREDPTR ( stateDS ); }; //namespace #endif // DS_H