/*! \file \brief Common DataSources. \author Vaclav Smidl. ----------------------------------- BDM++ - C++ library for Bayesian Decision Making under Uncertainty Using IT++ for numerical operations ----------------------------------- */ #ifndef DATASOURCE_H #define DATASOURCE_H #include "../base/bdmbase.h" #include "../stat/exp_family.h" #include "../base/user_info.h" namespace bdm { /*! * \brief Memory storage of off-line data column-wise The data are stored in an internal matrix \c Data . Each column of Data corresponds to one discrete time observation \f$t\f$. Access to this matrix is via indices \c rowid. The data can be loaded from a file. */ class MemDS : public DS { protected: //! internal matrix of data mat Data; //! active column in the Data matrix int time; public: //!Default constructor MemDS () {}; //! Convenience constructor MemDS ( mat &Dat ); //! returns number of data in the file; int max_length() { return Data.cols(); } void getdata ( vec &dt ) const; void getdata ( vec &dt, const ivec &indices ); void set_drv ( const RV &drv ); void set_drv ( const RV &drv, const RV &urv ) { bdm_error ( "MemDS::urv is not supported" ); } void write ( const vec &ut ) { bdm_error ( "MemDS::write is not supported" ); } void write ( const vec &ut, const ivec &indices ) { bdm_error ( "MemDS::write is not supported" ); } void step(); /*! Create object from the following structure \code { class = "MemDS"; Data = (...); // Data matrix or data vector --- optional --- drv = {class="RV"; ...} // Identification how rows of the matrix Data will be known to others time = 0; // Index of the first column to user_info, } \endcode If the optional fields are not given, they will be filled as follows: \code drv = {names=("ch0", "ch1", "ch2", ..."number_of_rows_of_Data"); sizes=( 1 1 1 ...); times=( 0 0 0 ...); }; time = 0; \endcode If \c rowid is given, \c drv will be named after indices in rowids. Hence the data provided by method \c getdata() will be full column of matrix Data starting from the first record. */ void from_setting ( const Setting &set ); }; UIREGISTER ( MemDS ); /*! Pseudovirtual class for reading data from files */ class FileDS: public MemDS { protected: string filename; public: void from_setting ( const Setting & set ); }; /*! * \brief Read Data Matrix from an IT file The constructor creates an internal matrix \c Data from an IT++ file. The file is binary and can be made using the IT++ library or the Matlab/Octave function itsave. NB: the data are stored columnwise, i.e. each column contains the data for time \f$t\f$! */ class ITppFileDS: public FileDS { public: //! Convenience constructor ITppFileDS ( const string &fname, const string &varname ) : FileDS() { it_file it ( fname ); it << Name ( varname ); it >> Data; time = 0; //delays are ignored }; ITppFileDS () : FileDS() { }; void from_setting ( const Setting &set ); // TODO dodelat void to_setting( Setting &set ) const; }; UIREGISTER ( ITppFileDS ); SHAREDPTR ( ITppFileDS ); /*! * \brief CSV file data storage The constructor creates \c Data matrix from the records in a CSV file \c fname. The orientation can be of two types: 1. \c BY_COL which is default - the data are stored in columns; one column per time \f$t\f$, one row per data item. 2. \c BY_ROW if the data are stored the classical CSV style. Then each column stores the values for data item, for ex. \f$[y_{t} y_{t-1} ...]\f$, one row for each discrete time instant. */ class CsvFileDS: public FileDS { public: void from_setting ( const Setting & set ); }; // ARXDs - DELETED /*! \brief Simulate data from a static pdf (epdf) Trivial example of a data source, could be used for tests of some estimation algorithms. For example, simulating data from a mixture model and feeding them to mixture model estimators. */ class EpdfDS: public DS { protected: //! internal pointer to epdf from which we samplecond shared_ptr iepdf; //! internal storage of data sample vec dt; public: void step() { dt = iepdf->sample(); } void getdata ( vec &dt_out ) const { dt_out = dt; } void getdata ( vec &dt_out, const ivec &ids ) { dt_out = dt ( ids ); } const RV& _drv() const { return iepdf->_rv(); } //! Accepts action variable and schedule it for application. virtual void write ( const vec &ut ) NOT_IMPLEMENTED_VOID; //! Accepts action variables at specific indices virtual void write ( const vec &ut, const ivec &indices ) NOT_IMPLEMENTED_VOID; /*! \code class = "EpdfDS"; epdf = {class="epdf_offspring", ...}// uncondtitional density to sample from \endcode */ void from_setting ( const Setting &set ) { iepdf = UI::build ( set, "epdf", UI::compulsory ); bdm_assert ( iepdf->isnamed(), "Input epdf must be named, check if RV is given correctly" ); dt = zeros ( iepdf->dimension() ); dtsize = dt.length(); set_drv ( iepdf->_rv(), RV() ); utsize = 0; } void validate() { DS::validate(); dt = iepdf->sample(); } }; UIREGISTER ( EpdfDS ); /*! \brief Simulate data from conditional density Still having only one density but allowing conditioning on either input or delayed values. */ class PdfDS : public DS { protected: //! internal pointer to epdf from which we samplecond shared_ptr ipdf; //! internal storage of data sample vec yt; //! input vector vec ut; //! datalink between ut and regressor datalink_buffered ut2rgr; //! datalink between yt and regressor datalink_buffered yt2rgr; //! numeric values of regressor vec rgr; public: void step(); void getdata ( vec &dt_out ) const; void write ( const vec &ut0 ) { ut = ut0; } void write ( const vec &ut0, const ivec &ind ) { set_subvector ( ut, ind, ut0 ); } //! Returns data records at indices. virtual void getdata ( vec &dt, const ivec &indices ) NOT_IMPLEMENTED_VOID; /*! \code class = "PdfDS"; pdf = {class="pdf_offspring", ...}; // pdf to simulate --- optional --- init_rv = {class="RV",names=...}; // define what rv to initialize - typically delayed values! init_values = [...]; // vector of initial values corresponding to init_rv \endcode If init_rv is not given, init_values are set to zero. */ void from_setting ( const Setting &set ) { ipdf = UI::build ( set, "pdf", UI::compulsory ); RV Yrv = ipdf->_rv(); // get unique rvs form rvc RV rgrv0 = ipdf->_rvc().remove_time(); // input is what in not in Yrv Urv = rgrv0.subt ( Yrv ); set_drv ( concat(Yrv,Urv), Urv ); // connect input and output to rvc ut2rgr.set_connection ( ipdf->_rvc(), Urv ); yt2rgr.set_connection ( ipdf->_rvc(), Yrv ); //set history - if given shared_ptr rv_ini = UI::build ( set, "init_rv", UI::optional ); if ( rv_ini ) { // check if vec val; UI::get ( val, set, "init_values", UI::optional ); if ( val.length() != rv_ini->_dsize() ) { bdm_error ( "init_rv and init_values fields have incompatible sizes" ); } else { ut2rgr.set_history ( *rv_ini, val ); yt2rgr.set_history ( *rv_ini, val ); } } yt = zeros ( ipdf->dimension() ); rgr = zeros ( ipdf->dimensionc() ); ut = zeros ( Urv._dsize() ); utsize = ut.length(); dtsize = yt.length() + utsize; } void validate() { DS::validate(); //taken from sample() - shift of history is not done here ut2rgr.filldown ( ut, rgr ); yt2rgr.filldown ( yt, rgr ); yt = ipdf->samplecond ( rgr ); } }; UIREGISTER ( PdfDS ); //! State-space data source simulating two densities class StateDS : public DS { protected: //!conditional pdf of the state evolution \f$ f(x_t|x_{t-1}) \f$ shared_ptr IM; //!conditional pdf of the observations \f$ f(d_t|x_t) \f$ shared_ptr OM; //! result storage vec dt; //! state storage vec xt; //! input storage vec ut; //! datalink from ut to IM.rvc datalink_part u2imc; //! datalink from ut to OM.rvc datalink_part u2omc; public: void getdata ( vec &dt0 ) const { dt0 = dt; } void write ( const vec &ut0 ) { ut = ut0; } void getdata ( vec &dt0, const ivec &indices ) { dt0 = dt ( indices ); } virtual void step(); //! set parameters void set_parameters ( shared_ptr IM0, shared_ptr OM0 ) { IM = IM0; OM = OM0; } void set_initx ( const vec &x0 ) { xt = x0; } virtual void write ( const vec &ut, const ivec &indices ) NOT_IMPLEMENTED_VOID; /*! UI for stateDS The DS is constructed from a structure with fields: \code class = "stateDS"; //Internal model IM = { type = "pdf-offspring"; }; //Observation model OM = { type = "pdf-offspring"; } //initial state x0 = [...]; //vector of data \endcode Both models must have defined \c rv. and \c rvc Random variables not found in any rv are considered to be inputs. */ void from_setting ( const Setting &set ); // TODO dodelat void to_setting( Setting &set ) const; void validate(); }; UIREGISTER ( StateDS ); SHAREDPTR ( StateDS ); }; //namespace #endif // DS_H