/*! \file \brief Common DataSources. \author Vaclav Smidl. ----------------------------------- BDM++ - C++ library for Bayesian Decision Making under Uncertainty Using IT++ for numerical operations ----------------------------------- */ #ifndef DATASOURCE_H #define DATASOURCE_H #include "../base/bdmbase.h" #include "../stat/exp_family.h" #include "../base/user_info.h" namespace bdm { /*! * \brief Memory storage of off-line data column-wise (used mostly in C++) The data are stored in an internal matrix \c Data . Each column of Data corresponds to one discrete time observation \f$t\f$. */ class MemDS : public DS { protected: //! internal matrix of data mat Data; //! active column in the Data matrix int time; public: //!Default constructor MemDS () {}; //! Convenience constructor MemDS ( mat &Dat ); //! returns number of data in the file; int max_length() { return Data.cols(); } void getdata ( vec &dt ) const; void getdata ( vec &dt, const ivec &indices ); void write ( const vec &ut ) { if (ut.size()>0){ bdm_error ( "MemDS::write is not supported" ); } } void write ( const vec &ut, const ivec &indices ) { if (ut.size()>0){ bdm_error ( "MemDS::write is not supported" ); } } void step(); /*! Create object from the following structure \code class = 'MemDS'; Data = [...]; // Data matrix or data vector --- optional --- time = 0; // Index of the first column in the matrix Data --- fields from bdm::DS::from_setting --- \endcode If the optional fields are not given, they will be filled as follows: \code drv.names=(""); drv.sizes=( no_rows_Data ); time = 0; \endcode If \c rowid is given, \c drv will be named after indices in rowids. */ void from_setting ( const Setting &set ); void validate(); }; UIREGISTER ( MemDS ); /*! \brief Pseudovirtual class for reading data from files Common predecessord to various file formats. */ class FileDS: public MemDS { protected: string filename; public: /*! reads what DS::from_setting \code filename = 'data_file.ext'; // name of the file where the data sare stored --- fields from bdm::DS --- \endcode */ void from_setting ( const Setting & set ); }; /*! * \brief Read Data Matrix from an IT file The constructor creates an internal matrix \c Data from an IT++ file. The file is binary and can be made using the IT++ library or the Matlab/Octave function itsave. NB: the data are stored columnwise, i.e. each column contains the data for time \f$t\f$! */ class ITppFileDS: public FileDS { public: //! Convenience constructor ITppFileDS ( const string &fname, const string &varname ) : FileDS() { it_file it ( fname ); it << Name ( varname ); it >> Data; time = 0; //delays are ignored }; ITppFileDS () : FileDS() { }; /*! \code class = 'ITppFileDS'; filename = 'file_with_data.it'; varname = 'Data1'; // Name of a variable in which are th data stored \endcode */ void from_setting ( const Setting &set ); // TODO dodelat void to_setting( Setting &set ) const; }; UIREGISTER ( ITppFileDS ); SHAREDPTR ( ITppFileDS ); /*! * \brief CSV file data storage The constructor creates \c Data matrix from the records in a CSV file \c fname. The orientation can be of two types: 1. \c BY_COL which is default - the data are stored in columns; one column per time \f$t\f$, one row per data item. 2. \c BY_ROW if the data are stored the classical CSV style. Then each column stores the values for data item, for ex. \f$[y_{t} y_{t-1} ...]\f$, one row for each discrete time instant. */ class CsvFileDS: public FileDS { public: void from_setting ( const Setting & set ); }; // ARXDs - DELETED /*! \brief Simulate data from a static pdf (epdf) Trivial example of a data source, could be used for tests of some estimation algorithms. For example, simulating data from a mixture model and feeding them to mixture model estimators. */ class EpdfDS: public DS { protected: //! internal pointer to epdf from which we samplecond shared_ptr iepdf; //! internal storage of data sample vec dt; public: void step() { dt = iepdf->sample(); } void getdata ( vec &dt_out ) const { dt_out = dt; } void getdata ( vec &dt_out, const ivec &ids ) { dt_out = dt ( ids ); } const RV& _drv() const { return iepdf->_rv(); } //! Accepts action variable and schedule it for application. virtual void write ( const vec &ut ) NOT_IMPLEMENTED_VOID; //! Accepts action variables at specific indices virtual void write ( const vec &ut, const ivec &indices ) NOT_IMPLEMENTED_VOID; /*! \code class = "EpdfDS"; epdf = {class="epdf_offspring", ...}// uncondtitional density to sample from \endcode */ void from_setting ( const Setting &set ) { iepdf = UI::build ( set, "epdf", UI::compulsory ); bdm_assert ( iepdf->isnamed(), "Input epdf must be named, check if RV is given correctly" ); dt = zeros ( iepdf->dimension() ); dtsize = dt.length(); Drv = iepdf->_rv(); utsize = 0; } void validate() { DS::validate(); dt = iepdf->sample(); } }; UIREGISTER ( EpdfDS ); /*! \brief Simulate data from conditional density Data are simulated by sampling from conditional pdf. The condition can be of two types: -# delayed output values: these are cached internally, initiated via \c init_rv and \c init_values. -# input values: remaining RVs in the rvc of the given pdf are considered to be inputs and are declared as \c urv Note: a classical state-space model can be simulated as an instance of this case since it can be seen as a chain rule on state-evolution pdf and observation pdf. See class bdm::mprod and other offsprings of pdf. */ class PdfDS : public DS { protected: //! internal pointer to epdf from which we samplecond shared_ptr ipdf; //! internal storage of data sample vec yt; //! input vector vec ut; //! datalink between ut and regressor datalink_buffered ut2rgr; //! datalink between yt and regressor datalink_buffered yt2rgr; //! numeric values of regressor vec rgr; public: void step(); void getdata ( vec &dt_out ) const; void write ( const vec &ut0 ) { ut = ut0; } void write ( const vec &ut0, const ivec &ind ) { set_subvector ( ut, ind, ut0 ); } //! Returns data records at indices. virtual void getdata ( vec &dt, const ivec &indices ) NOT_IMPLEMENTED_VOID; /*! \code class = "PdfDS"; pdf = bdm::pdf::from_setting; // pdf to simulate, any offspring of pdf --- optional --- init_rv = bdm::RV::from_setting; // define what rv to initialize - typically delayed values! init_values = [...]; // vector of initial values corresponding to init_rv \endcode If init_rv is not given, init_values are set to zero. */ void from_setting ( const Setting &set ) { ipdf = UI::build ( set, "pdf", UI::compulsory ); RV Yrv = ipdf->_rv(); // get unique rvs form rvc RV rgrv0 = ipdf->_rvc().remove_time(); // input is what in not in Yrv Urv = rgrv0.subt ( Yrv ); Drv= concat(Yrv,Urv); // connect input and output to rvc ut2rgr.set_connection ( ipdf->_rvc(), Urv ); yt2rgr.set_connection ( ipdf->_rvc(), Yrv ); //set history - if given shared_ptr rv_ini = UI::build ( set, "init_rv", UI::optional ); if ( rv_ini ) { // check if vec val; UI::get ( val, set, "init_values", UI::optional ); if ( val.length() != rv_ini->_dsize() ) { bdm_error ( "init_rv and init_values fields have incompatible sizes" ); } else { ut2rgr.set_history ( *rv_ini, val ); yt2rgr.set_history ( *rv_ini, val ); } } yt = zeros ( ipdf->dimension() ); rgr = zeros ( ipdf->dimensionc() ); ut = zeros ( Urv._dsize() ); utsize = ut.length(); dtsize = yt.length() + utsize; } void validate() { DS::validate(); //taken from sample() - shift of history is not done here ut2rgr.filldown ( ut, rgr ); yt2rgr.filldown ( yt, rgr ); yt = ipdf->samplecond ( rgr ); } }; UIREGISTER ( PdfDS ); }; //namespace #endif // DS_H