/*! \file \brief Common DataSources. \author Vaclav Smidl. ----------------------------------- BDM++ - C++ library for Bayesian Decision Making under Uncertainty Using IT++ for numerical operations ----------------------------------- */ #ifndef DATASOURCE_H #define DATASOURCE_H #include "../base/bdmbase.h" #include "../stat/exp_family.h" #include "../base/user_info.h" namespace bdm { /*! * \brief Memory storage of off-line data column-wise (used mostly in C++) The data are stored in an internal matrix \c Data . Each column of Data corresponds to one discrete time observation \f$t\f$. */ class MemDS : public DS { protected: //! internal matrix of data mat Data; //! active column in the Data matrix int time; public: //!Default constructor MemDS () {}; //! Convenience constructor MemDS ( mat &Dat ); //! returns number of data in the file; int max_length() { return Data.cols(); } void getdata ( vec &dt ) const; void getdata ( vec &dt, const ivec &indices ); void write ( const vec &ut ) { if (ut.size()>0) { bdm_error ( "MemDS::write is not supported" ); } } void write ( const vec &ut, const ivec &indices ) { if (ut.size()>0) { bdm_error ( "MemDS::write is not supported" ); } } void step(); /*! Create object from the following structure \code class = 'MemDS'; Data = [...]; % Data matrix with records stored in columns --- optional fields --- time = 0; % Index of the first column in the matrix Data --- inherited fields --- bdm::DS::from_setting \endcode If the optional fields are not given, they will be filled as follows: \code drv.names = { "" }; % empty name drv.sizes = { no_rows_Data }; % full size of the record time = 0; \endcode */ void from_setting ( const Setting &set ); void validate(); }; UIREGISTER ( MemDS ); /*! \brief Pseudovirtual class for reading data from files Common predecessord to various file formats. */ class FileDS: public MemDS { protected: string filename; public: /*! Create object from the following structure \code filename = 'data_file.ext'; % name of the file where the data sare stored --- inherited fields --- bdm::DS::from_setting \endcode */ void from_setting ( const Setting & set ); }; /*! * \brief Read Data Matrix from an IT file The constructor creates an internal matrix \c Data from an IT++ file. The file is binary and can be made using the IT++ library or the Matlab/Octave function itsave. NB: the data are stored columnwise, i.e. each column contains the data for time \f$t\f$! */ class ITppFileDS: public FileDS { public: //! Convenience constructor ITppFileDS ( const string &fname, const string &varname ) : FileDS() { it_file it ( fname ); it << Name ( varname ); it >> Data; time = 0; //delays are ignored }; ITppFileDS () : FileDS() { }; /*! Create object from the following structure \code class = 'ITppFileDS'; filename = 'file_with_data.it'; varname = 'Data1'; // Name of a variable in which the data are stored --- inherited fields --- bdm::DS::from_setting \endcode */ void from_setting ( const Setting &set ); // TODO dodelat void to_setting( Setting &set ) const; }; UIREGISTER ( ITppFileDS ); SHAREDPTR ( ITppFileDS ); /*! * \brief CSV file data storage The constructor creates \c Data matrix from the records in a CSV file \c fname. The orientation can be of two types: -# \c BY_COL which is default - the data are stored in columns; one column per time \f$t\f$, one row per data item. -# \c BY_ROW if the data are stored the classical CSV style. Then each column stores the values for data item, for ex. \f$[y_{t} y_{t-1} ...]\f$, one row for each discrete time instant. */ class CsvFileDS: public FileDS { public: /*! Create object from the following structure \code class = 'CsvFileDS'; filename = 'file.csv'; orientation = 'BY_ROW' or 'BY_COL'; % data records are stored in rows of columns (default). --- inherited fields --- bdm::DS::from_setting \endcode */ void from_setting ( const Setting & set ); }; // ARXDs - DELETED /*! \brief Simulate data from a static pdf (epdf) Trivial example of a data source, could be used for tests of some estimation algorithms. For example, simulating data from a mixture model and feeding them to mixture model estimators. */ class EpdfDS: public DS { protected: //! internal pointer to epdf from which we samplecond shared_ptr iepdf; //! internal storage of data sample vec dt; public: void step() { dt = iepdf->sample(); } void getdata ( vec &dt_out ) const { dt_out = dt; } void getdata ( vec &dt_out, const ivec &ids ) { dt_out = dt ( ids ); } const RV& _drv() const { return iepdf->_rv(); } //! Accepts action variable and schedule it for application. virtual void write ( const vec &ut ) NOT_IMPLEMENTED_VOID; //! Accepts action variables at specific indices virtual void write ( const vec &ut, const ivec &indices ) NOT_IMPLEMENTED_VOID; /*! Create object from the following structure \code class = 'EpdfDS'; epdf = configuration of bdm::epdf % unconditional density to sample from, bdm::epdf::from_setting \endcode Does not inherit from bdm::DS::from_setting, names of data are taken from epdf.rv. */ void from_setting ( const Setting &set ) { iepdf = UI::build ( set, "epdf", UI::compulsory ); bdm_assert ( iepdf->isnamed(), "Input epdf must be named, check if RV is given correctly" ); dt = zeros ( iepdf->dimension() ); dtsize = dt.length(); Drv = iepdf->_rv(); utsize = 0; } void validate() { DS::validate(); dt = iepdf->sample(); } }; UIREGISTER ( EpdfDS ); /*! \brief Simulate data from conditional density Data are simulated by sampling from conditional pdf. The condition can be of two types: -# delayed output values: these are cached internally, initiated via \c init_rv and \c init_values. -# input values: remaining RVs in the rvc of the given pdf are considered to be inputs and are declared as \c urv Note: a classical state-space model can be simulated as an instance of this case since it can be seen as a chain rule on state-evolution pdf and observation pdf. See class bdm::mprod and other offsprings of pdf. */ class PdfDS : public DS { public: //! internal pointer to epdf from which we samplecond shared_ptr ipdf; protected: //! internal storage of data sample vec yt; //! input vector vec ut; //! datalink between ut and regressor datalink_buffered ut2rgr; //! datalink between yt and regressor datalink_buffered yt2rgr; //! numeric values of regressor vec rgr; //! RV for initial conditions RV init_rv; //! initial conditions vec init_values; public: void step(); void getdata ( vec &dt_out ) const; void write ( const vec &ut0 ) { ut = ut0; } void write ( const vec &ut0, const ivec &ind ) { set_subvector ( ut, ind, ut0 ); } //! Returns data records at indices. virtual void getdata ( vec &dt, const ivec &indices ) NOT_IMPLEMENTED_VOID; /*! Create object from the following structure \code class = 'PdfDS'; pdf = configuration of bdm::pdf; % any offspring of pdf, bdm::pdf::from_setting --- optional fields --- init_rv = RV({'names',...},[sizes,...],[times,...]); % define what rv to initialize - typically delayed values, time=-1, etc.! init_values = [...]; % vector of initial values corresponding to init_rv \endcode Class does not call from bdm::DS::from_setting, names of data are taken from pdf.rv and pdf.rvc. If init_rv is not given, init_values are set to zero. */ void from_setting ( const Setting &set ) { ipdf = UI::build ( set, "pdf", UI::compulsory ); UI::get ( init_rv, set, "init_rv", UI::optional); UI::get ( init_values, set, "init_values", UI::optional); } void validate() { RV Yrv = ipdf->_rv(); // get unique rvs form rvc RV rgrv0 = ipdf->_rvc().remove_time(); // input is what in not in Yrv Urv = rgrv0.subt ( Yrv ); Drv= concat(Yrv,Urv); // connect input and output to rvc ut2rgr.set_connection ( ipdf->_rvc(), Urv ); yt2rgr.set_connection ( ipdf->_rvc(), Yrv ); if ( init_rv._dsize()>0 ) { // check if if ( init_values.length() != init_rv._dsize() ) { bdm_error ( "init_rv and init_values fields have incompatible sizes" ); } else { ut2rgr.set_history ( init_rv, init_values ); yt2rgr.set_history ( init_rv, init_values ); } } yt = zeros ( ipdf->dimension() ); rgr = zeros ( ipdf->dimensionc() ); ut = zeros ( Urv._dsize() ); utsize = ut.length(); dtsize = yt.length() + utsize; DS::validate(); //taken from sample() - shift of history is not done here ut2rgr.filldown ( ut, rgr ); yt2rgr.filldown ( yt, rgr ); yt = ipdf->samplecond ( rgr ); } }; UIREGISTER ( PdfDS ); }; //namespace #endif // DS_H