/*! \file \brief Basic structures of probability calculus: random variables, probability densities, Bayes rule \author Vaclav Smidl. ----------------------------------- BDM++ - C++ library for Bayesian Decision Making under Uncertainty Using IT++ for numerical operations ----------------------------------- */ #ifndef BDMBASE_H #define BDMBASE_H #include #include "../itpp_ext.h" #include "../bdmroot.h" #include "../shared_ptr.h" #include "user_info.h" using namespace libconfig; using namespace itpp; using namespace std; namespace bdm { //! Structure of RV, i.e. RVs expanded into a flat list of IDs, used for debugging. class str { public: //! vector id ids (non-unique!) ivec ids; //! vector of times ivec times; //!Default constructor str ( ivec ids0, ivec times0 ) : ids ( ids0 ), times ( times0 ) { bdm_assert ( times0.length() == ids0.length(), "Incompatible input" ); }; }; /*! * \brief Class representing variables, most often random variables The purpose of this class is to decribe a vector of data. Such description is used for connecting various vectors between each other, see class datalink. The class is implemented using global variables to assure uniqueness of description: In is a vector \dot digraph datalink { rankdir=LR; subgraph cluster0 { node [shape=record]; label = "MAP \n std::map"; map [label="{{\"a\"| \"b\" | \"c\"} | {<3> 3 |<1> 1|<2> 2}}"]; color = "white" } subgraph cluster1{ node [shape=record]; label = "NAMES"; names [label="{<1> \"b\" | <2> \"c\" | <3>\"a\" }"]; color = "white" } subgraph cluster2{ node [shape=record]; label = "SIZES"; labelloc = b; sizes [label="{<1>1 |<2> 4 |<3> 1}"]; color = "white" } map:1 -> names:1; map:1 -> sizes:1; map:3 -> names:3; map:3 -> sizes:3; } \enddot */ class RV : public root { private: typedef std::map str2int_map; //! Internal global variable storing sizes of RVs static ivec SIZES; //! Internal global variable storing names of RVs static Array NAMES; //! TODO const static int BUFFER_STEP; //! TODO static str2int_map MAP; public: protected: //! size of the data vector int dsize; //! number of individual rvs int len; //! Vector of unique IDs ivec ids; //! Vector of shifts from current time ivec times; private: enum enum_dummy {dummy}; //! auxiliary function used in constructor void init( const Array &in_names, const ivec &in_sizes, const ivec &in_times ); //! auxiliary function assigning unique integer index related to the passed name and size of the random variable int assign_id( const string &name, int size ); //! Private constructor from IDs, potentially dangerous since all ids must be valid! //! dummy is there to prevent confusion with RV(" string"); explicit RV ( const ivec &ids0, enum_dummy dum ) : dsize ( 0 ), len ( ids0.length() ), ids ( ids0 ), times ( zeros_i ( ids0.length() ) ) { dsize = countsize(); } public: //! \name Constructors //!@{ //! Full constructor RV ( const Array &in_names, const ivec &in_sizes, const ivec &in_times ) { init ( in_names, in_sizes, in_times ); } //! Constructor with times=0 RV ( const Array &in_names, const ivec &in_sizes ) { init ( in_names, in_sizes, zeros_i ( in_names.length() ) ); } //! Constructor with sizes=1, times=0 RV ( const Array &in_names ) { init ( in_names, ones_i ( in_names.length() ), zeros_i ( in_names.length() ) ); } //! Constructor of empty RV RV() : dsize ( 0 ), len ( 0 ), ids ( 0 ), times ( 0 ) {} //! Constructor of a single RV RV ( string name, int sz, int tm = 0 ); //! Constructor of a single nameless RV RV ( int sz, int tm = 0 ); // compiler-generated copy constructor is used //!@} //! \name Access functions //!@{ //! State output, e.g. for debugging. friend std::ostream &operator<< ( std::ostream &os, const RV &rv ); string to_string() const { ostringstream o; o << *this; return o.str(); } //! total size of a random variable int _dsize() const { return dsize; } //! access function const ivec& _ids() const { return ids; } //! Recount size of the corresponding data vector int countsize() const; //! Vector of cumulative sizes of RV ivec cumsizes() const; //! Number of named parts int length() const { return len; } int id ( int at ) const { return ids ( at ); } int size ( int at ) const { return SIZES ( ids ( at ) ); } int time ( int at ) const { return times ( at ); } std::string name ( int at ) const { return NAMES ( ids ( at ) ); } //! returns name of a scalar at position scalat, i.e. it can be in the middle of vector name, in that case it adds "_%d" to it std::string scalarname ( int scalat ) const; void set_time ( int at, int time0 ) { times ( at ) = time0; } //!@} //! \name Algebra on Random Variables //!@{ //! Find indices of self in another rv, \return ivec of the same size as self. ivec findself ( const RV &rv2 ) const; //! Find indices of self in another rv, ignore time, \return ivec of the same size as self. ivec findself_ids ( const RV &rv2 ) const; //! Compare if \c rv2 is identical to this \c RV bool equal ( const RV &rv2 ) const; //! Add (concat) another variable to the current one, \return true if all rv2 were added, false if rv2 is in conflict bool add ( const RV &rv2 ); //! Subtract another variable from the current one RV subt ( const RV &rv2 ) const; //! Select only variables at indices ind RV subselect ( const ivec &ind ) const; //! Select only variables at indices ind RV operator() ( const ivec &ind ) const { return subselect ( ind ); } //! Select from data vector starting at di1 to di2 RV operator() ( int di1, int di2 ) const; //! Shift \c time by delta. void t_plus ( int delta ); //!@} //! @{ \name Time manipulation functions //! returns rvs with time set to 0 and removed duplicates RV remove_time() const { return RV ( unique ( ids ), dummy ); } //! create new RV from the current one with time shifted by given value RV copy_t ( int dt ) const { RV tmp = *this; tmp.t_plus ( dt ); return tmp; } //! return rvs with expanded delayes and sorted in the order of: \f$ [ rv_{0}, rv_{-1},\ldots rv_{max_delay}]\f$ RV expand_delayes() const; //!@} //!\name Relation to vectors //!@{ //! generate \c str from rv, by expanding sizes str tostr() const; //! when this rv is a part of bigger rv, this function returns indices of self in the data vector of the bigger crv. //! Then, data can be copied via: data_of_this = cdata(ind); ivec dataind ( const RV &crv ) const; //! same as dataind but this time crv should not be complete supperset of rv. ivec dataind_part ( const RV &crv ) const; //! generate mutual indices when copying data between self and crv. //! Data are copied via: data_of_this(selfi) = data_of_rv2(rv2i) void dataind ( const RV &rv2, ivec &selfi, ivec &rv2i ) const; //! Minimum time-offset int mint() const { return times.length() > 0 ? min ( times ) : 0; } //! Minimum time-offset of ids of given RVs int mint ( const RV &rv ) const { bvec belong = zeros_b ( len ); for ( int r = 0; r < rv.length(); r++ ) { belong = belong | ( ids == rv.id ( r ) ); } return times.length() > 0 ? min ( get_from_bvec ( times, belong ) ) : 0; } //!@} /*! \brief UI for class RV (description of data vectors) \code class = 'RV'; names = {'a', 'b', 'c', ...}; // UNIQUE IDENTIFIER same names = same variable // names are also used when storing results --- optional --- sizes = [1, 2, 3, ...]; // size of each name. default = ones() // if size = -1, it is found out from previous instances of the same name times = [-1, -2, 0, ...]; // time shifts with respect to current time, default = zeros() \endcode */ void from_setting ( const Setting &set ); void to_setting ( Setting &set ) const; //! Invalidate all named RVs. Use before initializing any RV instances, with care... static void clear_all(); //! function for debugging RV related stuff string show_all(); }; UIREGISTER ( RV ); SHAREDPTR ( RV ); //! Concat two random variables RV concat ( const RV &rv1, const RV &rv2 ); /*! @brief Class for storing results (and semi-results) of an experiment This class abstracts logging of results from implementation. This class replaces direct logging of results (e.g. to files or to global variables) by calling methods of a logger. Specializations of this abstract class for specific storage method are designed. */ class logger : public root { protected: //! RVs of all logged variables. Array entries; //! Names of logged quantities, e.g. names of algorithm variants Array names; //! Root Setting for storing Settings Config setting_conf; //! list of Settings for specific ids Array settings; //! log this instance to Setting //! //! this method has to be called only through \c log_level class to assure the validity of the passed id template void log_setting ( int id, const U data ) { UI::save(data, *settings ( id ) ); } //! log this vector //! //! this method has to be called only through \c log_level class to assure the validity of the passed id virtual void log_vector ( int id, const vec &v ) NOT_IMPLEMENTED_VOID; //! log this double //! //! this method has to be called only through \c log_level class to assure the validity of the passed id virtual void log_double ( int id, const double &d ) NOT_IMPLEMENTED_VOID; //! it is necessary to allow log_levels to call log_setting, log_vector and log_double methods template friend class log_level_template; public: //!separator of prefixes of entries //! //! It is a constant string, thus it can be safely declared as public without creating any accessor method const string separator; //!Default constructor logger ( const string separator ) : entries ( 0 ), names ( 0 ), separator ( separator ) {} //!Destructor calls the finalize method ~logger() { finalize(); } //! sets up the ids identifier in the passed log_level instance to permit future calls of the log_level_template::store(...) method //! //! It also sets a pointer to logger or justify it is correctly assigned from previous call to this procedure //! Entries with empty RV will be ignored //! //! passing the last parameter \c enum_subindex one can store multiple vectors in the position of one enum template void add_vector ( log_level_base &log_level, enum T::log_level_enums const log_level_enum, const RV &rv, const string &prefix, int enum_subindex = 0 ) { if( !log_level.registered_logger ) log_level.registered_logger = this; else bdm_assert_debug ( log_level.registered_logger == this, "This log_level is already registered to another logger!"); if ( rv._dsize() == 0 ) return; int id = entries.length(); string adjusted_name = log_level.store_id_and_give_name( log_level_enum, enum_subindex, id ); names = concat ( names, prefix + separator + adjusted_name ); // diff entries.set_length ( id + 1, true ); entries ( id ) = rv; } //! sets up the ids identifier in the passed log_level instance to permit future calls of the log_level_template::store(...) method //! //! It also sets a pointer to logger or justify it is correctly assigned from previous call to this procedure //! //! To allow both arguments log_level and log_level_enum be templated, it was necessary to declare log_level_base class. //! This way we check compatibility of the passed log_level and log_level_enum, which would be impossible using just log_level_base class //! here. //! //! //! passing the last parameter \c enum_subindex one can store multiple settings in the position of one enum template void add_setting ( log_level_base &log_level, enum T::log_level_enums const log_level_enum, const string &prefix, int enum_subindex = 0 ) { if( !log_level.registered_logger ) log_level.registered_logger = this; else bdm_assert_debug ( log_level.registered_logger == this, "This log_level is already registered to another logger!"); Setting &root = setting_conf.getRoot(); int id = root.getLength(); //root must be group!! string adjusted_name = log_level.store_id_and_give_name( log_level_enum, enum_subindex, id ); settings.set_length ( id + 1, true ); settings ( id ) = &root.add ( prefix + separator + adjusted_name, Setting::TypeList ); } //! Shifts storage position for another time step. virtual void step() = 0; //! Finalize storing information //! //! This method is called either directly or via destructor ~logger(), therefore it has to permit repetitive calls for the case it is called twice virtual void finalize() {}; //! Initialize the storage virtual void init() {}; }; //! This class stores a details that will be logged to a logger template class log_level_template : public log_level_base { public: //! This method stores a vector to the proper place in registered logger //! //! parameter \c enum_subindex identifies the precise position of vector in the case there is more vectors registered to with this enum void store( const enum T::log_level_enums log_level_enum, const vec &vect, int enum_subindex = 0 ) const { bdm_assert_debug( this->registered_logger != NULL, "You have to register instance to a logger first! Use root::log_register(...) method."); bdm_assert_debug( ids( log_level_enum )( enum_subindex ) >= 0, "This particular vector was not added to logger! Use logger::add_vector(...) method."); this->registered_logger->log_vector( ids( log_level_enum )( enum_subindex ), vect ); } //! This method stores a double to the proper place in registered logger //! //! parameter \c enum_subindex identifies the precise position of double in the case there is more doubles registered to with this enum void store( const enum T::log_level_enums log_level_enum, const double &dbl, int enum_subindex = 0 ) const { bdm_assert_debug( this->registered_logger != NULL, "You have to register instance to a logger first! See root::log_register(...) method."); bdm_assert_debug( ids( log_level_enum )( enum_subindex ) >= 0, "This particular double was not added to logger! Use logger::add_vector(...) method."); this->registered_logger->log_double( ids( log_level_enum )( enum_subindex ), dbl ); } //! This method stores a Setting obtained by call of UI::save( data, .. ) to the proper place in registered logger //! //! parameter \c enum_subindex identifies the precise position of setting in the case there is more settings registered to with this enum //! //! If this method was not templated, we could store whole body of this class in cpp file without explicitly touching registered_logger->log_setting(...) here. //! (it would not be straightforward, though, still there are some enums which had to be converted into integers but it could be done without loosing type control) //! This way, it would not be necessary to declare log_level_base class and we could declare log_level_template //! before the logger class itself with a mere foroward declaration of logger. In our case, however, touching of registered_logger->log_setting //! implies that forward declaration is not enough and we are lost in a circle. And just by cutting this circle we obtains log_level_base class. template void store( const enum T::log_level_enums log_level_enum, const U data, int enum_subindex = 0 ) const { bdm_assert_debug( this->registered_logger != NULL, "You have to register instance to a logger first! See root::log_register(...) method."); bdm_assert_debug( ids( log_level_enum )(enum_subindex ) >= 0, "This particular vector was not added to logger! Use logger::add_setting(...) method."); this->registered_logger->log_setting( ids( log_level_enum )( enum_subindex ), data); } }; /*! \def LOG_LEVEL(classname,...) \brief Macro for defining a log_level attribute with a specific set of enumerations related to a specific class This macro has to be called within a class declaration. Its argument \a classname has to correspond to that wrapping class. This macro defines a log_level instance which can be modified either directly or by the means of #UI class. One of the main purposes of this macro is to allow variability in using enumerations. By relating them to their names through an array of strings, we are no more dependant on their precise ordering. What is more, we can add or remove any without harming any applications which are using this library. \todo Write a more detailed explanation including also examples \ref ui */ #define LOG_LEVEL(classname,...) public: enum log_level_enums { __VA_ARGS__ }; log_level_template log_level; private: friend class log_level_template; friend class log_level_base; static const Array &log_level_names() { static const Array log_level_names = log_level_base::string2Array( #__VA_ARGS__ ); return log_level_names; } //! Class representing function \f$f(x)\f$ of variable \f$x\f$ represented by \c rv class fnc : public root { protected: //! Length of the output vector int dimy; //! Length of the input vector int dimc; public: //!default constructor fnc() {}; //! function evaluates numerical value of \f$f(x)\f$ at \f$x=\f$ \c cond virtual vec eval ( const vec &cond ) { return vec ( 0 ); }; //! access function int dimension() const { return dimy; } //! access function int dimensionc() const { return dimc; } void from_setting(const Setting &set){ UI::get(dimy, set, "dim", UI::optional); UI::get(dimc, set, "dimc", UI::optional); } }; class epdf; //! Conditional probability density, e.g. modeling \f$ f( x | y) \f$, where \f$ x \f$ is random variable, \c rv, and \f$ y \f$ is conditioning variable, \c rvc. class pdf : public root { protected: //!dimension of the condition int dimc; //! random variable in condition RV rvc; //! dimension of random variable int dim; //! random variable RV rv; public: //! \name Constructors //! @{ pdf() : dimc ( 0 ), rvc(), dim ( 0 ), rv() { } pdf ( const pdf &m ) : dimc ( m.dimc ), rvc ( m.rvc ), dim ( m.dim ), rv ( m.rv ) { } //!@} //! \name Matematical operations //!@{ //! Returns a sample from the density conditioned on \c cond, \f$x \sim epdf(rv|cond)\f$. \param cond is numeric value of \c rv virtual vec samplecond ( const vec &cond ) = 0; //! Returns \param N samples from the density conditioned on \c cond, \f$x \sim epdf(rv|cond)\f$. \param cond is numeric value of \c rv virtual mat samplecond_mat ( const vec &cond, int N ); //! Shortcut for conditioning and evaluation of the internal epdf. In some cases, this operation can be implemented efficiently. virtual double evallogcond ( const vec &yt, const vec &cond ) = 0; //! Matrix version of evallogcond virtual vec evallogcond_mat ( const mat &Yt, const vec &cond ) { vec v ( Yt.cols() ); for ( int i = 0; i < Yt.cols(); i++ ) { v ( i ) = evallogcond ( Yt.get_col ( i ), cond ); } return v; } //! Array version of evallogcond virtual vec evallogcond_mat ( const Array &Yt, const vec &cond ) { vec v ( Yt.length() ); for ( int i = 0; i < Yt.length(); i++ ) { v ( i ) = evallogcond ( Yt( i ), cond ); } return v; } //! \name Access to attributes //! @{ const RV& _rv() const { return rv; } const RV& _rvc() const { return rvc; } int dimension() const { return dim; } int dimensionc() { return dimc; } //! access function void set_dim ( int d ) { dim = d; } //! access function void set_dimc ( int d ) { dimc = d; } //! Load from structure with elements: //! \code //! { class = "pdf_offspring", //! rv = {class="RV", names=(...),}; // RV describing meaning of random variable //! rvc= {class="RV", names=(...),}; // RV describing meaning of random variable in condition //! // elements of offsprings //! } //! \endcode //!@} void from_setting ( const Setting &set ); void to_setting ( Setting &set ) const; //!@} //! \name Connection to other objects //!@{ void set_rvc ( const RV &rvc0 ) { rvc = rvc0; } void set_rv ( const RV &rv0 ) { rv = rv0; } //! Names of variables stored in RV are considered to be valid only if their size match size of the parameters (dim). bool isnamed() const { return ( dim == rv._dsize() ) && ( dimc == rvc._dsize() ); } //!@} }; SHAREDPTR ( pdf ); //! Probability density function with numerical statistics, e.g. posterior density. class epdf : public pdf { //! \var log_level_enums logmean //! log mean value of the density when requested //! \var log_level_enums loglbound //! log lower bound of the density (see function qbounds) //! \var log_level_enums logubound //! log upper bound of the density (see function qbounds) //! \var log_level_enums logfull //! log full record of the density in the form of setting LOG_LEVEL(epdf,logmean,loglbound,logubound,logfull); public: /*! \name Constructors Construction of each epdf should support two types of constructors: \li empty constructor, \li copy constructor, The following constructors should be supported for convenience: \li constructor followed by calling \c set_parameters() WHICH IS OBSOLETE (TODO) \li constructor accepting random variables calling \c set_rv() All internal data structures are constructed as empty. Their values (including sizes) will be set by method \c set_parameters() WHICH IS OBSOLETE (TODO). This way references can be initialized in constructors. @{*/ epdf() {}; epdf ( const epdf &e ) : pdf ( e ) {}; //!@} //! \name Matematical Operations //!@{ //! Returns a sample, \f$ x \f$ from density \f$ f_x()\f$ virtual vec sample() const = 0; //! Returns N samples, \f$ [x_1 , x_2 , \ldots \ \f$ from density \f$ f_x(rv)\f$ virtual mat sample_mat ( int N ) const; //! Compute log-probability of argument \c val //! In case the argument is out of suport return -Infinity virtual double evallog ( const vec &val ) const = 0; //! Compute log-probability of multiple values argument \c val virtual vec evallog_mat ( const mat &Val ) const; //! Compute log-probability of multiple values argument \c val virtual vec evallog_mat ( const Array &Avec ) const; //! Return conditional density on the given RV, the remaining rvs will be in conditioning virtual shared_ptr condition ( const RV &rv ) const; //! Return marginal density on the given RV, the remainig rvs are intergrated out virtual shared_ptr marginal ( const RV &rv ) const; virtual vec mean() const = 0; //! return expected variance (not covariance!) virtual vec variance() const = 0; //! return expected covariance -- default is diag(variance)!! virtual mat covariance() const {return diag(variance());}; //! Lower and upper bounds of \c percentage % quantile, returns mean-2*sigma as default virtual void qbounds ( vec &lb, vec &ub, double percentage = 0.95 ) const { vec mea = mean(); vec std = sqrt ( variance() ); lb = mea - 2 * std; ub = mea + 2 * std; }; //! Set statistics to match given input epdf. Typically it copies statistics from epdf of the same type and projects those form different types //! \param pdf0 epdf to match //! \param option placeholder for potential options void set_statistics(const epdf *pdf0) NOT_IMPLEMENTED_VOID; //!@} //! \name Connection to other classes //! Description of the random quantity via attribute \c rv is optional. //! For operations such as sampling \c rv does not need to be set. However, for \c marginalization //! and \c conditioning \c rv has to be set. NB: //! @{ //! store values of the epdf on the following levels: //! #1 mean //! #2 mean + lower & upper bound void log_register ( logger &L, const string &prefix ); void log_write() const; //!@} //! \name Access to attributes //! @{ //! Load from structure with elements: //! \code //! { rv = {class="RV", names=(...),}; // RV describing meaning of random variable //! // elements of offsprings //! } //! \endcode //!@} void from_setting ( const Setting &set ) { root::from_setting( set ); shared_ptr r = UI::build ( set, "rv", UI::optional ); if ( r ) { set_rv ( *r ); } } void to_setting ( Setting &set ) const { // we do not want to store rvc, therfore, pdf::to_setting( set ) is omitted root::to_setting(set); UI::save( &rv, set, "rv" ); } vec samplecond ( const vec &cond ) { return sample(); } double evallogcond ( const vec &val, const vec &cond ) { return evallog ( val ); } }; SHAREDPTR ( epdf ); //! pdf with internal epdf that is modified by function \c condition template class pdf_internal: public pdf { protected : //! Internal epdf used for sampling EPDF iepdf; public: //! constructor pdf_internal() : pdf(), iepdf() { } //! Update \c iepdf so that it represents this pdf conditioned on \c rvc = cond //! This function provides convenient reimplementation in offsprings virtual void condition ( const vec &cond ) = 0; //!access function to iepdf EPDF& e() { return iepdf; } //! Reimplements samplecond using \c condition() vec samplecond ( const vec &cond ); //! Reimplements evallogcond using \c condition() double evallogcond ( const vec &val, const vec &cond ); //! Efficient version of evallogcond for matrices virtual vec evallogcond_mat ( const mat &Dt, const vec &cond ); //! Efficient version of evallogcond for Array virtual vec evallogcond_mat ( const Array &Dt, const vec &cond ); //! Efficient version of samplecond virtual mat samplecond_mat ( const vec &cond, int N ); void validate() { iepdf.validate(); if ( rv._dsize() < iepdf._rv()._dsize() ) { rv = iepdf._rv(); }; dim = iepdf.dimension(); } }; /*! \brief DataLink is a connection between two data vectors Up and Down Up can be longer than Down. Down must be fully present in Up (TODO optional) See chart: \dot digraph datalink { node [shape=record]; subgraph cluster0 { label = "Up"; up [label="<1>|<2>|<3>|<4>|<5>"]; color = "white" } subgraph cluster1{ label = "Down"; labelloc = b; down [label="<1>|<2>|<3>"]; color = "white" } up:1 -> down:1; up:3 -> down:2; up:5 -> down:3; } \enddot */ class datalink { protected: //! Remember how long val should be int downsize; //! Remember how long val of "Up" should be int upsize; //! val-to-val link, indices of the upper val ivec v2v_up; public: //! Constructor datalink() : downsize ( 0 ), upsize ( 0 ) { } //! Convenience constructor datalink ( const RV &rv, const RV &rv_up ) { set_connection ( rv, rv_up ); } //! set connection, rv must be fully present in rv_up virtual void set_connection ( const RV &rv, const RV &rv_up ); //! set connection using indices virtual void set_connection ( int ds, int us, const ivec &upind ); //! Get val for myself from val of "Up" vec pushdown ( const vec &val_up ) { vec tmp ( downsize ); filldown ( val_up, tmp ); return tmp; } //! Get val for vector val_down from val of "Up" virtual void filldown ( const vec &val_up, vec &val_down ) { bdm_assert_debug ( upsize == val_up.length(), "Wrong val_up" ); val_down = val_up ( v2v_up ); } //! Fill val of "Up" by my pieces virtual void pushup ( vec &val_up, const vec &val ) { bdm_assert_debug ( downsize == val.length(), "Wrong val" ); bdm_assert_debug ( upsize == val_up.length(), "Wrong val_up" ); set_subvector ( val_up, v2v_up, val ); } //! access functions int _upsize() { return upsize; } //! access functions int _downsize() { return downsize; } //! for future use virtual ~datalink() {} }; /*! Extension of datalink to fill only part of Down */ class datalink_part : public datalink { protected: //! indices of values in vector downsize ivec v2v_down; public: void set_connection ( const RV &rv, const RV &rv_up ); //! Get val for vector val_down from val of "Up" void filldown ( const vec &val_up, vec &val_down ) { set_subvector ( val_down, v2v_down, val_up ( v2v_up ) ); } }; /*! \brief Datalink that buffers delayed values - do not forget to call step() Up is current data, Down is their subset with possibly delayed values */ class datalink_buffered: public datalink_part { protected: //! History, ordered as \f$[Up_{t-1},Up_{t-2}, \ldots]\f$ vec history; //! rv of the history RV Hrv; //! h2v : indices in down ivec h2v_down; //! h2v : indices in history ivec h2v_hist; //! v2h: indices of up too be pushed to h ivec v2h_up; public: datalink_buffered() : datalink_part(), history ( 0 ), h2v_down ( 0 ), h2v_hist ( 0 ) {}; //! push current data to history void store_data ( const vec &val_up ) { if ( v2h_up.length() > 0 ) { history.shift_right ( 0, v2h_up.length() ); history.set_subvector ( 0, val_up ( v2h_up ) ); } } //! Get val for myself from val of "Up" vec pushdown ( const vec &val_up ) { vec tmp ( downsize ); filldown ( val_up, tmp ); return tmp; } void filldown ( const vec &val_up, vec &val_down ) { bdm_assert_debug ( val_down.length() >= downsize, "short val_down" ); set_subvector ( val_down, v2v_down, val_up ( v2v_up ) ); // copy direct values set_subvector ( val_down, h2v_down, history ( h2v_hist ) ); // copy delayed values } void set_connection ( const RV &rv, const RV &rv_up ); //! set history of variable given by \c rv1 to values of \c hist. void set_history ( const RV& rv1, const vec &hist0 ); }; //! buffered datalink from 2 vectors to 1 class datalink_2to1_buffered { protected: //! link 1st vector to down datalink_buffered dl1; //! link 2nd vector to down datalink_buffered dl2; public: //! set connection between RVs void set_connection ( const RV &rv, const RV &rv_up1, const RV &rv_up2 ) { dl1.set_connection ( rv, rv_up1 ); dl2.set_connection ( rv, rv_up2 ); } //! fill values of down from the values of the two up vectors void filldown ( const vec &val1, const vec &val2, vec &val_down ) { bdm_assert_debug ( val_down.length() >= dl1._downsize() + dl2._downsize(), "short val_down" ); dl1.filldown ( val1, val_down ); dl2.filldown ( val2, val_down ); } //! update buffer void step ( const vec &dt, const vec &ut ) { dl1.store_data ( dt ); dl2.store_data ( ut ); } }; //! Data link with a condition. class datalink_m2e: public datalink { protected: //! Remember how long cond should be int condsize; //!upper_val-to-local_cond link, indices of the upper val ivec v2c_up; //!upper_val-to-local_cond link, indices of the local cond ivec v2c_lo; public: //! Constructor datalink_m2e() : condsize ( 0 ) { } //! Set connection between vectors void set_connection ( const RV &rv, const RV &rvc, const RV &rv_up ); //!Construct condition vec get_cond ( const vec &val_up ); //! Copy corresponding values to Up.condition void pushup_cond ( vec &val_up, const vec &val, const vec &cond ); }; //!DataLink is a connection between pdf and its superordinate (Up) //! This class links class datalink_m2m: public datalink_m2e { protected: //!cond-to-cond link, indices of the upper cond ivec c2c_up; //!cond-to-cond link, indices of the local cond ivec c2c_lo; public: //! Constructor datalink_m2m() {}; //! Set connection between the vectors void set_connection ( const RV &rv, const RV &rvc, const RV &rv_up, const RV &rvc_up ) { datalink_m2e::set_connection ( rv, rvc, rv_up ); //establish c2c connection rvc.dataind ( rvc_up, c2c_lo, c2c_up ); // bdm_assert_debug ( c2c_lo.length() + v2c_lo.length() == condsize, "cond is not fully given" ); } //! Get cond for myself from val and cond of "Up" vec get_cond ( const vec &val_up, const vec &cond_up ) { vec tmp ( condsize ); fill_cond ( val_up, cond_up, tmp ); return tmp; } //! fill condition void fill_cond ( const vec &val_up, const vec &cond_up, vec& cond_out ) { bdm_assert_debug ( cond_out.length() >= condsize, "dl.fill_cond: cond_out is too small" ); set_subvector ( cond_out, v2c_lo, val_up ( v2c_up ) ); set_subvector ( cond_out, c2c_lo, cond_up ( c2c_up ) ); } //! Fill }; //! \brief Combines RVs from a list of pdfs to a single one. RV get_composite_rv ( const Array > &pdfs, bool checkoverlap = false ); /*! \brief Abstract class for discrete-time sources of data. The class abstracts operations of: \li data aquisition, \li data-preprocessing, such as scaling of data, \li data resampling from the task of estimation and control. Moreover, for controlled systems, it is able to receive the desired control action and perform it in the next step. (Or as soon as possible). The DataSource has three main data interaction structures: \li input, \f$ u_t \f$, \li output \f$ y_t \f$, \li data, \f$ d_t=[y_t,u_t, \ldots ]\f$ a collection of all inputs and outputs and possibly some internal variables too. */ class DS : public root { //! \var log_level_enums logdt //! TODO DOPLNIT //! \var log_level_enums logut //! TODO DOPLNIT LOG_LEVEL(DS,logdt,logut); protected: //! size of data returned by \c getdata() int dtsize; //! size of data int utsize; //!Description of data returned by \c getdata(). RV Drv; //!Description of data witten by by \c write(). RV Urv; // public: //! default constructors DS() : dtsize ( 0 ), utsize ( 0 ), Drv(), Urv(){ log_level[logdt] = true; log_level[logut] = true; }; //! Returns maximum number of provided data, by default it is set to maximum allowed length, shorter DS should overload this method! See, MemDS.max_length(). virtual int max_length() { return std::numeric_limits< int >::max(); } //! Returns full vector of observed data=[output, input] virtual void getdata ( vec &dt ) const = 0; //! Returns data records at indices. Default is inefficent. virtual void getdata ( vec &dt, const ivec &indices ) { vec tmp(dtsize); getdata(tmp); dt = tmp(indices); }; //! Accepts action variable and schedule it for application. virtual void write ( const vec &ut ) NOT_IMPLEMENTED_VOID; //! Accepts action variables at specific indices virtual void write ( const vec &ut, const ivec &indices ) NOT_IMPLEMENTED_VOID; //! Moves from \f$ t \f$ to \f$ t+1 \f$, i.e. perfroms the actions and reads response of the system. virtual void step() = 0; //! Register DS for logging into logger L virtual void log_register ( logger &L, const string &prefix ); //! Register DS for logging into logger L virtual void log_write ( ) const; //!access function virtual const RV& _drv() const { return Drv; } //!access function const RV& _urv() const { return Urv; } //! set random variables virtual void set_drv ( const RV &drv, const RV &urv) { Drv = drv; Urv = urv; } void from_setting ( const Setting &set ); void validate(); }; /*! \brief Bayesian Model of a system, i.e. all uncertainty is modeled by probabilities. This object represents exact or approximate evaluation of the Bayes rule: \f[ f(\theta_t | y_1,\ldots,y_t, u_1,\ldots,u_t) = \frac{f(y_t|\theta_t,\cdot) f(\theta_t|d_1,\ldots,d_{t-1})}{f(y_t|d_1,\ldots,d_{t-1})} \f] where: * \f$ y_t \f$ is the variable Access to the resulting posterior density is via function \c posterior(). As a "side-effect" it also evaluates log-likelihood of the data, which can be accessed via function _ll(). It can also evaluate predictors of future values of \f$y_t\f$, see functions epredictor() and predictor(). Alternatively, it can evaluate posterior density with rvc replaced by the given values, \f$ c_t \f$: \f[ f(\theta_t | c_t, d_1,\ldots,d_t) \propto f(y_t,\theta_t|c_t,\cdot, d_1,\ldots,d_{t-1}) \f] */ class BM : public root { //! \var log_level_enums logfull //! TODO DOPLNIT //! \var log_level_enums logevidence //! TODO DOPLNIT //! \var log_level_enums logbounds //! TODO DOPLNIT LOG_LEVEL(BM,logfull,logevidence,logbounds); protected: //! Random variable of the data (optional) RV yrv; //! size of the data record int dimy; //! Name of extension variable RV rvc; //! size of the conditioning vector int dimc; //!Logarithm of marginalized data likelihood. double ll; //! If true, the filter will compute likelihood of the data record and store it in \c ll . Set to false if you want to save computational time. bool evalll; public: //! \name Constructors //! @{ BM() : yrv(), dimy ( 0 ), rvc(), dimc ( 0 ), ll ( 0 ), evalll ( true ) { }; // BM ( const BM &B ) : yrv ( B.yrv ), dimy(B.dimy), rvc ( B.rvc ),dimc(B.dimc), ll ( B.ll ), evalll ( B.evalll ) {} //! \brief Copy function required in vectors, Arrays of BM etc. Have to be DELETED manually! //! Prototype: \code BM* _copy() const {return new BM(*this);} \endcode virtual BM* _copy() const NOT_IMPLEMENTED(NULL); //!@} //! \name Mathematical operations //!@{ /*! \brief Incremental Bayes rule @param dt vector of input data */ virtual void bayes ( const vec &yt, const vec &cond = empty_vec ) = 0; //! Batch Bayes rule (columns of Dt are observations) virtual void bayes_batch ( const mat &Dt, const vec &cond = empty_vec ); //! Batch Bayes rule (columns of Dt are observations, columns of Cond are conditions) virtual void bayes_batch ( const mat &Dt, const mat &Cond ); //! Evaluates predictive log-likelihood of the given data record //! I.e. marginal likelihood of the data with the posterior integrated out. //! This function evaluates only \f$ y_t \f$, condition is assumed to be the last used in bayes(). //! See bdm::BM::predictor for conditional version. virtual double logpred ( const vec &yt ) const NOT_IMPLEMENTED(0.0); //! Matrix version of logpred vec logpred_mat ( const mat &Yt ) const { vec tmp ( Yt.cols() ); for ( int i = 0; i < Yt.cols(); i++ ) { tmp ( i ) = logpred ( Yt.get_col ( i ) ); } return tmp; } //!Constructs a predictive density \f$ f(d_{t+1} |d_{t}, \ldots d_{0}) \f$ virtual epdf* epredictor() const NOT_IMPLEMENTED(NULL); //!Constructs conditional density of 1-step ahead predictor \f$ f(d_{t+1} |d_{t+h-1}, \ldots d_{t}) \f$ virtual pdf* predictor() const NOT_IMPLEMENTED(NULL); //!@} //! \name Access to attributes //!@{ //! access function const RV& _rvc() const { return rvc; } //! access function int dimensionc() const { return dimc; } //! access function int dimensiony() const { return dimy; } //! access function int dimension() const { return posterior().dimension(); } //! access function const RV& _rv() const { return posterior()._rv(); } //! access function const RV& _yrv() const { return yrv; } //! access function void set_yrv ( const RV &rv ) { yrv = rv; } //! access function void set_rvc ( const RV &rv ) { rvc = rv; } //! access to rv of the posterior void set_rv ( const RV &rv ) { const_cast ( posterior() ).set_rv ( rv ); } //! access function void set_dim ( int dim ) { const_cast ( posterior() ).set_dim ( dim ); } //! return internal log-likelihood of the last data vector double _ll() const { return ll; } //! switch evaluation of log-likelihood on/off void set_evalll ( bool evl0 ) { evalll = evl0; } //! return posterior density virtual const epdf& posterior() const = 0; epdf& prior() {return const_cast(posterior());} //! set prior density -- same as posterior but writable virtual void set_prior(const epdf *pdf0) NOT_IMPLEMENTED_VOID; //!@} //! \name Logging of results //!@{ //! Add all logged variables to a logger //! Log levels two digits: xy where //! * y = 0/1 log-likelihood is to be logged //! * x = level of the posterior (typically 0/1/2 for nothing/mean/bounds) virtual void log_register ( logger &L, const string &prefix = "" ); //! Save results to the given logger, details of what is stored is configured by \c LIDs and \c options virtual void log_write ( ) const; //!@} void from_setting ( const Setting &set ) { shared_ptr r = UI::build ( set, "yrv", UI::optional ); if ( r ) { set_yrv ( *r ); } shared_ptr r2 = UI::build ( set, "rvc", UI::optional ); if ( r2 ) { rvc = *r2; } shared_ptr r3 = UI::build ( set, "rv", UI::optional ); if ( r3 ) { set_rv ( *r3 ); } UI::get ( log_level, set, "log_level", UI::optional ); } void to_setting ( Setting &set ) const { root::to_setting( set ); UI::save( &yrv, set, "yrv" ); UI::save( &rvc, set, "rvc" ); UI::save( &posterior()._rv(), set, "rv" ); UI::save( log_level, set ); } void validate() { if ( log_level[logfull] ) { const_cast ( posterior() ).log_level[epdf::logfull] = true; } else { if ( log_level[logbounds] ) { const_cast ( posterior() ).log_level[epdf::loglbound] = true; } else { const_cast ( posterior() ).log_level[epdf::logmean] = true;; } if ( log_level[logevidence] ) { } } } }; //! array of pointers to epdf typedef Array > epdf_array; //! array of pointers to pdf typedef Array > pdf_array; template vec pdf_internal::samplecond ( const vec &cond ) { condition ( cond ); vec temp = iepdf.sample(); return temp; } template mat pdf_internal::samplecond_mat ( const vec &cond, int N ) { condition ( cond ); mat temp ( dimension(), N ); vec smp ( dimension() ); for ( int i = 0; i < N; i++ ) { smp = iepdf.sample(); temp.set_col ( i, smp ); } return temp; } template double pdf_internal::evallogcond ( const vec &yt, const vec &cond ) { double tmp; condition ( cond ); tmp = iepdf.evallog ( yt ); return tmp; } template vec pdf_internal::evallogcond_mat ( const mat &Yt, const vec &cond ) { condition ( cond ); return iepdf.evallog_mat ( Yt ); } template vec pdf_internal::evallogcond_mat ( const Array &Yt, const vec &cond ) { condition ( cond ); return iepdf.evallog_mat ( Yt ); } }; //namespace #endif // BDMBASE_H