/*! \file \brief Bayesian Models (bm) that use Bayes rule to learn from observations \author Vaclav Smidl. ----------------------------------- BDM++ - C++ library for Bayesian Decision Making under Uncertainty Using IT++ for numerical operations ----------------------------------- */ #ifndef BDMBASE_H #define BDMBASE_H #include #include "../itpp_ext.h" #include "../bdmroot.h" #include "../shared_ptr.h" #include "user_info.h" using namespace libconfig; using namespace itpp; using namespace std; namespace bdm { typedef std::map RVmap; extern ivec RV_SIZES; extern Array RV_NAMES; //! Structure of RV, i.e. RVs expanded into a flat list of IDs, used for debugging. class str { public: //! vector id ids (non-unique!) ivec ids; //! vector of times ivec times; //!Default constructor str(ivec ids0, ivec times0) : ids(ids0), times(times0) { it_assert_debug(times0.length() == ids0.length(), "Incompatible input"); }; }; /*! * \brief Class representing variables, most often random variables The purpose of this class is to decribe a vector of data. Such description is used for connecting various vectors between each other, see class datalink. The class is implemented using global variables to assure uniqueness of description: In is a vector \dot digraph datalink { rankdir=LR; subgraph cluster0 { node [shape=record]; label = "RV_MAP \n std::map"; map [label="{{\"a\"| \"b\" | \"c\"} | {<3> 3 |<1> 1|<2> 2}}"]; color = "white" } subgraph cluster1{ node [shape=record]; label = "RV_NAMES"; names [label="{<1> \"b\" | <2> \"c\" | <3>\"a\" }"]; color = "white" } subgraph cluster2{ node [shape=record]; label = "RV_SIZES"; labelloc = b; sizes [label="{<1>1 |<2> 4 |<3> 1}"]; color = "white" } map:1 -> names:1; map:1 -> sizes:1; map:3 -> names:3; map:3 -> sizes:3; } \enddot */ class RV : public root { protected: //! size of the data vector int dsize; //! number of individual rvs int len; //! Vector of unique IDs ivec ids; //! Vector of shifts from current time ivec times; private: //! auxiliary function used in constructor void init(const Array &in_names, const ivec &in_sizes, const ivec &in_times); int init(const string &name, int size); public: //! \name Constructors //!@{ //! Full constructor RV(const Array &in_names, const ivec &in_sizes, const ivec &in_times) { init(in_names, in_sizes, in_times); } //! Constructor with times=0 RV(const Array &in_names, const ivec &in_sizes) { init(in_names, in_sizes, zeros_i(in_names.length())); } //! Constructor with sizes=1, times=0 RV(const Array &in_names) { init(in_names, ones_i(in_names.length()), zeros_i(in_names.length())); } //! Constructor of empty RV RV() : dsize(0), len(0), ids(0), times(0) {} //! Constructor of a single RV with given id RV(string name, int sz, int tm = 0); //!@} //! \name Access functions //!@{ //! State output, e.g. for debugging. friend std::ostream &operator<< (std::ostream &os, const RV &rv); int _dsize() const { return dsize; } //! Recount size of the corresponding data vector int countsize() const; ivec cumsizes() const; int length() const {return len;} int id(int at) const {return ids(at);} int size(int at) const { return RV_SIZES(ids(at)); } int time(int at) const { return times(at); } std::string name(int at) const { return RV_NAMES(ids(at)); } void set_time(int at, int time0) { times(at) = time0; } //!@} //TODO why not inline and later?? //! \name Algebra on Random Variables //!@{ //! Find indices of self in another rv, \return ivec of the same size as self. ivec findself(const RV &rv2) const; //! Compare if \c rv2 is identical to this \c RV bool equal(const RV &rv2) const; //! Add (concat) another variable to the current one, \return true if all rv2 were added, false if rv2 is in conflict bool add(const RV &rv2); //! Subtract another variable from the current one RV subt(const RV &rv2) const; //! Select only variables at indices ind RV subselect(const ivec &ind) const; //! Select only variables at indices ind RV operator()(const ivec &ind) const { return subselect(ind); } //! Select from data vector starting at di1 to di2 RV operator()(int di1, int di2) const; //! Shift \c time by delta. void t(int delta); //!@} //!\name Relation to vectors //!@{ //! generate \c str from rv, by expanding sizes TODO to_string.. str tostr() const; //! when this rv is a part of bigger rv, this function returns indices of self in the data vector of the bigger crv. //! Then, data can be copied via: data_of_this = cdata(ind); ivec dataind(const RV &crv) const; //! generate mutual indices when copying data between self and crv. //! Data are copied via: data_of_this(selfi) = data_of_rv2(rv2i) void dataind(const RV &rv2, ivec &selfi, ivec &rv2i) const; //! Minimum time-offset int mint() const {return min(times);} //!@} // TODO aktualizovat dle soucasneho UI /*! \brief UI for class RV (description of data vectors) \code rv = { type = "rv"; //identifier of the description // UNIQUE IDENTIFIER same names = same variable names = ["a", "b", "c", ...]; // which will be used e.g. in loggers //optional arguments sizes = [1, 2, 3, ...]; // (optional) default = ones() times = [-1, -2, 0, ...]; // time shifts with respect to current time (optional) default = zeros() } \endcode */ void from_setting(const Setting &set); // TODO dodelat void to_setting( Setting &set ) const; //! Invalidate all named RVs. Use before initializing any RV instances, with care... static void clear_all(); }; UIREGISTER(RV); //! Concat two random variables RV concat(const RV &rv1, const RV &rv2); //!Default empty RV that can be used as default argument extern RV RV0; //! Class representing function \f$f(x)\f$ of variable \f$x\f$ represented by \c rv class fnc : public root { protected: //! Length of the output vector int dimy; public: //!default constructor fnc() {}; //! function evaluates numerical value of \f$f(x)\f$ at \f$x=\f$ \c cond virtual vec eval(const vec &cond) { return vec(0); }; //! function substitutes given value into an appropriate position virtual void condition(const vec &val) {}; //! access function int dimension() const {return dimy;} }; class mpdf; //! Probability density function with numerical statistics, e.g. posterior density. class epdf : public root { protected: //! dimension of the random variable int dim; //! Description of the random variable RV rv; public: /*! \name Constructors Construction of each epdf should support two types of constructors: \li empty constructor, \li copy constructor, The following constructors should be supported for convenience: \li constructor followed by calling \c set_parameters() \li constructor accepting random variables calling \c set_rv() All internal data structures are constructed as empty. Their values (including sizes) will be set by method \c set_parameters(). This way references can be initialized in constructors. @{*/ epdf() : dim(0), rv() {}; epdf(const epdf &e) : dim(e.dim), rv(e.rv) {}; epdf(const RV &rv0):dim(rv0._dsize()) {set_rv(rv0);}; void set_parameters(int dim0) {dim = dim0;} //!@} //! \name Matematical Operations //!@{ //! Returns a sample, \f$ x \f$ from density \f$ f_x()\f$ virtual vec sample() const {it_error("not implemneted"); return vec(0);}; //! Returns N samples, \f$ [x_1 , x_2 , \ldots \ \f$ from density \f$ f_x(rv)\f$ virtual mat sample_m(int N) const; //! Compute log-probability of argument \c val //! In case the argument is out of suport return -Infinity virtual double evallog(const vec &val) const {it_error("not implemneted"); return 0.0;}; //! Compute log-probability of multiple values argument \c val virtual vec evallog_m(const mat &Val) const { vec x(Val.cols()); for (int i = 0; i < Val.cols(); i++) {x(i) = evallog(Val.get_col(i)) ;} return x; } //! Compute log-probability of multiple values argument \c val virtual vec evallog_m(const Array &Avec) const { vec x(Avec.size()); for (int i = 0; i < Avec.size(); i++) {x(i) = evallog(Avec(i)) ;} return x; } //! Return conditional density on the given RV, the remaining rvs will be in conditioning virtual mpdf* condition(const RV &rv) const {it_warning("Not implemented"); return NULL;} //! Return marginal density on the given RV, the remainig rvs are intergrated out virtual epdf* marginal(const RV &rv) const {it_warning("Not implemented"); return NULL;} //! return expected value virtual vec mean() const {it_error("not implemneted"); return vec(0);}; //! return expected variance (not covariance!) virtual vec variance() const {it_error("not implemneted"); return vec(0);}; //! Lower and upper bounds of \c percentage % quantile, returns mean-2*sigma as default virtual void qbounds(vec &lb, vec &ub, double percentage = 0.95) const { vec mea = mean(); vec std = sqrt(variance()); lb = mea - 2 * std; ub = mea + 2 * std; }; //!@} //! \name Connection to other classes //! Description of the random quantity via attribute \c rv is optional. //! For operations such as sampling \c rv does not need to be set. However, for \c marginalization //! and \c conditioning \c rv has to be set. NB: //! @{ //!Name its rv void set_rv(const RV &rv0) {rv = rv0; } //it_assert_debug(isnamed(),""); }; //! True if rv is assigned bool isnamed() const {bool b = (dim == rv._dsize()); return b;} //! Return name (fails when isnamed is false) const RV& _rv() const {it_assert_debug(isnamed(), ""); return rv;} //!@} //! \name Access to attributes //! @{ //! Size of the random variable int dimension() const {return dim;} //! Load from structure with elements: //! \code //! { rv = {class="RV", names=(...),}; // RV describing meaning of random variable //! // elements of offsprings //! } //! \endcode //!@} void from_setting(const Setting &set){ if (set.exists("rv")){ RV* r = UI::build(set,"rv"); set_rv(*r); delete r; } } }; //! Conditional probability density, e.g. modeling some dependencies. //TODO Samplecond can be generalized class mpdf : public root { protected: //!dimension of the condition int dimc; //! random variable in condition RV rvc; private: //! pointer to internal epdf shared_ptr shep; public: //! \name Constructors //! @{ mpdf():dimc(0), rvc() { } mpdf(const mpdf &m):dimc(m.dimc), rvc(m.rvc), shep(m.shep) { } //!@} //! \name Matematical operations //!@{ //! Returns a sample from the density conditioned on \c cond, \f$x \sim epdf(rv|cond)\f$. \param cond is numeric value of \c rv virtual vec samplecond(const vec &cond); //! Returns \param N samples from the density conditioned on \c cond, \f$x \sim epdf(rv|cond)\f$. \param cond is numeric value of \c rv virtual mat samplecond_m(const vec &cond, int N); //! Update \c ep so that it represents this mpdf conditioned on \c rvc = cond virtual void condition(const vec &cond) {it_error("Not implemented");}; //! Shortcut for conditioning and evaluation of the internal epdf. In some cases, this operation can be implemented efficiently. virtual double evallogcond(const vec &dt, const vec &cond); //! Matrix version of evallogcond virtual vec evallogcond_m(const mat &Dt, const vec &cond); //! Array version of evallogcond virtual vec evallogcond_m(const Array &Dt, const vec &cond); //! \name Access to attributes //! @{ RV _rv() { return shep->_rv(); } RV _rvc() { it_assert_debug(isnamed(), ""); return rvc; } int dimension() { return shep->dimension(); } int dimensionc() { return dimc; } epdf *e() { return shep.get(); } void set_ep(shared_ptr ep) { shep = ep; } //! Load from structure with elements: //! \code //! { rv = {class="RV", names=(...),}; // RV describing meaning of random variable //! rvc= {class="RV", names=(...),}; // RV describing meaning of random variable in condition //! // elements of offsprings //! } //! \endcode //!@} void from_setting(const Setting &set); //!@} //! \name Connection to other objects //!@{ void set_rvc(const RV &rvc0) { rvc = rvc0; } void set_rv(const RV &rv0) { shep->set_rv(rv0); } bool isnamed() { return (shep->isnamed()) && (dimc == rvc._dsize()); } //!@} }; /*! \brief DataLink is a connection between two data vectors Up and Down Up can be longer than Down. Down must be fully present in Up (TODO optional) See chart: \dot digraph datalink { node [shape=record]; subgraph cluster0 { label = "Up"; up [label="<1>|<2>|<3>|<4>|<5>"]; color = "white" } subgraph cluster1{ label = "Down"; labelloc = b; down [label="<1>|<2>|<3>"]; color = "white" } up:1 -> down:1; up:3 -> down:2; up:5 -> down:3; } \enddot */ class datalink { protected: //! Remember how long val should be int downsize; //! Remember how long val of "Up" should be int upsize; //! val-to-val link, indices of the upper val ivec v2v_up; public: //! Constructor datalink():downsize(0), upsize(0) { } datalink(const RV &rv, const RV &rv_up) { set_connection(rv, rv_up); } //! set connection, rv must be fully present in rv_up void set_connection(const RV &rv, const RV &rv_up) { downsize = rv._dsize(); upsize = rv_up._dsize(); v2v_up = rv.dataind(rv_up); it_assert_debug(v2v_up.length() == downsize, "rv is not fully in rv_up"); } //! set connection using indices void set_connection(int ds, int us, const ivec &upind) { downsize = ds; upsize = us; v2v_up = upind; it_assert_debug(v2v_up.length() == downsize, "rv is not fully in rv_up"); } //! Get val for myself from val of "Up" vec pushdown(const vec &val_up) { it_assert_debug(upsize == val_up.length(), "Wrong val_up"); return get_vec(val_up, v2v_up); } //! Fill val of "Up" by my pieces void pushup(vec &val_up, const vec &val) { it_assert_debug(downsize == val.length(), "Wrong val"); it_assert_debug(upsize == val_up.length(), "Wrong val_up"); set_subvector(val_up, v2v_up, val); } }; //! Data link with a condition. class datalink_m2e: public datalink { protected: //! Remember how long cond should be int condsize; //!upper_val-to-local_cond link, indices of the upper val ivec v2c_up; //!upper_val-to-local_cond link, indices of the local cond ivec v2c_lo; public: //! Constructor datalink_m2e():condsize(0) { } void set_connection(const RV &rv, const RV &rvc, const RV &rv_up) { datalink::set_connection(rv, rv_up); condsize = rvc._dsize(); //establish v2c connection rvc.dataind(rv_up, v2c_lo, v2c_up); } //!Construct condition vec get_cond(const vec &val_up) { vec tmp(condsize); set_subvector(tmp, v2c_lo, val_up(v2c_up)); return tmp; } void pushup_cond(vec &val_up, const vec &val, const vec &cond) { it_assert_debug(downsize == val.length(), "Wrong val"); it_assert_debug(upsize == val_up.length(), "Wrong val_up"); set_subvector(val_up, v2v_up, val); set_subvector(val_up, v2c_up, cond); } }; //!DataLink is a connection between mpdf and its superordinate (Up) //! This class links class datalink_m2m: public datalink_m2e { protected: //!cond-to-cond link, indices of the upper cond ivec c2c_up; //!cond-to-cond link, indices of the local cond ivec c2c_lo; public: //! Constructor datalink_m2m() {}; void set_connection(const RV &rv, const RV &rvc, const RV &rv_up, const RV &rvc_up) { datalink_m2e::set_connection(rv, rvc, rv_up); //establish c2c connection rvc.dataind(rvc_up, c2c_lo, c2c_up); it_assert_debug(c2c_lo.length() + v2c_lo.length() == condsize, "cond is not fully given"); } //! Get cond for myself from val and cond of "Up" vec get_cond(const vec &val_up, const vec &cond_up) { vec tmp(condsize); set_subvector(tmp, v2c_lo, val_up(v2c_up)); set_subvector(tmp, c2c_lo, cond_up(c2c_up)); return tmp; } //! Fill }; /*! @brief Class for storing results (and semi-results) of an experiment This class abstracts logging of results from implementation. This class replaces direct logging of results (e.g. to files or to global variables) by calling methods of a logger. Specializations of this abstract class for specific storage method are designed. */ class logger : public root { protected: //! RVs of all logged variables. Array entries; //! Names of logged quantities, e.g. names of algorithm variants Array names; public: //!Default constructor logger() : entries(0), names(0) {} //! returns an identifier which will be later needed for calling the \c logit() function //! For empty RV it returns -1, this entry will be ignored by \c logit(). virtual int add(const RV &rv, string prefix = "") { int id; if (rv._dsize() > 0) { id = entries.length(); names = concat(names, prefix); // diff entries.set_length(id + 1, true); entries(id) = rv; } else { id = -1;} return id; // identifier of the last entry } //! log this vector virtual void logit(int id, const vec &v) = 0; //! log this double virtual void logit(int id, const double &d) = 0; //! Shifts storage position for another time step. virtual void step() = 0; //! Finalize storing information virtual void finalize() {}; //! Initialize the storage virtual void init() {}; }; /*! \brief Unconditional mpdf, allows using epdf in the role of mpdf. */ class mepdf : public mpdf { public: //!Default constructor mepdf() { } mepdf(shared_ptr em) { set_ep(em); dimc = 0; } //! empty void condition(const vec &cond); //! Load from structure with elements: //! \code //! { class = "mepdf", //! epdfs = {class="epdfs",...} //! } //! \endcode //!@} void from_setting(const Setting &set); }; UIREGISTER(mepdf); //!\brief Chain rule of pdfs - abstract part common for mprod and merger. //!this abstract class is common to epdf and mpdf //!\todo Think of better design - global functions rv=get_rv(Array mpdfs); ?? class compositepdf { protected: //! Elements of composition Array mpdfs; bool owning_mpdfs; public: compositepdf():mpdfs(0){}; compositepdf(Array A0, bool own=false){set_elements(A0,own);}; void set_elements(Array A0, bool own=false) {mpdfs=A0;owning_mpdfs=own;}; //! find common rv, flag \param checkoverlap modifies whether overlaps are acceptable RV getrv(bool checkoverlap = false); //! common rvc of all mpdfs is written to rvc void setrvc(const RV &rv, RV &rvc); ~compositepdf(){if (owning_mpdfs) for(int i=0;i(posterior()).set_rv(rv);} double _ll() const {return ll;} void set_evalll(bool evl0) {evalll = evl0;} virtual const epdf& posterior() const = 0; virtual const epdf* _e() const = 0; //!@} //! \name Logging of results //!@{ //! Set boolean options from a string, recognized are: "logbounds,logll" virtual void set_options(const string &opt) { LFlags(0) = 1; if (opt.find("logbounds") != string::npos) {LFlags(1) = 1; LFlags(2) = 1;} if (opt.find("logll") != string::npos) {LFlags(3) = 1;} } //! IDs of storages in loggers 4:[1=mean,2=lb,3=ub,4=ll] ivec LIDs; //! Flags for logging - same size as LIDs, each entry correspond to the same in LIDs ivec LFlags; //! Add all logged variables to a logger virtual void log_add(logger &L, const string &name = "") { // internal RV r; if (posterior().isnamed()) {r = posterior()._rv();} else {r = RV("est", posterior().dimension());}; // Add mean value if (LFlags(0)) LIDs(0) = L.add(r, name + "mean_"); if (LFlags(1)) LIDs(1) = L.add(r, name + "lb_"); if (LFlags(2)) LIDs(2) = L.add(r, name + "ub_"); if (LFlags(3)) LIDs(3) = L.add(RV("ll", 1), name); //TODO: "local" RV } virtual void logit(logger &L) { L.logit(LIDs(0), posterior().mean()); if (LFlags(1) || LFlags(2)) { //if one of them is off, its LID==-1 and will not be stored vec ub, lb; posterior().qbounds(lb, ub); L.logit(LIDs(1), lb); L.logit(LIDs(2), ub); } if (LFlags(3)) L.logit(LIDs(3), ll); } //!@} }; }; //namespace #endif // BDMBASE_H