00001
00013 #ifndef BDMBASE_H
00014 #define BDMBASE_H
00015
00016 #include <map>
00017
00018 #include "../itpp_ext.h"
00019 #include "../bdmroot.h"
00020 #include "../shared_ptr.h"
00021 #include "user_info.h"
00022
00023 using namespace libconfig;
00024 using namespace itpp;
00025 using namespace std;
00026
00027 namespace bdm
00028 {
00029
00030 typedef std::map<string, int> RVmap;
00032 extern ivec RV_SIZES;
00034 extern Array<string> RV_NAMES;
00035
00037 class str
00038 {
00039 public:
00041 ivec ids;
00043 ivec times;
00045 str (ivec ids0, ivec times0) : ids (ids0), times (times0) {
00046 bdm_assert_debug (times0.length() == ids0.length(), "Incompatible input");
00047 };
00048 };
00049
00088 class RV : public root
00089 {
00090 protected:
00092 int dsize;
00094 int len;
00096 ivec ids;
00098 ivec times;
00099
00100 private:
00102 void init (const Array<std::string> &in_names, const ivec &in_sizes, const ivec &in_times);
00103 int init (const string &name, int size);
00104 public:
00107
00109 RV (const Array<std::string> &in_names, const ivec &in_sizes, const ivec &in_times) {
00110 init (in_names, in_sizes, in_times);
00111 }
00112
00114 RV (const Array<std::string> &in_names, const ivec &in_sizes) {
00115 init (in_names, in_sizes, zeros_i (in_names.length()));
00116 }
00117
00119 RV (const Array<std::string> &in_names) {
00120 init (in_names, ones_i (in_names.length()), zeros_i (in_names.length()));
00121 }
00122
00124 RV() : dsize (0), len (0), ids (0), times (0) {}
00125
00127 RV (string name, int sz, int tm = 0);
00128
00129
00131
00134
00136 friend std::ostream &operator<< (std::ostream &os, const RV &rv);
00137
00138 int _dsize() const {
00139 return dsize;
00140 }
00141
00143 const ivec& _ids() const { return ids;}
00144
00146 int countsize() const;
00147 ivec cumsizes() const;
00148 int length() const {
00149 return len;
00150 }
00151 int id (int at) const {
00152 return ids (at);
00153 }
00154 int size (int at) const {
00155 return RV_SIZES (ids (at));
00156 }
00157 int time (int at) const {
00158 return times (at);
00159 }
00160 std::string name (int at) const {
00161 return RV_NAMES (ids (at));
00162 }
00163 void set_time (int at, int time0) {
00164 times (at) = time0;
00165 }
00167
00170
00172 ivec findself (const RV &rv2) const;
00174 bool equal (const RV &rv2) const;
00176 bool add (const RV &rv2);
00178 RV subt (const RV &rv2) const;
00180 RV subselect (const ivec &ind) const;
00181
00183 RV operator() (const ivec &ind) const {
00184 return subselect (ind);
00185 }
00186
00188 RV operator() (int di1, int di2) const;
00189
00191 void t (int delta);
00193
00196
00198 str tostr() const;
00201 ivec dataind (const RV &crv) const;
00204 void dataind (const RV &rv2, ivec &selfi, ivec &rv2i) const;
00206 int mint() const {
00207 return min (times);
00208 }
00210
00225 void from_setting (const Setting &set);
00226
00227
00229 static void clear_all();
00230 };
00231 UIREGISTER (RV);
00232 SHAREDPTR (RV);
00233
00235 RV concat (const RV &rv1, const RV &rv2);
00236
00238 extern RV RV0;
00239
00241
00242 class fnc : public root
00243 {
00244 protected:
00246 int dimy;
00247 public:
00249 fnc() {};
00251 virtual vec eval (const vec &cond) {
00252 return vec (0);
00253 };
00254
00256 virtual void condition (const vec &val) {};
00257
00259 int dimension() const {
00260 return dimy;
00261 }
00262 };
00263
00264 class mpdf;
00265
00267
00268 class epdf : public root
00269 {
00270 protected:
00272 int dim;
00274 RV rv;
00275
00276 public:
00288 epdf() : dim (0), rv() {};
00289 epdf (const epdf &e) : dim (e.dim), rv (e.rv) {};
00290 epdf (const RV &rv0) : dim (rv0._dsize()) {
00291 set_rv (rv0);
00292 };
00293 void set_parameters (int dim0) {
00294 dim = dim0;
00295 }
00297
00300
00302 virtual vec sample() const {
00303 bdm_error ("not implemented");
00304 return vec();
00305 }
00306
00308 virtual mat sample_m (int N) const;
00309
00312 virtual double evallog (const vec &val) const {
00313 bdm_error ("not implemented");
00314 return 0.0;
00315 }
00316
00318 virtual vec evallog_m (const mat &Val) const;
00319
00321 virtual vec evallog_m (const Array<vec> &Avec) const;
00322
00324 virtual shared_ptr<mpdf> condition (const RV &rv) const;
00325
00327 virtual shared_ptr<epdf> marginal (const RV &rv) const;
00328
00330 virtual vec mean() const {
00331 bdm_error ("not implemneted");
00332 return vec();
00333 }
00334
00336 virtual vec variance() const {
00337 bdm_error ("not implemneted");
00338 return vec();
00339 }
00340
00342 virtual void qbounds (vec &lb, vec &ub, double percentage = 0.95) const {
00343 vec mea = mean();
00344 vec std = sqrt (variance());
00345 lb = mea - 2 * std;
00346 ub = mea + 2 * std;
00347 };
00349
00355
00357 void set_rv (const RV &rv0) {
00358 rv = rv0;
00359 }
00360
00362 bool isnamed() const {
00363 bool b = (dim == rv._dsize());
00364 return b;
00365 }
00366
00368 const RV& _rv() const {
00369 bdm_assert_debug (isnamed(), "");
00370 return rv;
00371 }
00373
00376
00378 int dimension() const {
00379 return dim;
00380 }
00388 void from_setting (const Setting &set) {
00389 shared_ptr<RV> r = UI::build<RV> ( set, "rv", UI::optional );
00390 if (r) {
00391 set_rv (*r);
00392 }
00393 }
00394
00395 };
00396 SHAREDPTR(epdf);
00397
00399 class mpdf : public root
00400 {
00401 protected:
00403 int dimc;
00405 RV rvc;
00406 private:
00408 epdf* ep;
00409
00410 protected:
00412 void set_ep (epdf &iepdf) {
00413 ep = &iepdf;
00414 }
00416 void set_ep (epdf *iepdfp) {
00417 ep = iepdfp;
00418 }
00419
00420 public:
00423
00424 mpdf() : dimc (0), rvc(), ep (NULL) { }
00425
00426 mpdf (const mpdf &m) : dimc (m.dimc), rvc (m.rvc), ep (m.ep) { }
00428
00431
00433 virtual vec samplecond (const vec &cond) {
00434 bdm_error ("Not implemented");
00435 return vec();
00436 }
00437
00439 virtual mat samplecond_m (const vec &cond, int N);
00440
00442 virtual double evallogcond (const vec &dt, const vec &cond) {
00443 bdm_error ("Not implemented");
00444 return 0.0;
00445 }
00446
00448 virtual vec evallogcond_m (const mat &Dt, const vec &cond) {
00449 vec v(Dt.cols());
00450 for(int i=0;i<Dt.cols();i++){v(i)= evallogcond(Dt.get_col(i),cond);}
00451 return v;
00452 }
00453
00455 virtual vec evallogcond_m (const Array<vec> &Dt, const vec &cond) {
00456 bdm_error ("Not implemented");
00457 return vec();
00458 }
00459
00462
00463 RV _rv() const {
00464 return ep->_rv();
00465 }
00466 RV _rvc() {
00467 return rvc;
00468 }
00469
00470 int dimension() const {
00471 return ep->dimension();
00472 }
00473 int dimensionc() {
00474 return dimc;
00475 }
00476
00486 void from_setting (const Setting &set);
00488
00491 void set_rvc (const RV &rvc0) {
00492 rvc = rvc0;
00493 }
00494 void set_rv (const RV &rv0) {
00495 ep->set_rv (rv0);
00496 }
00497 bool isnamed() {
00498 return (ep->isnamed()) && (dimc == rvc._dsize());
00499 }
00501 };
00502 SHAREDPTR(mpdf);
00503
00505 template <class EPDF>
00506 class mpdf_internal: public mpdf
00507 {
00508 protected :
00510 EPDF iepdf;
00511 public:
00513 mpdf_internal() : mpdf(), iepdf() {set_ep (iepdf);}
00514
00517 virtual void condition (const vec &cond) {
00518 bdm_error ("Not implemented");
00519 }
00520
00522 EPDF& e() {return iepdf;}
00523
00525 vec samplecond (const vec &cond);
00527 double evallogcond (const vec &val, const vec &cond);
00529 virtual vec evallogcond_m (const mat &Dt, const vec &cond);
00531 virtual vec evallogcond_m (const Array<vec> &Dt, const vec &cond);
00533 virtual mat samplecond_m (const vec &cond, int N);
00534 };
00535
00561 class datalink
00562 {
00563 protected:
00565 int downsize;
00566
00568 int upsize;
00569
00571 ivec v2v_up;
00572
00573 public:
00575 datalink() : downsize (0), upsize (0) { }
00576
00578 datalink (const RV &rv, const RV &rv_up) {
00579 set_connection (rv, rv_up);
00580 }
00581
00583 void set_connection (const RV &rv, const RV &rv_up);
00584
00586 void set_connection (int ds, int us, const ivec &upind);
00587
00589 vec pushdown (const vec &val_up) {
00590 bdm_assert_debug (upsize == val_up.length(), "Wrong val_up");
00591 return get_vec (val_up, v2v_up);
00592 }
00593
00595 void pushup (vec &val_up, const vec &val) {
00596 bdm_assert_debug (downsize == val.length(), "Wrong val");
00597 bdm_assert_debug (upsize == val_up.length(), "Wrong val_up");
00598 set_subvector (val_up, v2v_up, val);
00599 }
00600 };
00601
00603 class datalink_m2e: public datalink
00604 {
00605 protected:
00607 int condsize;
00608
00610 ivec v2c_up;
00611
00613 ivec v2c_lo;
00614
00615 public:
00617 datalink_m2e() : condsize (0) { }
00618
00620 void set_connection (const RV &rv, const RV &rvc, const RV &rv_up);
00621
00623 vec get_cond (const vec &val_up);
00624
00626 void pushup_cond (vec &val_up, const vec &val, const vec &cond);
00627 };
00628
00631 class datalink_m2m: public datalink_m2e
00632 {
00633 protected:
00635 ivec c2c_up;
00637 ivec c2c_lo;
00638
00639 public:
00641 datalink_m2m() {};
00643 void set_connection (const RV &rv, const RV &rvc, const RV &rv_up, const RV &rvc_up) {
00644 datalink_m2e::set_connection (rv, rvc, rv_up);
00645
00646 rvc.dataind (rvc_up, c2c_lo, c2c_up);
00647 bdm_assert_debug (c2c_lo.length() + v2c_lo.length() == condsize, "cond is not fully given");
00648 }
00649
00651 vec get_cond (const vec &val_up, const vec &cond_up) {
00652 vec tmp (condsize);
00653 set_subvector (tmp, v2c_lo, val_up (v2c_up));
00654 set_subvector (tmp, c2c_lo, cond_up (c2c_up));
00655 return tmp;
00656 }
00658
00659 };
00660
00666 class logger : public root
00667 {
00668 protected:
00670 Array<RV> entries;
00672 Array<string> names;
00673 public:
00675 logger() : entries (0), names (0) {}
00676
00679 virtual int add (const RV &rv, string prefix = "") {
00680 int id;
00681 if (rv._dsize() > 0) {
00682 id = entries.length();
00683 names = concat (names, prefix);
00684 entries.set_length (id + 1, true);
00685 entries (id) = rv;
00686 } else {
00687 id = -1;
00688 }
00689 return id;
00690 }
00691
00693 virtual void logit (int id, const vec &v) = 0;
00695 virtual void logit (int id, const double &d) = 0;
00696
00698 virtual void step() = 0;
00699
00701 virtual void finalize() {};
00702
00704 virtual void init() {};
00705
00706 };
00707
00711 class mepdf : public mpdf
00712 {
00714 shared_ptr<epdf> iepdf;
00715 public:
00717 mepdf() { }
00719 mepdf (shared_ptr<epdf> em) {
00720 iepdf = em;
00721 set_ep (*iepdf.get());
00722 dimc = 0;
00723 }
00724
00726 vec samplecond(const vec &cond){return iepdf->sample();}
00727 double evallogcond(const vec &val, const vec &cond){return iepdf->evallog(val);}
00728
00736 void from_setting (const Setting &set);
00737 };
00738 UIREGISTER (mepdf);
00739 SHAREDPTR (mepdf);
00740
00742 RV get_composite_rv ( const Array<shared_ptr<mpdf> > &mpdfs, bool checkoverlap = false );
00743
00751 class DS : public root
00752 {
00753 protected:
00754 int dtsize;
00755 int utsize;
00757 RV Drv;
00759 RV Urv;
00761 int L_dt, L_ut;
00762 public:
00764 DS() : Drv(), Urv() {};
00765
00767 virtual void getdata (vec &dt) {
00768 bdm_error ("abstract class");
00769 }
00770
00772 virtual void getdata (vec &dt, const ivec &indeces) {
00773 bdm_error ("abstract class");
00774 }
00775
00777 virtual void write (vec &ut) {
00778 bdm_error ("abstract class");
00779 }
00780
00782 virtual void write (vec &ut, const ivec &indeces) {
00783 bdm_error ("abstract class");
00784 }
00785
00787 virtual void step() = 0;
00788
00790 virtual void log_add (logger &L) {
00791 bdm_assert_debug (dtsize == Drv._dsize(), "invalid DS: dtsize different from Drv");
00792 bdm_assert_debug (utsize == Urv._dsize(), "invalid DS: utsize different from Urv");
00793
00794 L_dt = L.add (Drv, "");
00795 L_ut = L.add (Urv, "");
00796 }
00798 virtual void logit (logger &L) {
00799 vec tmp (Drv._dsize() + Urv._dsize());
00800 getdata (tmp);
00801
00802 L.logit (L_dt, tmp.left (Drv._dsize()));
00803
00804 L.logit (L_ut, tmp.mid (Drv._dsize(), Urv._dsize()));
00805 }
00807 virtual RV _drv() const {
00808 return concat (Drv, Urv);
00809 }
00811 const RV& _urv() const {
00812 return Urv;
00813 }
00815 virtual void set_drv (const RV &drv, const RV &urv) {
00816 Drv = drv;
00817 Urv = urv;
00818 }
00819 };
00820
00842 class BM : public root
00843 {
00844 protected:
00846 RV drv;
00848 double ll;
00850 bool evalll;
00851 public:
00854
00855 BM() : ll (0), evalll (true), LIDs (4), LFlags (4) {
00856 LIDs = -1;
00857 LFlags = 0;
00858 LFlags (0) = 1;
00859 };
00860 BM (const BM &B) : drv (B.drv), ll (B.ll), evalll (B.evalll) {}
00863 virtual BM* _copy_() const {
00864 return NULL;
00865 };
00867
00870
00874 virtual void bayes (const vec &dt) = 0;
00876 virtual void bayesB (const mat &Dt);
00879 virtual double logpred (const vec &dt) const {
00880 bdm_error ("Not implemented");
00881 return 0.0;
00882 }
00883
00885 vec logpred_m (const mat &dt) const {
00886 vec tmp (dt.cols());
00887 for (int i = 0; i < dt.cols(); i++) {
00888 tmp (i) = logpred (dt.get_col (i));
00889 }
00890 return tmp;
00891 }
00892
00894 virtual epdf* epredictor() const {
00895 bdm_error ("Not implemented");
00896 return NULL;
00897 };
00899 virtual mpdf* predictor() const {
00900 bdm_error ("Not implemented");
00901 return NULL;
00902 };
00904
00909
00911 RV rvc;
00913 const RV& _rvc() const {
00914 return rvc;
00915 }
00916
00918 virtual void condition (const vec &val) {
00919 bdm_error ("Not implemented!");
00920 }
00921
00923
00924
00927
00928 const RV& _drv() const {
00929 return drv;
00930 }
00931 void set_drv (const RV &rv) {
00932 drv = rv;
00933 }
00934 void set_rv (const RV &rv) {
00935 const_cast<epdf&> (posterior()).set_rv (rv);
00936 }
00937 double _ll() const {
00938 return ll;
00939 }
00940 void set_evalll (bool evl0) {
00941 evalll = evl0;
00942 }
00943 virtual const epdf& posterior() const = 0;
00945
00948
00950 virtual void set_options (const string &opt) {
00951 LFlags (0) = 1;
00952 if (opt.find ("logbounds") != string::npos) {
00953 LFlags (1) = 1;
00954 LFlags (2) = 1;
00955 }
00956 if (opt.find ("logll") != string::npos) {
00957 LFlags (3) = 1;
00958 }
00959 }
00961 ivec LIDs;
00962
00964 ivec LFlags;
00966 virtual void log_add (logger &L, const string &name = "") {
00967
00968 RV r;
00969 if (posterior().isnamed()) {
00970 r = posterior()._rv();
00971 } else {
00972 r = RV ("est", posterior().dimension());
00973 };
00974
00975
00976 if (LFlags (0)) LIDs (0) = L.add (r, name + "mean_");
00977 if (LFlags (1)) LIDs (1) = L.add (r, name + "lb_");
00978 if (LFlags (2)) LIDs (2) = L.add (r, name + "ub_");
00979 if (LFlags (3)) LIDs (3) = L.add (RV ("ll", 1), name);
00980 }
00981 virtual void logit (logger &L) {
00982 L.logit (LIDs (0), posterior().mean());
00983 if (LFlags (1) || LFlags (2)) {
00984 vec ub, lb;
00985 posterior().qbounds (lb, ub);
00986 L.logit (LIDs (1), lb);
00987 L.logit (LIDs (2), ub);
00988 }
00989 if (LFlags (3)) L.logit (LIDs (3), ll);
00990 }
00992 void from_setting (const Setting &set){
00993 shared_ptr<RV> r = UI::build<RV> ( set, "drv", UI::optional );
00994 if (r) {
00995 set_drv (*r);
00996 }
00997 string opt;
00998 if(!UI::get(opt, set, "options", UI::optional)) {
00999 set_options(opt);
01000 }
01001 }
01002
01003 };
01004
01005 typedef Array<shared_ptr<epdf> > epdf_array;
01006
01007 typedef Array<shared_ptr<mpdf> > mpdf_array;
01008
01009 template<class EPDF>
01010 vec mpdf_internal<EPDF>::samplecond (const vec &cond)
01011 {
01012 condition (cond);
01013 vec temp = iepdf.sample();
01014 return temp;
01015 }
01016
01017 template<class EPDF>
01018 mat mpdf_internal<EPDF>::samplecond_m (const vec &cond, int N)
01019 {
01020 condition (cond);
01021 mat temp (dimension(), N);
01022 vec smp (dimension());
01023 for (int i = 0; i < N; i++) {
01024 smp = iepdf.sample();
01025 temp.set_col (i, smp);
01026 }
01027
01028 return temp;
01029 }
01030
01031 template<class EPDF>
01032 double mpdf_internal<EPDF>::evallogcond (const vec &dt, const vec &cond)
01033 {
01034 double tmp;
01035 condition (cond);
01036 tmp = iepdf.evallog (dt);
01037 return tmp;
01038 }
01039
01040 template<class EPDF>
01041 vec mpdf_internal<EPDF>::evallogcond_m (const mat &Dt, const vec &cond)
01042 {
01043 condition (cond);
01044 return iepdf.evallog_m (Dt);
01045 }
01046
01047 template<class EPDF>
01048 vec mpdf_internal<EPDF>::evallogcond_m (const Array<vec> &Dt, const vec &cond)
01049 {
01050 condition (cond);
01051 return iepdf.evallog_m (Dt);
01052 }
01053
01054 };
01055 #endif // BDMBASE_H