/*!
  \file
  \brief Mergers for combination of pdfs
  \author Vaclav Smidl.

  -----------------------------------
  BDM++ - C++ library for Bayesian Decision Making under Uncertainty

  Using IT++ for numerical operations
  -----------------------------------
*/

#ifndef MERGER_H
#define MERGER_H

#include "../estim/mixtures.h"
#include "discrete.h"

namespace bdm {
using std::string;

//! Merging methods
enum MERGER_METHOD { ARITHMETIC = 1, GEOMETRIC = 2, LOGNORMAL = 3 };

/*!
@brief Base class for general combination of pdfs on discrete support

Mixtures of Gaussian densities are used internally. Switching to other densities should be trivial.

The merged pdfs are expected to be of the form:
\f[ f(x_i|y_i), \quad i=1 \ldots n \f]
where the resulting merger is a density on \f$ \cup [x_i,y_i] \f$. Note that all variables will be joined.
As a result of this feature, each source must be extended to the common support:
\f[ f(z_i|y_i,x_i) f(x_i|y_i) f(y_i), \quad i=1 \ldots n, \f]
where \f$ z_i \f$ accumulates the variables that were not in the original source. These extensions are calculated on-the-fly.

However, these operations can not be performed in general. Hence, this class merges only sources on a common support, \f$ y_i=\emptyset,\ z_i=\emptyset,\ \forall i \f$. For merging of more general cases, use the offsprings merger_mix and merger_grid.
*/
class merger_base : public epdf {
protected:
    //! Elements of composition
    Array<shared_ptr<mpdf> > mpdfs;
    //! Data link for each mpdf in mpdfs
    Array<datalink_m2e*> dls;
    //! Array of rvs that are not modelled by mpdfs at all, \f$ z_i \f$
    Array<RV> rvzs;
    //! Data links for the extensions \f$ f(z_i|x_i,y_i) \f$
    Array<datalink_m2e*> zdls;

    //! number of support points
    int Npoints;
    //! number of sources
    int Nsources;
    //! switch of the method used for merging
    MERGER_METHOD METHOD;
    //! Default for METHOD
    static const MERGER_METHOD DFLT_METHOD;
    //! Prior on the log-normal merging model
    double beta;
    //! default for beta
    static const double DFLT_beta;
    //! Projection to empirical density (could also be piece-wise linear)
    eEmp eSmp;
    //! debugging flag
    bool DBG;
    //! debugging file
    it_file* dbg_file;

public:
    //! \name Constructors
    //! @{

    //! Default constructor
    merger_base () : Npoints ( 0 ), Nsources ( 0 ), DBG ( false ), dbg_file ( 0 ) { }

    //! Constructor from sources
    merger_base ( const Array<shared_ptr<mpdf> > &S );

    //! Function setting the main internal structures
    void set_sources ( const Array<shared_ptr<mpdf> > &Sources ) {
        mpdfs = Sources;
        Nsources = mpdfs.length();
        // set sizes
        dls.set_size ( Sources.length() );
        rvzs.set_size ( Sources.length() );
        zdls.set_size ( Sources.length() );

        rv = get_composite_rv ( mpdfs, /* checkoverlap = */ false );
        RV rvc;
        // Extend rv by rvc!
        for ( int i = 0; i < mpdfs.length(); i++ ) {
            RV rvx = mpdfs ( i )->_rvc().subt ( rv );
            rvc.add ( rvx ); // add rv to common rvc
        }
        // join rv and rvc - see description
        rv.add ( rvc );
        // get dimension
        dim = rv._dsize();

        // create links between sources and common rv
        RV xytmp;
        for ( int i = 0; i < mpdfs.length(); i++ ) {
            // Establish connection between mpdfs and merger
            dls ( i ) = new datalink_m2e;
            dls ( i )->set_connection ( mpdfs ( i )->_rv(), mpdfs ( i )->_rvc(), rv );
            // find out what is missing in each mpdf
            xytmp = mpdfs ( i )->_rv();
            xytmp.add ( mpdfs ( i )->_rvc() );
            // z_i = common_rv - xy
            rvzs ( i ) = rv.subt ( xytmp );
            // establish connection between the extensions f(z_i|x_i,y_i) and common rv
            zdls ( i ) = new datalink_m2e;
            zdls ( i )->set_connection ( rvzs ( i ), xytmp, rv );
        }
    }
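
    /* Typical usage (a sketch only): the usual call sequence is set_sources(), one of
     * the set_support() overloads, set_method() and finally merge(); the merged density
     * is then available through _Smp(). The sources and the pdf "overall_pdf" used to
     * generate the support points are assumed to be constructed elsewhere:
     *
     *   Array<shared_ptr<mpdf> > sources = ...; // pdfs f(x_i|y_i) on a common support
     *   merger_base M;
     *   M.set_sources ( sources );              // build datalinks and the common rv
     *   M.set_support ( overall_pdf, 1000 );    // draw 1000 support points from an epdf
     *   M.set_method ( GEOMETRIC );             // or ARITHMETIC, or LOGNORMAL with beta
     *   M.merge();                              // recompute the weights of the support points
     *   const eEmp &merged = M._Smp();          // result as a weighted empirical density
     */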
    //! Set support points from rectangular grid
    void set_support ( rectangular_support &Sup ) {
        Npoints = Sup.points();
        eSmp.set_parameters ( Npoints, false );
        Array<vec> &samples = eSmp._samples();
        eSmp._w() = ones ( Npoints ) / Npoints; // uniform size of bins
        // set samples
        samples ( 0 ) = Sup.first_vec();
        for ( int j = 1; j < Npoints; j++ ) {
            samples ( j ) = Sup.next_vec();
        }
    }

    //! Set support points from discrete grid
    void set_support ( discrete_support &Sup ) {
        Npoints = Sup.points();
        eSmp.set_parameters ( Sup._Spoints() );
    }

    //! set debug file
    void set_debug_file ( const string fname ) {
        if ( DBG ) delete dbg_file;
        dbg_file = new it_file ( fname );
        if ( dbg_file ) DBG = true;
    }

    //! Set internal parameters used in approximation
    void set_method ( MERGER_METHOD MTH = DFLT_METHOD, double beta0 = DFLT_beta ) {
        METHOD = MTH;
        beta = beta0;
    }

    //! Set support points from a pdf by drawing N samples
    void set_support ( const epdf &overall, int N ) {
        eSmp.set_statistics ( overall, N );
        Npoints = N;
    }

    //! Destructor
    virtual ~merger_base() {
        for ( int i = 0; i < Nsources; i++ ) {
            delete dls ( i );
            delete zdls ( i );
        }
        if ( DBG ) delete dbg_file;
    }
    //!@}

    //! \name Mathematical operations
    //!@{

    //! Merge given sources in given points
    virtual void merge () {
        validate();
        // check if sources overlap:
        bool OK = true;
        for ( int i = 0; i < mpdfs.length(); i++ ) {
            OK &= ( rvzs ( i )._dsize() == 0 );          // z_i is empty
            OK &= ( mpdfs ( i )->_rvc()._dsize() == 0 ); // y_i is empty
        }

        if ( OK ) {
            mat lW = zeros ( mpdfs.length(), eSmp._w().length() );

            vec emptyvec ( 0 );
            for ( int i = 0; i < mpdfs.length(); i++ ) {
                for ( int j = 0; j < eSmp._w().length(); j++ ) {
                    lW ( i, j ) = mpdfs ( i )->evallogcond ( eSmp._samples() ( j ), emptyvec );
                }
            }

            vec w_nn = merge_points ( lW );
            vec wtmp = exp ( w_nn - max ( w_nn ) );
            // renormalize
            eSmp._w() = wtmp / sum ( wtmp );
        } else {
            bdm_error ( "Sources are not compatible - use merger_mix" );
        }
    }

    //! Merge log-likelihood values in points using method specified by parameter METHOD
    vec merge_points ( mat &lW );

    //! Mean of the merged density, i.e. the weighted average of the support points;
    //! the weight w is a vector of weights of the individual support points of eSmp
    vec mean() const {
        const vec &w = eSmp._w();
        const Array<vec> &S = eSmp._samples();
        vec tmp = zeros ( dim );
        for ( int i = 0; i < Npoints; i++ ) {
            tmp += w ( i ) * S ( i );
        }
        return tmp;
    }

    //! Covariance of the merged density computed from the weighted support points
    mat covariance() const {
        const vec &w = eSmp._w();
        const Array<vec> &S = eSmp._samples();

        vec mea = mean();
        // cout << sum ( w ) << "," << w*w << endl;

        mat Tmp = zeros ( dim, dim );
        for ( int i = 0; i < Npoints; i++ ) {
            Tmp += w ( i ) * outer_product ( S ( i ), S ( i ) );
        }
        return Tmp - outer_product ( mea, mea );
    }

    //! Diagonal of the covariance, i.e. per-element variance of the merged density
    vec variance() const {
        const vec &w = eSmp._w();
        const Array<vec> &S = eSmp._samples();

        vec tmp = zeros ( dim );
        // sum over all support points
        for ( int i = 0; i < Npoints; i++ ) {
            tmp += w ( i ) * pow ( S ( i ), 2 );
        }
        return tmp - pow ( mean(), 2 );
    }
    //!@}

    //! \name Access to attributes
    //! @{

    //! Access function to the resulting empirical density
    eEmp& _Smp() { return eSmp; }

    //! load from setting
    void from_setting ( const Setting& set ) {
        // get support
        // find which method to use
        string meth_str;
        UI::get ( meth_str, set, "method", UI::compulsory );
        if ( !strcmp ( meth_str.c_str(), "arithmetic" ) )
            set_method ( ARITHMETIC );
        else {
            if ( !strcmp ( meth_str.c_str(), "geometric" ) )
                set_method ( GEOMETRIC );
            else if ( !strcmp ( meth_str.c_str(), "lognormal" ) ) {
                set_method ( LOGNORMAL );
                set.lookupValue ( "beta", beta );
            }
        }
        string dbg_file;
        if ( UI::get ( dbg_file, set, "dbg_file" ) )
            set_debug_file ( dbg_file );
        // validate() - not used
    }
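
    /* Example of a configuration fragment accepted by from_setting() (a sketch in
     * libconfig syntax; the name of the enclosing group and the "class" entry are
     * assumed to follow the UI conventions used elsewhere in the library):
     *
     *   merger = {
     *       class    = "merger_base";
     *       method   = "lognormal";     // compulsory: "arithmetic", "geometric" or "lognormal"
     *       beta     = 2.0;             // read only when method == "lognormal"
     *       dbg_file = "merger_dbg.it"; // optional, passed to set_debug_file()
     *   };
     */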
    //! Check that the support points and the rv of the merger are correctly set
    void validate() {
        bdm_assert ( eSmp._w().length() > 0, "Empty support, use set_support()." );
        bdm_assert ( dim == eSmp._samples() ( 0 ).length(), "Support points and rv are not compatible!" );
        bdm_assert ( isnamed(), "mergers must be named" );
    }
    //!@}
};
UIREGISTER ( merger_base );
SHAREDPTR ( merger_base );

//! Merger using importance sampling with a mixture proposal density
class merger_mix : public merger_base {
protected:
    //! Internal mixture of EF models
    MixEF Mix;
    //! Number of components in the mixture
    int Ncoms;
    //! coefficient of resampling [0,1]
    double effss_coef;
    //! stop after niter iterations
    int stop_niter;
    //! default value for Ncoms
    static const int DFLT_Ncoms;
    //! default value for effss_coef
    static const double DFLT_effss_coef;

public:
    //! \name Constructors
    //!@{

    //! Default constructor
    merger_mix () : Ncoms ( 0 ), effss_coef ( 0 ), stop_niter ( 0 ) { }

    //! Constructor from sources
    merger_mix ( const Array<shared_ptr<mpdf> > &S ) : Ncoms ( 0 ), effss_coef ( 0 ), stop_niter ( 0 ) {
        set_sources ( S );
    }

    //! Set sources and prepare all internal structures
    void set_sources ( const Array<shared_ptr<mpdf> > &S ) {
        merger_base::set_sources ( S );
        Nsources = S.length();
    }

    //! Set internal parameters used in approximation
    void set_parameters ( int Ncoms0 = DFLT_Ncoms, double effss_coef0 = DFLT_effss_coef ) {
        Ncoms = Ncoms0;
        effss_coef = effss_coef0;
    }
    //!@}

    //! \name Mathematical operations
    //!@{

    //! Merge values using mixture approximation
    void merge ();

    //! sample from the approximating mixture
    vec sample () const {
        return Mix.posterior().sample();
    }

    //! loglikelihood computed on the mixture model
    double evallog ( const vec &yt ) const {
        vec dtf = ones ( yt.length() + 1 );
        dtf.set_subvector ( 0, yt );
        return Mix.logpred ( dtf );
    }
    //!@}

    //! \name Access functions
    //!@{

    //! Access function
    MixEF& _Mix() { return Mix; }

    //! Proposal density constructed from the current internal mixture
    emix* proposal() {
        emix* tmp = Mix.epredictor();
        tmp->set_rv ( rv );
        return tmp;
    }

    //! load from setting
    void from_setting ( const Setting& set ) {
        merger_base::from_setting ( set );
        set.lookupValue ( "ncoms", Ncoms );
        set.lookupValue ( "effss_coef", effss_coef );
        set.lookupValue ( "stop_niter", stop_niter );
    }
    //! @}
};
UIREGISTER ( merger_mix );
SHAREDPTR ( merger_mix );

} // namespace bdm

#endif // MERGER_H