| 21 | | using std::string; |
| 22 | | |
| 23 | | /*! |
| 24 | | @brief Function for general combination of pdfs |
| 25 | | |
| 26 | | Mixtures of Gaussian densities are used internally. Switching to other densities should be trivial. |
| 27 | | */ |
| 28 | | |
| 29 | | class merger : public compositepdf, public epdf |
| 30 | | { |
| 31 | | protected: |
| 32 | | //!Internal mixture of EF models |
| 33 | | MixEF Mix; |
| 34 | | //! Data link for each mpdf in mpdfs |
| 35 | | Array<datalink_m2e*> dls; |
| 36 | | //! Array of rvs that are not modelled by mpdfs at all (aux) |
| 37 | | Array<RV> rvzs; |
| 38 | | //! Data Links of rv0 mpdfs - these will be conditioned the (rv,rvc) of mpdfs |
| 39 | | Array<datalink_m2e*> zdls; |
| 40 | | |
| 41 | | //!Number of samples used in approximation |
| 42 | | int Ns; |
| 43 | | //!Number of components in a mixture |
| 44 | | int Nc; |
| 45 | | //!Prior on the log-normal merging model |
| 46 | | double beta; |
| 47 | | //! Projection to empirical density |
| 48 | | eEmp eSmp; |
| 49 | | //! coefficient of resampling |
| 50 | | double effss_coef; |
| 51 | | |
| 52 | | //! debug or not debug |
| 53 | | bool DBG; |
| 54 | | //! debugging file |
| 55 | | it_file* dbg; |
| 56 | | //! Flag if the samples are fixed or not |
| 57 | | bool fix_smp; |
| 58 | | public: |
| 59 | | //!Default constructor |
| 60 | | merger ( const Array<mpdf*> &S ) : |
| 61 | | compositepdf ( S ), epdf ( ), |
| 62 | | Mix ( Array<BMEF*> ( 0 ),vec ( 0 ) ), dls ( n ), rvzs ( n ), zdls ( n ), eSmp() |
| 63 | | { |
| 64 | | RV ztmp; |
| 65 | | rv = getrv ( false ); |
| 66 | | RV rvc; setrvc ( rv,rvc ); // Extend rv by rvc! |
| 67 | | rv.add ( rvc ); |
| 68 | | // get dimension |
| 69 | | dim = rv._dsize(); |
| 70 | | |
| 71 | | for ( int i=0;i<n;i++ ) |
| 72 | | { |
| 73 | | //Establich connection between mpdfs and merger |
| 74 | | dls ( i ) = new datalink_m2e;dls ( i )->set_connection ( mpdfs ( i )->_rv(), mpdfs ( i )->_rvc(), rv ); |
| 75 | | // find out what is missing in each mpdf |
| 76 | | ztmp= mpdfs ( i )->_rv(); |
| 77 | | ztmp.add ( mpdfs ( i )->_rvc() ); |
| 78 | | rvzs ( i ) =rv.subt ( ztmp ); |
| 79 | | zdls ( i ) = new datalink_m2e; zdls ( i )->set_connection ( rvzs ( i ), ztmp, rv ) ; |
| 80 | | }; |
| 81 | | //Set Default values of parameters |
| 82 | | beta=2.0; |
| 83 | | Ns=100; |
| 84 | | Nc=10; |
| 85 | | Mix.set_method ( EM ); |
| 86 | | DBG = false; |
| 87 | | fix_smp = false; |
| 88 | | } |
| 89 | | //! set debug file |
| 90 | | void debug_file ( const string fname ) { if ( DBG ) delete dbg; dbg = new it_file ( fname ); if ( dbg ) DBG=true;} |
| 91 | | //! Set internal parameters used in approximation |
| 92 | | void set_parameters ( double beta0, int Ns0, int Nc0, double effss_coef0=0.5 ) {beta=beta0; |
| 93 | | Ns=Ns0; |
| 94 | | Nc=Nc0; |
| 95 | | effss_coef=effss_coef0; |
| 96 | | eSmp.set_parameters ( Ns0,false ); |
| 97 | | } |
| 98 | | void set_grid ( Array<vec> &XYZ ) |
| 99 | | { |
| 100 | | int dim=XYZ.length(); ivec szs ( dim ); |
| 101 | | for(int i=0; i<dim;i++){szs=XYZ(i).length();} |
| 102 | | Ns=prod(szs); |
| 103 | | eSmp.set_parameters(Ns,false); |
| 104 | | Array<vec> &samples=eSmp._samples(); |
| 105 | | eSmp._w()=ones(Ns)/Ns; |
| 106 | | |
| 107 | | //set samples |
| 108 | | ivec is=zeros_i(dim);//indeces of dimensions in for cycle; |
| 109 | | vec smpi(dim); |
| 110 | | for(int i=0; i<Ns; i++){ |
| 111 | | for(int j=0; j<dim; j++){smpi(j)=XYZ(j)(is(j)); /* jty vector*/ } |
| 112 | | samples(i)=smpi; |
| 113 | | // shift indeces |
| 114 | | for (int j=0;j<dim;j++){ |
| 115 | | if (is(j)==szs(j)-1) { //j-th index is full |
| 116 | | is(j)=0; //shift back |
| 117 | | is(j+1)++; //increase th next dimension; |
| 118 | | if (is(j+1)<szs(j+1)-1) break; |
| 119 | | } else { |
| 120 | | is(j)++; break; |
| 121 | | } |
| | 21 | using std::string; |
| | 22 | |
| | 23 | //!Merging methods |
| | 24 | enum MERGER_METHOD {ARITHMETIC = 1, GEOMETRIC = 2, LOGNORMAL = 3}; |
| | 25 | |
| | 26 | /*! |
| | 27 | @brief Base class for general combination of pdfs on discrete support |
| | 28 | |
| | 29 | Mixtures of Gaussian densities are used internally. Switching to other densities should be trivial. |
| | 30 | |
| | 31 | The merged pdfs are expected to be of the form: |
| | 32 | \f[ f(x_i|y_i), i=1..n \f] |
| | 33 | where the resulting merger is a density on \f$ \cup [x_i,y_i] \f$ . |
| | 34 | Note that all variables will be joined. |
| | 35 | |
| | 36 | As a result of this feature, each source must be extended to common support |
| | 37 | \f[ f(z_i|y_i,x_i) f(x_i|y_i) f(y_i) i=1..n \f] |
| | 38 | where \f$ z_i \f$ accumulate variables that were not in the original source. |
| | 39 | These extensions are calculated on-the-fly. |
| | 40 | |
| | 41 | However, these operations can not be performed in general. Hence, this class merges only sources on common support, \f$ y_i={}, z_i={}, \forall i \f$. |
| | 42 | For merging of more general cases, use offsprings merger_mix and merger_grid. |
| | 43 | */ |
| | 44 | |
| | 45 | class merger_base : public compositepdf, public epdf |
| | 46 | { |
| | 47 | protected: |
| | 48 | //! Data link for each mpdf in mpdfs |
| | 49 | Array<datalink_m2e*> dls; |
| | 50 | //! Array of rvs that are not modelled by mpdfs at all, \f$ z_i \f$ |
| | 51 | Array<RV> rvzs; |
| | 52 | //! Data Links for extension \f$ f(z_i|x_i,y_i) \f$ |
| | 53 | Array<datalink_m2e*> zdls; |
| | 54 | //! number of support points |
| | 55 | int Npoints; |
| | 56 | //! number of sources |
| | 57 | int Nsources; |
| | 58 | |
| | 59 | //! switch of the methoh used for merging |
| | 60 | MERGER_METHOD METHOD; |
| | 61 | //!Prior on the log-normal merging model |
| | 62 | double beta; |
| | 63 | |
| | 64 | //! Projection to empirical density (could also be piece-wise linear) |
| | 65 | eEmp eSmp; |
| | 66 | |
| | 67 | //! debug or not debug |
| | 68 | bool DBG; |
| | 69 | |
| | 70 | //! debugging file |
| | 71 | it_file* dbg_file; |
| | 72 | public: |
| | 73 | //! \name Constructors |
| | 74 | //! @{ |
| | 75 | |
| | 76 | //!Empty constructor |
| | 77 | merger_base () : compositepdf() {DBG=false;dbg_file=NULL;}; |
| | 78 | //!Constructor from sources |
| | 79 | merger_base (const Array<mpdf*> &S) {set_sources (S);}; |
| | 80 | //! Function setting the main internal structures |
| | 81 | void set_sources (const Array<mpdf*> &Sources) { |
| | 82 | compositepdf::set_elements (Sources); |
| | 83 | //set sizes |
| | 84 | dls.set_size (Sources.length()); |
| | 85 | rvzs.set_size (Sources.length()); |
| | 86 | zdls.set_size (Sources.length()); |
| | 87 | |
| | 88 | rv = getrv (/* checkoverlap = */ false); |
| | 89 | RV rvc; setrvc (rv, rvc); // Extend rv by rvc! |
| | 90 | // join rv and rvc - see descriprion |
| | 91 | rv.add (rvc); |
| | 92 | // get dimension |
| | 93 | dim = rv._dsize(); |
| | 94 | |
| | 95 | // create links between sources and common rv |
| | 96 | RV xytmp; |
| | 97 | for (int i = 0;i < mpdfs.length();i++) { |
| | 98 | //Establich connection between mpdfs and merger |
| | 99 | dls (i) = new datalink_m2e; |
| | 100 | dls (i)->set_connection (mpdfs (i)->_rv(), mpdfs (i)->_rvc(), rv); |
| | 101 | |
| | 102 | // find out what is missing in each mpdf |
| | 103 | xytmp = mpdfs (i)->_rv(); |
| | 104 | xytmp.add (mpdfs (i)->_rvc()); |
| | 105 | // z_i = common_rv-xy |
| | 106 | rvzs (i) = rv.subt (xytmp); |
| | 107 | //establish connection between extension (z_i|x,y)s and common rv |
| | 108 | zdls (i) = new datalink_m2e; zdls (i)->set_connection (rvzs (i), xytmp, rv) ; |
| | 109 | }; |
| | 110 | } |
| | 111 | //! set debug file |
| | 112 | void set_debug_file (const string fname) { |
| | 113 | if (DBG) delete dbg_file; |
| | 114 | dbg_file = new it_file (fname); |
| | 115 | if (dbg_file) DBG = true; |
| | 116 | } |
| | 117 | //! Set internal parameters used in approximation |
| | 118 | void set_method (MERGER_METHOD MTH, double beta0=0.0) { |
| | 119 | METHOD = MTH; |
| | 120 | beta = beta0; |
| | 121 | } |
| | 122 | //! Set support points from a pdf by drawing N samples |
| | 123 | void set_support(const epdf &overall, int N){ |
| | 124 | it_assert_debug ( rv.equal ( overall._rv() ),"Incompatible parameter overall!" ); |
| | 125 | eSmp.set_statistics(&overall,N); |
| | 126 | Npoints=N; |
| | 127 | } |
| | 128 | |
| | 129 | //! Destructor |
| | 130 | virtual ~merger_base() { |
| | 131 | for (int i = 0; i < Nsources; i++) { |
| | 132 | delete dls (i); |
| | 133 | delete zdls (i); |
| | 134 | } |
| | 135 | if (DBG) delete dbg_file; |
| | 136 | }; |
| | 137 | //!@} |
| | 138 | |
| | 139 | //! \name Mathematical operations |
| | 140 | //!@{ |
| | 141 | |
| | 142 | //!Merge given sources in given points |
| | 143 | void merge () { |
| | 144 | if (eSmp._w().length() ==0) {it_error("Empty support points use set_support" );} |
| | 145 | //check if sources overlap: |
| | 146 | bool OK = true; |
| | 147 | for (int i = 0;i < mpdfs.length(); i++) { |
| | 148 | OK &= (rvzs (i)._dsize() == 0); // z_i is empty |
| | 149 | OK &= (mpdfs (i)->_rvc()._dsize() == 0); // y_i is empty |
| | 150 | } |
| | 151 | |
| | 152 | if (OK) { |
| | 153 | mat lW = zeros (mpdfs.length(), eSmp._w().length()); |
| | 154 | |
| | 155 | vec emptyvec (0); |
| | 156 | for (int i = 0; i < mpdfs.length(); i++) { |
| | 157 | for (int j = 0; j < eSmp._w().length(); j++) { |
| | 158 | lW (i, j) = mpdfs (i)->evallogcond (eSmp._samples() (j), emptyvec); |
| 143 | | vec sample ( ) const { return Mix.posterior().sample();} |
| 144 | | double evallog ( const vec &dt ) const |
| 145 | | { |
| 146 | | vec dtf=ones ( dt.length() +1 ); |
| 147 | | dtf.set_subvector ( 0,dt ); |
| 148 | | return Mix.logpred ( dtf ); |
| 149 | | } |
| 150 | | vec mean() const |
| 151 | | { |
| 152 | | const Vec<double> &w = eSmp._w(); |
| 153 | | const Array<vec> &S = eSmp._samples(); |
| 154 | | vec tmp=zeros ( dim ); |
| 155 | | for ( int i=0; i<Ns; i++ ) |
| 156 | | { |
| 157 | | tmp+=w ( i ) *S ( i ); |
| 158 | | } |
| 159 | | return tmp; |
| 160 | | } |
| 161 | | mat covariance() const |
| 162 | | { |
| 163 | | const vec &w = eSmp._w(); |
| 164 | | const Array<vec> &S = eSmp._samples(); |
| 165 | | |
| 166 | | vec mea = mean(); |
| 167 | | |
| 168 | | cout << sum ( w ) << "," << w*w <<endl; |
| 169 | | |
| 170 | | mat Tmp=zeros ( dim, dim ); |
| 171 | | for ( int i=0; i<Ns; i++ ) |
| 172 | | { |
| 173 | | Tmp+=w ( i ) *outer_product ( S ( i ), S ( i ) ); |
| 174 | | } |
| 175 | | return Tmp-outer_product ( mea,mea ); |
| 176 | | } |
| 177 | | vec variance() const |
| 178 | | { |
| 179 | | const vec &w = eSmp._w(); |
| 180 | | const Array<vec> &S = eSmp._samples(); |
| 181 | | |
| 182 | | vec tmp=zeros ( dim ); |
| 183 | | for ( int i=0; i<Ns; i++ ) |
| 184 | | { |
| 185 | | tmp+=w ( i ) *pow ( S ( i ),2 ); |
| 186 | | } |
| 187 | | return tmp-pow ( mean(),2 ); |
| 188 | | } |
| 189 | | //! for future use |
| 190 | | virtual ~merger() |
| 191 | | { |
| 192 | | for ( int i=0; i<n; i++ ) |
| 193 | | { |
| 194 | | delete dls ( i ); |
| 195 | | delete zdls ( i ); |
| 196 | | } |
| 197 | | if ( DBG ) delete dbg; |
| 198 | | }; |
| 199 | | |
| | 178 | vec mean() const { |
| | 179 | const Vec<double> &w = eSmp._w(); |
| | 180 | const Array<vec> &S = eSmp._samples(); |
| | 181 | vec tmp = zeros (dim); |
| | 182 | for (int i = 0; i < Npoints; i++) { |
| | 183 | tmp += w (i) * S (i); |
| | 184 | } |
| | 185 | return tmp; |
| | 186 | } |
| | 187 | mat covariance() const { |
| | 188 | const vec &w = eSmp._w(); |
| | 189 | const Array<vec> &S = eSmp._samples(); |
| | 190 | |
| | 191 | vec mea = mean(); |
| | 192 | |
| | 193 | cout << sum (w) << "," << w*w << endl; |
| | 194 | |
| | 195 | mat Tmp = zeros (dim, dim); |
| | 196 | for (int i = 0; i < Npoints; i++) { |
| | 197 | Tmp += w (i) * outer_product (S (i), S (i)); |
| | 198 | } |
| | 199 | return Tmp -outer_product (mea, mea); |
| | 200 | } |
| | 201 | vec variance() const { |
| | 202 | const vec &w = eSmp._w(); |
| | 203 | const Array<vec> &S = eSmp._samples(); |
| | 204 | |
| | 205 | vec tmp = zeros (dim); |
| | 206 | for (int i = 0; i < Nsources; i++) { |
| | 207 | tmp += w (i) * pow (S (i), 2); |
| | 208 | } |
| | 209 | return tmp -pow (mean(), 2); |
| | 210 | } |
| | 211 | //!@} |
| | 212 | |
| | 213 | //! \name Access to attributes |
| | 214 | //! @{ |
| | 215 | |
| | 216 | //! Access function |
| | 217 | eEmp& _Smp() {return eSmp;} |
| | 218 | |
| | 219 | //!@} |
| | 220 | }; |
| | 221 | |
| | 222 | class merger_mix : public merger_base |
| | 223 | { |
| | 224 | protected: |
| | 225 | //!Internal mixture of EF models |
| | 226 | MixEF Mix; |
| | 227 | //!Number of components in a mixture |
| | 228 | int Ncoms; |
| | 229 | //! coefficient of resampling |
| | 230 | double effss_coef; |
| | 231 | |
| | 232 | public: |
| | 233 | //!\name Constructors |
| | 234 | //!@{ |
| | 235 | merger_mix () {}; |
| | 236 | merger_mix (const Array<mpdf*> &S) {set_sources(S);}; |
| | 237 | //! Set sources and prepare all internal structures |
| | 238 | void set_sources (const Array<mpdf*> &S) { |
| | 239 | merger_base::set_sources(S); |
| | 240 | Nsources = S.length(); |
| | 241 | } |
| | 242 | //! Set internal parameters used in approximation |
| | 243 | void set_parameters (int Ncoms0=10, double effss_coef0 = 0.5) { |
| | 244 | Ncoms = Ncoms0; |
| | 245 | effss_coef = effss_coef0; |
| | 246 | } |
| | 247 | //!@} |
| | 248 | |
| | 249 | //! \name Mathematical operations |
| | 250 | //!@{ |
| | 251 | |
| | 252 | //!Merge values using mixture approximation |
| | 253 | void merge (); |
| | 254 | |
| | 255 | //! sample from the approximating mixture |
| | 256 | vec sample () const { return Mix.posterior().sample();} |
| | 257 | //! loglikelihood computed on mixture models |
| | 258 | double evallog (const vec &dt) const { |
| | 259 | vec dtf = ones (dt.length() + 1); |
| | 260 | dtf.set_subvector (0, dt); |
| | 261 | return Mix.logpred (dtf); |
| | 262 | } |
| | 263 | //!@} |
| | 264 | |
| | 265 | //!\name Access functions |
| | 266 | //!@{ |