root/library/bdm/estim/arx_straux.cpp @ 1064

Revision 1064, 16.3 kB (checked in by mido, 14 years ago)

astyle applied all over the library

  • Property svn:eol-style set to native
Line 
1
2#include "arx.h"
3
4
5namespace bdm {
6
7
8
9
10void str_bitset ( bvec &out, ivec ns, int nbits ) {
11
12    for ( int i = 0; i < ns.length(); i++ ) {
13        out ( ns ( i ) - 2 ) = 1;
14    }
15}
16
17
18double seloglik1 ( str_aux in ) {
19// This is the loglikelihood (non-constant part) - this should be used in
20// frequent computation
21    int len = length ( in.d );
22    int p1  = in.posit1 - 1;
23
24    double i1 = -0.5 * in.nu * log ( in.d ( p1 ) ) - 0.5 * sum ( log ( in.d.right ( len - p1 - 1 ) ) );
25    double i0 = -0.5 * in.nu0 * log ( in.d0 ( p1 ) ) - 0.5 * sum ( log ( in.d0.right ( len - p1 - 1 ) ) );
26    return i1 - i0;
27//DEBUGGing print:
28//fprintf('SELOGLIK1: str=%s loglik=%g\n', strPrintstr(in), l);*/
29}
30
31
32void sedydr ( mat &r, mat &f, double &Dr, double &Df, int R/*,int jl,int jh ,mat &rout, mat &fout, double &Drout, double &Dfoutint &kr*/ ) {
33    /*SEDYDR dyadic reduction, performs transformation of sum of 2 dyads
34    %
35    % [rout, fout, Drout, Dfout, kr] = sedydr(r,f,Dr,Df,R,jl,jh);
36    % [rout, fout, Drout, Dfout] = sedydr(r,f,Dr,Df,R);
37    %
38    % Description: dyadic reduction, performs transformation of sum of
39    %  2 dyads r*Dr*r'+ f*Df*f' so that the element of r pointed by R is zeroed
40    %
41    %     r    : column vector of reduced dyad
42    %     f    : column vector of reducing dyad
43    %     Dr   : scalar with weight of reduced dyad
44    %     Df   : scalar with weight of reducing dyad
45    %     R    : scalar number giving 1 based index to the element of r,
46    %            which is to be reduced to
47    %            zero; the corresponding element of f is assumed to be 1.
48    %     jl   : lower index of the range within which the dyads are
49    %            modified (can be omitted, then everything is updated)
50    %     jh   : upper index of the range within which the dyads are
51    %            modified (can be omitted then everything is updated)
52    %     rout,fout,Drout,dfout : resulting two dyads
53    %     kr   : coefficient used in the transformation of r
54    %            rnew = r + kr*f
55    %
56    % Description: dyadic reduction, performs transformation of sum of
57    %            2 dyads r*Dr*r'+ f*Df*f' so that the element of r indexed by R is zeroed
58    % Remark1: Constant mzero means machine zero and should be modified
59    %           according to the precision of particular machine
60    % Remark2: jl and jh are, in fact, obsolete. It takes longer time to
61    %           compute them compared to plain version. The reason is that we
62    %           are doing vector operations in m-file. Other reason is that
63    %           we need to copy whole vector anyway. It can save half of time for
64    %           c-file, if you use it correctly. (please do tests)
65    %
66    % Note: naming:
67    %       se = structure estimation
68    %       dydr = dyadic reduction
69    %
70    % Original Fortran design: V. Peterka 17-7-89
71    % Modified for c-language: probably R. Kulhavy
72    % Modified for m-language: L. Tesar 2/2003
73    % Updated: Feb 2003
74    % Project: post-ProDaCTool
75    % Reference: none*/
76
77    /*if nargin<6;
78       update_whole=1;
79    else
80       update_whole=0;
81    end;*/
82
83    double mzero = 1e-32;
84
85    if ( Dr < mzero ) {
86        Dr = 0;
87    }
88
89    double r0   = r ( R, 0 );
90    double kD   = Df;
91    double kr   = r0 * Dr;
92
93
94    Df   = kD + r0 * kr;
95
96    if ( Df > mzero ) {
97        kD = kD / Df;
98        kr = kr / Df;
99    } else {
100        kD = 1;
101        kr = 0;
102    }
103
104    Dr = Dr * kD;
105
106// Try to uncomment marked stuff (*) if in numerical problems, but I don't
107// think it can make any difference for normal healthy floating-point unit
108//if update_whole;
109    r = r - r0 * f;
110//   rout(R) = 0;   // * could be needed for some nonsense cases(or numeric reasons?), normally not
111    f = f + kr * r;
112//   fout(R) = 1;   // * could be needed for some nonsense cases(or numeric reasons?), normally not
113    /*else;
114       rout = r;
115       fout = f;
116       rout(jl:jh) = r(jl:jh) - r0 * f(jl:jh);
117       rout(R) = 0;
118       fout(jl:jh) = f(jl:jh) + kr * rout(jl:jh);
119    end;*/
120}
121
122
123
124/*mat*/ void seswapudl ( mat &L, vec &d , int i/*, vec &dout*/ ) {
125    /*%SESWAPUDL swaps information matrix in decomposition V=L^T diag(d) L
126    %
127    %   [Lout, dout] = seswapudl(L,d,i);
128    %
129    % L : lower triangular matrix with 1's on diagonal of the decomposistion
130    % d : diagonal vector of diagonal matrix of the decomposition
131    % i : index of line to be swapped with the next one
132    % Lout : output lower triangular matrix
133    % dout : output diagional vector of diagonal matrix D
134    %
135    % Description:
136    %  Lout' * diag(dout) * Lout = P(i,i+1) * L' * diag(d) * L * P(i,i+1);
137    %
138    %  Where permutation matrix P(i,j) permutates columns if applied from the
139    %  right and line if applied from the left.
140    %
141    % Note: naming:
142    %       se = structure estimation
143    %       lite = light, simple
144    %       udl = U*D*L, or more precisely, L'*D*L, also called as ld
145    %
146    % Design  : L. Tesar
147    % Updated : Feb 2003
148    % Project : post-ProDaCTool
149    % Reference: sedydr*/
150
151    int j = i + 1;
152
153    double pomd = d ( i );
154    d ( i ) = d ( j );
155    d ( j ) = pomd;
156
157    L.swap_rows ( i, j );
158    L.swap_cols ( i, j );
159
160
161//% We must be working with LINES of matrix L !
162
163    mat r = L.get_row ( i );
164    r.transpose();
165    mat f = L.get_row ( j );
166    f.transpose();
167
168    double Dr = d ( i );
169    double Df = d ( j );
170
171    sedydr ( r, f, Dr, Df, j );
172
173    double r0 = r ( i, 0 );
174    Dr = Dr * r0 * r0;
175    r  = r / r0;
176
177    mat pom_mat = r.transpose();
178    L.set_row ( i, pom_mat.get_row ( 0 ) );
179    pom_mat = f.transpose();
180    L.set_row ( j, pom_mat.get_row ( 0 ) );
181
182    d ( i )   = Dr;
183    d ( j )   = Df;
184
185    L ( i, i ) = 1;
186    L ( j, j ) = 1;
187}
188
189
190void str_bitres ( bvec &out, ivec ns, int nbits ) {
191
192    for ( int i = 0; i < ns.length(); i++ ) {
193        out ( ns ( i ) - 2 ) = 0;
194    }
195}
196
197str_aux sestrremove ( str_aux in, ivec removed_elements ) {
198//% Removes elements from regressor
199
200    int n_strL = length ( in.strL );
201    str_aux out = in;
202
203    for ( int i = 0; i < removed_elements.length(); i++ ) {
204
205        int f = removed_elements ( i );
206        int posit1 = ( find ( out.strL == 1 ) ) ( 0 );
207        int positf = ( find ( out.strL == f ) ) ( 0 );
208        for ( int g = positf - 1; g > posit1 - 1; g-- ) {
209            //% BEGIN: We are swapping g and g+1 NOW!!!!
210            seswapudl ( out.L, out.d, g );
211            seswapudl ( out.L0, out.d0, g );
212
213            int pom_strL = out.strL ( g );
214            out.strL ( g ) = out.strL ( g + 1 );
215            out.strL ( g + 1 ) = pom_strL;
216            //% END
217        }
218
219
220    }
221    out.posit1 = ( find ( out.strL == 1 ) ) ( 0 ) + 1;
222    out.strRgr = out.strL.right ( n_strL - out.posit1 );
223    out.strMis = out.strL.left ( out.posit1 - 1 );
224    str_bitres ( out.bitstr, removed_elements, out.nbits );
225    out.loglik = seloglik1 ( out );
226
227    return out;
228}
229
230
231ivec setdiff ( ivec a, ivec b ) {
232    ivec pos;
233
234    for ( int i = 0; i < b.length(); i++ ) {
235        pos = find ( a == b ( i ) );
236        for ( int j = 0; j < pos.length(); j++ ) {
237            a.del ( pos ( j ) - j );
238        }
239    }
240    return a;
241}
242
243
244
245
246void add_new ( Array<str_aux> &global_best, str_aux newone, int nbest ) {
247// Eventually add to global best, but do not go over nbest values
248// Also avoids repeating things, which makes this function awfully slow
249
250    int  addit, i = 0;
251    if ( global_best.length() >= nbest ) {
252        //logliks = [global_best.loglik];
253
254        for ( int j = 1; j < global_best.length(); j++ ) {
255            if ( global_best ( j ).loglik < global_best ( i ).loglik ) {
256                i = j;
257            }
258        }
259        if ( global_best ( i ).loglik < newone.loglik ) {
260            //         if ~any(logliks == new.loglik);
261            addit = 1;
262
263
264            for ( int j = 0; j < global_best.length(); j++ ) {
265                if ( newone.bitstr == global_best ( j ).bitstr ) {
266                    addit = 0;
267                    break;
268                }
269            }
270            if ( addit ) {
271                global_best ( i ) = newone;
272                // DEBUGging print:
273                // fprintf('ADDED structure, add_new: %s, loglik=%g\n', strPrintstr(new), new.loglik);
274            }
275        }
276    } else
277        global_best = concat ( global_best, newone );
278
279}
280
281
282
283
284
285
286str_aux sestrinsert ( str_aux in, ivec inserted_elements ) {
287// Moves elements into regressor
288    int n_strL = in.strL.length();
289    str_aux out = in;
290    for ( int j = 0; j < inserted_elements.length(); j++ ) {
291        int f = inserted_elements ( j );
292        int posit1 = ( find ( out.strL == 1 ) ) ( 0 );
293        int positf = ( find ( out.strL == f ) ) ( 0 );
294        for ( int g = positf; g <= posit1 - 1; g++ ) {
295
296            // BEGIN: We are swapping g and g+1 NOW!!!!
297            seswapudl ( out.L,  out.d,  g );
298            seswapudl ( out.L0, out.d0, g );
299
300            int pom_strL = out.strL ( g );
301            out.strL ( g ) = out.strL ( g + 1 );
302            out.strL ( g + 1 ) = pom_strL;
303
304            // END
305        }
306    }
307
308    out.posit1 = ( find ( out.strL == 1 ) ) ( 0 ) + 1;
309    out.strRgr = out.strL.right ( n_strL - out.posit1 );
310    out.strMis = out.strL.left ( out.posit1 - 1 );
311    str_bitset ( out.bitstr, inserted_elements, out.nbits );
312    out.loglik = seloglik1 ( out );
313
314    return out;
315}
316
317double seloglik2 ( str_aux in ) {
318// This is the loglikelihood (constant part) - this should be added to
319// everything at the end. It needs some computation, so it is useless to
320// make it for all the stuff
321    double logpi = log ( pi );
322
323    double i1 = lgamma ( in.nu / 2 ) - 0.5 * in.nu * logpi;
324    double i0 = lgamma ( in.nu0 / 2 ) - 0.5 * in.nu0 * logpi;
325    return i1 - i0;
326}
327
328
329
330
331ivec straux1 ( ldmat Ld, double nu, ldmat Ld0, double nu0, ivec belief, int nbest, int max_nrep, double lambda, int order_k, Array<str_aux> &rgrsout/*, stat &statistics*/ ) {
332// see utia_legacy/ticket_12/ implementation and str_test.m
333
334    const mat &L = Ld._L();
335    const vec &d = Ld._D();
336
337    const mat &L0 = Ld0._L();
338    const vec &d0 = Ld0._D();
339
340    int n_data = d.length();
341
342    ivec belief_out = find ( belief == 4 ) + 2; // we are avoiding to put this into regressor
343    ivec belief_in  = find ( belief == 1 ) + 2; // we are instantly keeping this in regressor
344
345    str_aux full;
346
347    full.d0  = d0;
348    full.nu0 = nu0;
349    full.L0  = L0;
350    full.L   = L;
351    full.d   = d;
352    full.nu0 = nu0;
353    full.nu  = nu;
354    full.strL = linspace ( 1, n_data );
355    full.strRgr = linspace ( 2, n_data );
356    full.strMis = ivec ( 0 );
357    full.posit1 = 1;
358    full.bitstr.set_size ( n_data - 1 );
359    full.bitstr.clear();
360    str_bitset ( full.bitstr, full.strRgr, full.nbits );
361    full.loglik = seloglik1 ( full );    // % loglikelihood
362
363
364//% construct full and empty structure
365    full = sestrremove ( full, belief_out );
366    str_aux empty = sestrremove ( full, setdiff ( full.strRgr, belief_in ) );
367
368//% stopping rule calculation:
369
370    bmat local_max ( 0, 0 );
371    int to, muto = 0;
372
373//% statistics:
374//double cputime0 = cputime;
375//if nargout>=3;
376
377    CPU_Timer timer;
378    timer.start();
379    ivec mutos ( max_nrep + 2 );
380    vec maxmutos ( max_nrep + 2 );
381    mutos.zeros();
382    maxmutos.zeros();
383
384
385//end;
386//% ----------------------
387
388//% For stopping-rule calculation
389//%so       = 2^(n_data -1-length(belief_in)- length(belief_out)); % do we use this ?
390//% ----------------------
391
392    ivec all_str = linspace ( 1, n_data );
393    Array<str_aux> global_best ( 1 );
394    global_best ( 0 ) = full;
395
396
397//% MAIN LOOP is here.
398
399    str_aux   best;
400    for ( int n_start = -1; n_start <= max_nrep; n_start++ ) {
401        str_aux  last, best;
402
403        to = n_start + 2;
404
405        if ( n_start == -1 ) {
406            //% start from the full structure
407            last = full;
408        } else {
409            if ( n_start == 0 )
410                //% start from the empty structure
411                last = empty;
412
413            else {
414                //% start from random structure
415
416
417
418                ivec last_str = find ( to_bvec ( concat ( 0, floor_i ( 2 * randun ( n_data - 1 ) ) ) ) ) + 1;// this creates random vector consisting of indexes, and sorted
419                last = sestrremove ( full, setdiff ( all_str, concat ( concat ( 1 , last_str ), empty.strRgr ) ) );
420
421            }
422        }
423        //% DEBUGging print:
424        //%fprintf('STRUCTURE generated            in loop %2i was %s\n', n_start, strPrintstr(last));
425
426        //% The loop is repeated until likelihood stops growing (break condition
427        //% used at the end;
428
429
430        while ( 1 ) {
431            //% This structure is going to hold the best elements
432            best = last;
433            //% Nesting by removing elements (enpoorment)
434            ivec  removed_items = setdiff ( last.strRgr, belief_in );
435
436            ivec removed_item;
437            str_aux newone;
438
439            for ( int i = 0; i < removed_items.length(); i++ ) {
440                removed_item = vec_1 ( removed_items ( i ) );
441                newone = sestrremove ( last, removed_item );
442                if ( nbest > 1 ) {
443                    add_new ( global_best, newone, nbest );
444                }
445                if ( newone.loglik > best.loglik ) {
446                    best = newone;
447                }
448            }
449            //% Nesting by adding elements (enrichment)
450            ivec added_items = setdiff ( last.strMis, belief_out );
451            ivec added_item;
452
453            for ( int j = 0; j < added_items.length(); j++ ) {
454                added_item = vec_1 ( added_items ( j ) );
455                newone = sestrinsert ( last, added_item );
456                if ( nbest > 1 ) {
457                    add_new ( global_best, newone, nbest );
458                }
459                if ( newone.loglik > best.loglik ) {
460                    best = newone;
461                }
462            }
463
464
465            //% Break condition if likelihood does not change.
466            if ( best.loglik <= last.loglik )
467                break;
468            else
469                //% Making best structure last structure.
470                last = best;
471        }
472
473
474        // % DEBUGging print:
475        //%fprintf('STRUCTURE found (local maxima) in loop %2i was %s randun_seed=%11lu randun_counter=%4lu\n', n_start, strPrintstr(best), randn('seed'), RANDUN_COUNTER);
476
477        //% Collecting of the best structure in case we don't need the second parameter
478        if ( nbest <= 1 ) {
479            if ( best.loglik > global_best ( 0 ).loglik ) {
480                global_best = best;
481            }
482        }
483
484        //% uniqueness of the structure found
485        int append = 1;
486
487        for ( int j = 0; j  < local_max.rows() ; j++ ) {
488            if ( best.bitstr == local_max.get_row ( j ) ) {
489                append = 0;
490                break;
491            }
492        }
493
494
495        if ( append ) {
496            local_max.append_row ( best.bitstr );
497            muto = muto + 1;
498        }
499
500        //% stopping rule:
501        double maxmuto = ( to - order_k - 1 ) / lambda - to + 1;
502        if ( to > 2 ) {
503            if ( maxmuto >= muto ) {
504                //% fprintf('*');
505                break;
506            }
507        }
508
509        // do statistics if necessary:
510        //if (nargout>=3){
511        mutos ( to - 1 )    = muto;
512        maxmutos ( to - 1 ) = maxmuto;
513        //}
514    }
515
516//% Aftermath: The best structure was in: global_best
517
518//% Updating loglikelihoods: we have to add the constant stuff
519
520    for ( int f = 0 ; f < global_best.length(); f++ ) {
521        global_best ( f ).loglik = global_best ( f ).loglik + seloglik2 ( global_best ( f ) );
522    }
523
524//% Making first output parameter:
525
526    int max_i = 0;
527    for ( int j = 1; j < global_best.length(); j++ )
528        if ( global_best ( max_i ).loglik < ( global_best ( j ).loglik ) ) max_i = j;
529
530    best = global_best ( max_i );
531
532//% Making the second output parameter
533
534    vec logliks ( global_best.length() );
535    for ( int j = 0; j < logliks.length(); j++ )
536        logliks ( j ) = global_best ( j ).loglik;
537
538    ivec i = sort_index ( logliks );
539    rgrsout.set_length ( global_best.length() );
540
541    for ( int j = global_best.length() - 1; j >= 0; j-- )
542        rgrsout ( j ) = global_best ( i ( j ) );
543
544//if (nargout>=3);
545
546    str_statistics statistics;
547
548    statistics.allstrs = 2 ^ ( n_data - 1 - length ( belief_in ) - length ( belief_out ) );
549    statistics.nrand   = to - 2;
550    statistics.unique  = muto;
551    statistics.to  = to;
552    statistics.cputime_seconds = timer.get_time();
553    statistics.itemspeed       = statistics.to / statistics.cputime_seconds;
554    statistics.muto = muto;
555    statistics.mutos = mutos;
556    statistics.maxmutos = maxmutos;
557//end;
558
559    return best.strRgr;
560
561}
562
563
564
565
566
567
568}
Note: See TracBrowser for help on using the browser.