/************************************
        Extended Kalman Filter
        Matrix operations

        V. Smidl, Z. Peroutka

Rev. 28.10.2010   (ZP)

26.10.2010      Prvni verze (VS)

26.10.2010      Upravena chyba v Thorton_fast - spatne shiftovani o vypoctu SIGMA.
27.10.2010      Pokus o odstraneni problemu v Thorton_fast - potize dela omezovani (orezavani) varianci.
28.10.2010      Drobne upravy v kodu.

*************************************/

#include <stdio.h>
#include "matrix.h"

#define  deb(x)    printf(" %s %ld \n",#x, x)

/* Matrix multiply Full matrix by upper diagonal matrix; */
void mmultAU(int *m1, int *up, int *result, unsigned int rows, unsigned int columns) 
{
    unsigned int i, j, k;
    long tmp_sum=0L; //in 15+qAU
    int *m2pom;
    int *m1pom=m1;
    int *respom=result;

    for (i=0; i<rows; i++) //rows of result
    {
        for (j=0; j<columns; j++) //columns of result
	{
            m2pom=up+j;//??

            for (k=0; k<j; k++) //inner loop up to "j" - U(j,j)==1;
            {
                tmp_sum = tmp_sum + (((long)(*(m1pom++))**m2pom)>>(15-qAU));
                m2pom+=columns;
            }
            // add the missing A(i,j)
            tmp_sum = tmp_sum + ((long)(*m1pom)<<qAU); // no need to shift
            m1pom-=(j); // shift back to first element

            *respom++=(tmp_sum>>15);

            tmp_sum=0;
        }
        m1pom+=(columns);
    }
};


void bierman_fast(int *difz, int *xp, int *U, int *D, int *R, unsigned int dimy, unsigned int dimx )
{
    int alpha;
    int beta,lambda;
    int b[5]; // ok even for 4-dim state
    int *a; // in [0,1] -> q15
    unsigned int iy,j,i;

    int *b_j,*b_i;
    int *a_j;
    int *D_j;
    int *U_ij;
    int *x_i;

    a = U; // iyth row of U
    for (iy=0; iy<dimy; iy++)
    {
        // a is a row
        a_j=a;
        b_j=b;
        D_j=D;

        for (j=0; j<dimx; j++)
        {
            *b_j=((long)(*D_j)*(*a_j))>>15;
//            deb(*b_j);
            b_j++;
            D_j++;
            a_j++;
        }

        alpha = R[iy]; //\alpha = R+vDv = R+a*b
        // R in q15, a in q15, b=q15

        a_j=a;
        b_j=b;
        D_j=D;

        for (j=0; j<dimx; j++)
        {
            lambda=alpha;
            alpha = alpha + (((long)(*a_j)*(*b_j))>>15);

//            deb(alpha);

            *D_j = ((long)lambda**D_j)/alpha;

            if (*D_j==0) *D_j=1;

            b_i=b;
            U_ij=U+j;

            for (i=0; i<j; i++)
            {
                beta   = *U_ij;
//                *U_ij +=  ((long)lambda*(*b_i))>>15;		// puvodni reseni
                *U_ij = *U_ij - ((long)(*a_j)*(*b_i))/lambda;         // pozadovane optimalni reseni
                *b_i  = *b_i + (((long)beta*(*b_j))>>15);

//                deb(*U_ij);

                b_i++;
                U_ij=U_ij+dimx;
            }

            a_j++;
            b_j++;
            D_j++;
        }

        x_i=xp;
        b_i=b;

        for (i=0; i<dimx; i++)
        {
            *x_i = *x_i + ((long)difz[iy]**b_i)/alpha; // multiply by unscaled Kalman gain

//            deb(*x_i);

            x_i++;
            b_i++;
        }

        a=a+dimx;
    }
}

/**/

// Thorton procedure - Kalman predictive variance in UD
void thorton_fast(int *U, int *D, int *PSIU, int *Q, int *G, int *Dold, unsigned int rows) {
    unsigned int i,j,k;
    // copy D to Dold
    int *Dold_i,*Dold_k;
    int *D_i;
    int *PSIU_ij,*PSIU_ik,*PSIU_jk;
    int *Q_jj,*Q_ii,*Q_kk;
    int *U_ji;
    int *G_ik,*G_jk;
    int irows,jrows;
    long sigma; // in qAU+15!!
    long z;
    long z_tmp;

    Dold_i=Dold;
    D_i=D;

    for (i=0;i<rows;i++)
    {
        *Dold_i=*D_i;
        Dold_i++;
        D_i++;
    }

    // initialize G = eye()
    G_ik= G;
    *G_ik++=32767;
    for (i=0;i<rows-1;i++)
    {
        // clean elem before diag
        for (k=0; k<rows; k++)
        {
            *G_ik++=0;
        }
        *G_ik++=32767;
    }
    // eye created

    Dold_i=Dold+rows-1;
    D_i=D+rows-1;

    for (i=rows-1; 1; i--)
    { // stop if i==0 at the END!
        irows=i*rows;
        sigma = 0;

        PSIU_ik=PSIU+irows;
        Dold_k=Dold;

        for (k=0;k<rows; k++)
        {
            sigma = sigma + (((long)(*PSIU_ik)**PSIU_ik)>>(qAU))*(*Dold_k);
//            deb(sigma);

            PSIU_ik++;
            Dold_k++;
        }

	sigma += (long)*(Q+i+irows)<<qAU;

        G_ik=G+irows+i+1;

        for (j=i+1; j<rows; j++)
        {
            sigma = sigma + (((long)(*G_ik)**G_ik)>>16)**(Q+j+j*rows);
            G_ik++;
        }

        if (sigma>((long)1<<(qAU+15)))
        {
	   *D_i = 32767;
//	   *(Dold+i)-=*(Q+i+irows);
	} else
        {
           *D_i=(sigma>>qAU);
	}

        if (*D_i==0) *D_i=1;

        for (j=0;j<i;j++)
        {
            jrows = j*rows;

            sigma =0;
            PSIU_ik=PSIU+irows;
            PSIU_jk=PSIU+jrows;
            Dold_k=Dold;

            for (k=0;k<rows; k++)
            {

                sigma = sigma + ((((long)(*PSIU_ik)**PSIU_jk)>>qAU)**Dold_k);
                PSIU_ik++;
                PSIU_jk++;
                Dold_k++;
            }

            G_ik=G+irows+i;
            G_jk=G+jrows+i;
            Q_kk=Q+i*rows+i;

            for (k=i;k<rows;k++)
            {
                sigma = sigma + ((((long)*G_ik)**G_jk)>>16)**Q_kk;
                G_ik++;
                G_jk++;
                Q_kk=Q_kk+rows+1;
            }

            z=((sigma/(*D_i))<<(15-qAU)); // shift to q15
            if (z>32767) z=32767;
            if (z<-32768) z=-32768;

            U_ji=U+jrows+i;
            *U_ji = z;


            PSIU_ik=PSIU+irows;
            PSIU_jk=PSIU+jrows;

            for (k=0; k<rows;k++)
            {
                *PSIU_jk = *PSIU_jk - (((long)*U_ji**PSIU_ik)>>15);
                
//                deb(*PSIU_jk);

                PSIU_ik++;
                PSIU_jk++;
            }

            G_jk=G+jrows;
            G_ik=G+irows;

            for (k=0;k<rows;k++)
            {
                *G_jk = *G_jk - (((long)*U_ji**G_ik)>>15);
//                deb(*G_jk);

                 G_jk++;
                 G_ik++;
            }
        }

        Dold_i--;
        D_i--;
        if (i==0) return;
    }
}
