/************************************ Extended Kalman Filter Matrix operations V. Smidl, Z. Peroutka Rev. 28.10.2010 (ZP) 26.10.2010 Prvni verze (VS) 26.10.2010 Upravena chyba v Thorton_fast - spatne shiftovani o vypoctu SIGMA. 27.10.2010 Pokus o odstraneni problemu v Thorton_fast - potize dela omezovani (orezavani) varianci. 28.10.2010 Drobne upravy v kodu. *************************************/ #include "matrix_vs.h" #include #include "matrix.h" #include /* Matrix multiply Full matrix by upper diagonal matrix; */ void mmultAU(int16 *m1, int16 *up, int16 *result, unsigned int16 rows, unsigned int16 columns) { unsigned int16 i, j, k; int32 tmp_sum=0L; //in 15+qAU int16 *m2pom; int16 *m1pom=m1; int16 *respom=result; for (i=0; i>(15-qAU); m2pom+=columns; } // add the missing A(i,j) tmp_sum +=(int32)(*m1pom)<>15; tmp_sum=0; } m1pom+=(columns); } }; //same as mmultAU but different precision void mmultCU(int16 *m1, int16 *up, int16 *result, unsigned int16 rows, unsigned int16 columns) { unsigned int16 i, j, k; int32 tmp_sum=0L; //in 15+qAU int16 *m2pom; int16 *m1pom=m1; int16 *respom=result; for (i=0; i>(15-qCU); m2pom+=columns; } // add the missing A(i,j) tmp_sum +=(int32)(*m1pom)<>15; tmp_sum=0; } m1pom+=(columns); } }; void bierman_fast(int16 *difz, int16 *xp, int16 *U, int16 *D, int16 *R, unsigned int16 dimy, unsigned int16 dimx ) { int16 alpha; // in qD!! int16 beta,lambda; int16 b[5]; // ok even for 4-dim state // in qD!!! int16 *a; // in [0,1] -> q15 unsigned int16 iy,j,i; int16 *b_j,*b_i; int16 *a_j; int16 *D_j; int16 *U_ij; int16 *x_i; int16 Ucopy[16]; /* copy U for vector a */ int16 *Uc_i=Ucopy; int16 *U_i=U; for (i=0;i>15; alpha = (R[iy])>>(15-qD); //\alpha = R+vDv = R+a*b for (j=0,a_j=a,b_j=b,D_j=D; j>15; D[j] = ((int32)lambda**D_j)/alpha; if (*D_j==0) *D_j=1; for (i=0,b_i=b,U_ij=U+j; i>15; } } // no shift due to gamma for (i=0,x_i=xp,b_i=b; i qCU unsigned int16 iy,j,i; int16 *b_j,*b_i; int16 *a_j; int16 *D_j; int16 *U_ij; int16 *x_i; // int32 z_pom; // int16 z_pom_int16; int16 UC[16]; // in q15 /* copy U for vector a */ mmultCU(C,U,UC,dimy,dimx); a = UC; // iyth row of U for (iy=0; iy>15; alpha = (R[iy])>>(15-qD); //\alpha = R+vDv = R+a*b // R in q15, a in q15, b=q15 // gamma = (1<<15)/alpha; //(in q15) //min alpha = R[iy] = 164 //max gamma = 0.0061 => gamma_ref = q7 for (j=0,a_j=a,b_j=b,D_j=D; j>15; * D[j] = ((int32)beta**D_j)/alpha;*/ /*xx*/ lambda=alpha; alpha += ((int32)(*a_j)*(*b_j))>>15; D[j] = ((int32)lambda**D_j)/alpha; // z_pom_int16 = -((int32)(*a_j)<<15)/lambda; /*xx*/ if (*D_j==0) *D_j=1; for (i=0,b_i=b,U_ij=U+j; i>15; // puvodni reseni *U_ij -= ((int32)(*a_j)*(*b_i))/lambda; // pozadovane optimalni reseni *b_i += ((int32)beta*(*b_j))>>15; //printf("%d, %d, %d\n", ((int32)(*a_j)*(*b_i))/lambda, *b_i, beta); } } // no shift due to gamma for (i=0,x_i=xp,b_i=b; i>(qAU))*(*Dold_k); } sigma += (int32)(*(Q+i+irows))<<(qAU+qD-15); for (j=i+1, G_ik=G+irows+i+1; j>15)**(Q+j+j*rows))>>(30-qAU-qD); } if (sigma>((int32)1<<(qAU+15))) { *D_i = 32767; // *(Dold+i)-=*(Q+i+irows); } else { *D_i=sigma>>qAU; } if (*D_i==0) *D_i=1; for (j=0;j>qAU)**Dold_k); } for (k=i,G_ik=G+irows+i,G_jk=G+jrows+i,Q_kk=Q+k*rows+k; k>15)**Q_kk)>>(30-qD-qAU); } z=(sigma/(*D_i))<<(15-qAU); // shift to q15 if (z>32767) z=32767; if (z<-32768) z=-32768; U_ji=U+jrows+i; *U_ji = (int16)z; for (k=0,PSIU_ik=PSIU+irows,PSIU_jk=PSIU+jrows; k>15; } for (k=0,G_jk=G+jrows,G_ik=G+irows; k>15; } } if (i==0) return; } } void householder(int16 *Ch /*= int16 *PSICh*/, int16 *Q, unsigned int16 dimx) { int16 k,j,i; int16 alpha,beta; int32 sigma; // 2*qCh int32 tmp_long; int16 B[25];//Q in qCh int16 w[5]; int16 v[5]; // copy Q to B for (i=0;i>(15-qCh); } for (k=dimx-1; k>=0; k--) { sigma=0; for (j=0;j>(qCh+1); // alpha = sigma /2; if (alpha==0) alpha =1; for (i=0;i<=k;i++) { sigma=0; for (j=0;j> 15; // navrat do Q15 // if (sigma>32767)sigma=32767; for (j=0;j32767) { B[i*dimx+j]=32767; } else { if (tmp_long<-32767){ B[i*dimx+j]=-32767; } else { B[i*dimx+j]=tmp_long; } } }; for (j=0;j<=k;j++) { tmp_long=Ch[i*dimx+j]-(sigma*v[j])/alpha; if (tmp_long>32767) { Ch[i*dimx+j]=32767; } else { if (tmp_long<-32767){ Ch[i*dimx+j]=-32767; } else { Ch[i*dimx+j]=tmp_long; } } } } } } void carlson(int16 *difz, int16 *xp, int16 *Ch, int16 *R, unsigned int16 dimy, unsigned int16 dimx ) { int16 alpha,beta,gamma; int16 delta, eta,epsilon,zeta,sigma,tau; int16 i,j,iy; int16 w[5]; int32 tmp_long; for (iy=0; iy>15; // double ab=(double)alpha*beta/32768./32768.; // double s_ab=sqrt(ab); gamma=(int16)(sqrt((double)((int32)alpha*beta))+0.5); // predelat v DSP //gamma = round(s_ab*(1<<15)); // eta=((long)beta<<15) / gamma; //zeta=(long(sigma)<<15)/ gamma; w[j]=0; for (i=0;i<=j;i++) { tau=Ch[i*dimx+j]; tmp_long=((int32)beta*Ch[i*dimx+j] -(int32)sigma*w[i])/gamma; /* if (tmp_long>32767) { Ch[i*dimx+j]=32767; } else { if (tmp_long<-32767){ Ch[i*dimx+j]=-32767; } else { */ Ch[i*dimx+j]=tmp_long; /* } } */ w[i]+=((int32)tau*sigma)>>15; } } //epsilon=(long(difz)<<15) / (alpha); // q15*q13/q13 = q15 for (i=0;i>(15-qCh); } for (i=dimx-1; i>=0; i--){ for (j=0; j0){ rho=sqrt(double(tmp_long)); s=(int32(A[i*dimx+j])<<15)/rho; c=(int32(Ch[i*dimx+i])<<15)/rho; for (k=0;k<=i; k++){ tau=(int32(c)*A[k*dimx+j]-int32(s)*Ch[k*dimx+i])>>15; Ch[k*dimx +i]=(int32(s)*A[k*dimx+j]+int32(c)*Ch[k*dimx+i])>>15; A[k*dimx +j]=tau; } } } for (j=0; j0){ rho=sqrt((double)(tmp_long)); s=(int32(Ch[i*dimx+j])<<15)/rho; c=(int32(Ch[i*dimx+i])<<15)/rho; for (k=0; k<=i; k++){ tau=(int32(c)*Ch[k*dimx+j]-int32(s)*Ch[k*dimx+i])>>15; Ch[k*dimx+i]=(int32(s)*Ch[k*dimx+j]+int32(c)*Ch[k*dimx+i])>>15; Ch[k*dimx+j]=tau; } } } } } /* Matrix multiply Full matrix by upper diagonal matrix; */ void mmultACh(int16 *m1, int16 *up, int16 *result, unsigned int16 rows, unsigned int16 columns) { unsigned int16 i, j, k; int32 tmp_sum=0L; int16 *m2pom; int16 *m1pom=m1; int16 *respom=result; for (i=0; i(1<<29)-1) *respom++=(1<<14); else */ *respom++=(tmp_sum+(1<<14))>>15; tmp_sum=0; } m1pom+=(columns); } } void givens_fast(int16 *Ch, int16 *Q, unsigned int16 dimx) { int16 i,j,k; int16 rho,s,c,tau; int32 tmp_long; //c,s in q14!! int16 A[25];//beware int16 *A_ij, *Q_i, *Ch_ki, *Ch_kj, *Ch_ii, *Ch_ij, *A_kj; A_ij=A; Q_i=Q; // copy Q to A for (i=0;i>(15-qCh); *A_ij++=(*Q_i++)>>(15-qCh); } for (i=dimx-1; i>=0; i--) { Ch_ii=Ch+i*dimx+i; A_ij=A+i*dimx; for (j=0; j>14; tmp_long=(int32)s**A_kj+(int32)c**Ch_ki; if (tmp_long>(1<<29)) //q14 + q14 *Ch_ki = (1<<15)-1; else *Ch_ki=tmp_long>>14; *A_kj=tau; Ch_ki+=dimx; A_kj+=dimx; } } A_ij++; } Ch_ij = Ch+i*dimx; for (j=0; j0) { tmp_long=(int32)*Ch_ii**Ch_ii+(int32)*Ch_ij**Ch_ij; // rho=qsqrt(tmp_long); // verze pro DSP if (tmp_long>(1<<30)-1) rho=(1<<15)-1; else rho=(int16)(sqrt((double)tmp_long)); // verze pro PC s=(((int32)*Ch_ij)<<14)/rho; c=(((int32)*Ch_ii)<<14)/rho; Ch_kj = Ch + j; Ch_ki = Ch + i; for (k=0; k<=i; k++) { tau=((int32)c**Ch_kj-(int32)s**Ch_ki)>>14; tmp_long =((int32)s**Ch_kj+(int32)c**Ch_ki); if (tmp_long>(1<<29)) *Ch_ki = (1<<15)-1; else *Ch_ki=tmp_long>>14; *Ch_kj=tau; Ch_kj += dimx; Ch_ki += dimx; } } Ch_ij++; } } } void carlson_fast(int16 *difz, int16 *xp, int16 *Ch, int16 *R, unsigned int16 dimy, unsigned int16 dimx ) { int16 alpha,beta,gamma; int16 delta, eta,epsilon,zeta,sigma,tau; int16 i,j,iy; int16 w[5]; int32 tmp_long; int16 *Ch_ij, *w_i, *x_i; for (iy=0; iy>15; alpha=(((int32)alpha<<15)+(int32)sigma*sigma)>>15; // vyssi presnost // gamma= qsqrt(((long)alpha*beta)); // verze pro DSP gamma= (int16)(sqrt((double)((int32)alpha*beta))); // verze pro PC w[j]=0; Ch_ij=Ch+j; w_i=w; for (i=0;i<=j;i++) { // tau=Ch[i*dimx+j]; tau=*Ch_ij; // tmp_long=((long)beta*Ch[i*dimx+j] -(long)sigma*w[i])/gamma; tmp_long=((int32)beta**Ch_ij -(int32)sigma**w_i)/gamma; if (tmp_long>32767) tmp_long=32767; if (tmp_long<-32768) tmp_long=-32768; *Ch_ij=tmp_long; // w_i+=((long)tau*sigma)>>15; *w_i=(((int32)*w_i<<15)+(int32)tau*sigma)>>15; w_i++; Ch_ij+=dimx; } } x_i=xp; w_i=w; for (i=0;i>15; //sigma in qCh Ch_ij+=dimx; C_yi++; } //sigma=Ch[iy*dimx+j]; beta=alpha; // in q15 // alpha+=((long)sigma*sigma)>>15; tmp_long=((int32)alpha<<15)+(((int32)sigma*sigma)<<(30-2*qCh)); alpha=(tmp_long+(1<<14))>>15; // vyssi presnost // gamma= qsqrt(((long)alpha*beta)); // verze pro DSP gamma= (int16)(sqrt((double)((int32)alpha*beta))); // verze pro PC // in q15 w[j]=0; Ch_ij=Ch+j; w_i=w; // in q15 for (i=0;i<=j;i++) { // tau=Ch[i*dimx+j]; tau=*Ch_ij; // tmp_long=((long)beta*Ch[i*dimx+j] -(long)sigma*w[i])/gamma; tmp_long=((int32)beta**Ch_ij -(int32)sigma**w_i)/gamma; // in qCh /* if (tmp_long>32767) tmp_long=32767; if (tmp_long<-32768) tmp_long=-32768;*/ *Ch_ij=tmp_long; // w_i+=((long)tau*sigma)>>15; tmp_long = ((int32)*w_i<<15)+((int32)tau*sigma<<(30-2*qCh)); *w_i=(tmp_long+(1<<14))>>15; w_i++; Ch_ij+=dimx; } } x_i=xp; w_i=w; for (i=0;i