| 50 | }; |
| 51 | |
| 52 | //same as mmultAU but different precision |
| 53 | void mmultCU(int16 *m1, int16 *up, int16 *result, unsigned int16 rows, unsigned int16 columns) { |
| 54 | unsigned int16 i, j, k; |
| 55 | int32 tmp_sum=0L; //in 15+qAU |
| 56 | int16 *m2pom; |
| 57 | int16 *m1pom=m1; |
| 58 | int16 *respom=result; |
| 59 | |
| 60 | for (i=0; i<rows; i++) //rows of result |
| 61 | { |
| 62 | for (j=0; j<columns; j++) //columns of result |
| 63 | { |
| 64 | m2pom=up+j;//?? |
| 65 | |
| 66 | for (k=0; k<j; k++) //inner loop up to "j" - U(j,j)==1; |
| 67 | { |
| 68 | tmp_sum+=((int32)(*(m1pom++))**m2pom)>>(15-qCU); |
| 69 | m2pom+=columns; |
| 70 | } |
| 71 | // add the missing A(i,j) |
| 72 | tmp_sum +=(int32)(*m1pom)<<qCU; // no need to shift |
| 73 | m1pom-=(j); // shift back to first element |
| 74 | |
| 75 | *respom++=tmp_sum>>15; |
| 76 | |
| 77 | tmp_sum=0; |
| 78 | } |
| 79 | m1pom+=(columns); |
| 80 | } |
| 155 | void bierman_fastC(int16 *difz, int16 *xp, int16 *U, int16 *D, int16 *C, int16 *R, unsigned int16 dimy, unsigned int16 dimx ) |
| 156 | { |
| 157 | int16 alpha; |
| 158 | int16 beta,lambda; |
| 159 | int16 b[5]; // ok even for 4-dim state |
| 160 | int16 *a; // in [0,1] -> qCU |
| 161 | unsigned int16 iy,j,i; |
| 162 | |
| 163 | int16 *b_j,*b_i; |
| 164 | int16 *a_j; |
| 165 | int16 *D_j; |
| 166 | int16 *U_ij; |
| 167 | int16 *x_i; |
| 168 | |
| 169 | // int32 z_pom; |
| 170 | // int16 z_pom_int16; |
| 171 | |
| 172 | int16 UC[16]; // in q15 |
| 173 | |
| 174 | /* copy U for vector a */ |
| 175 | mmultCU(C,U,UC,dimy,dimx); |
| 176 | |
| 177 | a = UC; // iyth row of U |
| 178 | for (iy=0; iy<dimy; iy++, a+=dimx) { |
| 179 | // a is a row |
| 180 | for (j=0,a_j=a,b_j=b,D_j=D; j<dimx; j++,b_j++,D_j++,a_j++) |
| 181 | *b_j=((int32)(*D_j)*(*a_j))>>15; |
| 182 | |
| 183 | alpha = R[iy]; //\alpha = R+vDv = R+a*b |
| 184 | // R in q15, a in q15, b=q15 |
| 185 | // gamma = (1<<15)/alpha; //(in q15) |
| 186 | //min alpha = R[iy] = 164 |
| 187 | //max gamma = 0.0061 => gamma_ref = q7 |
| 188 | for (j=0,a_j=a,b_j=b,D_j=D; j<dimx; j++,a_j++,b_j++,D_j++) { |
| 189 | /* beta=alpha; |
| 190 | * lambda = -((int32)(*a_j)<<15)/beta; |
| 191 | * alpha += ((int32)(*a_j)*(*b_j))>>15; |
| 192 | * D[j] = ((int32)beta**D_j)/alpha;*/ |
| 193 | /*xx*/ |
| 194 | lambda=alpha; |
| 195 | alpha += ((int32)(*a_j)*(*b_j))>>15; |
| 196 | D[j] = ((int32)lambda**D_j)/alpha; |
| 197 | // z_pom_int16 = -((int32)(*a_j)<<15)/lambda; |
| 198 | /*xx*/ |
| 199 | |
| 200 | if (*D_j==0) *D_j=1; |
| 201 | |
| 202 | for (i=0,b_i=b,U_ij=U+j; i<j; i++, b_i++,U_ij+=dimx) { |
| 203 | beta = *U_ij; |
| 204 | // *U_ij += ((int32)lambda*(*b_i))>>15; // puvodni reseni |
| 205 | *U_ij -= ((int32)(*a_j)*(*b_i))/lambda; // pozadovane optimalni reseni |
| 206 | // *U_ij -= ((int32)((int16)((int32)(*a_j)<<15)/lambda)**b_i)>>15; // tohle funguje - problem je s tim pretypovanim na (int16) |
| 207 | // *U_ij -= (int16)((int32)(*a_j)*(*b_i))/lambda; |
| 208 | // z_pom = (((int32)(*a_j)*(*b_i))/lambda); |
| 209 | /* z_pom = (int32)(*U_ij)-(int16)((int32)(*a_j)*(*b_i))/lambda; |
| 210 | * if (z_pom > 32767) z_pom = 32767; |
| 211 | * if (z_pom < - 32768) z_pom = -32768; |
| 212 | *U_ij = z_pom; /**/ |
| 213 | // *U_ij += ((int32)z_pom_int16*(*b_i))>>15; // puvodni reseni - jen jina konstanta |
| 214 | *b_i += ((int32)beta*(*b_j))>>15; |
| 215 | } |
| 216 | } |
| 217 | // no shift due to gamma |
| 218 | for (i=0,x_i=xp,b_i=b; i<dimx; i++,x_i++,b_i++) { |
| 219 | *x_i += ((int32)difz[iy]*(*b_i))/alpha; // multiply by unscaled Kalman gain |
| 220 | |
| 221 | } |
| 222 | } |
| 223 | } |
| 224 | |