1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /* ---- includes ----------------------------------------------------------- */ 18 19 #include "b_TensorEm/Flt16Vec.h" 20 #include "b_BasicEm/Memory.h" 21 #include "b_BasicEm/Math.h" 22 #include "b_BasicEm/Functions.h" 23 24 /* ------------------------------------------------------------------------- */ 25 26 /* ========================================================================= */ 27 /* */ 28 /* ---- \ghd{ auxiliary functions } ---------------------------------------- */ 29 /* */ 30 /* ========================================================================= */ 31 32 /* ------------------------------------------------------------------------- */ 33 34 /* ========================================================================= */ 35 /* */ 36 /* ---- \ghd{ constructor / destructor } ----------------------------------- */ 37 /* */ 38 /* ========================================================================= */ 39 40 /* ------------------------------------------------------------------------- */ 41 42 void bts_Flt16Vec_init( struct bbs_Context* cpA, 43 struct bts_Flt16Vec* ptrA ) 44 { 45 bbs_Int16Arr_init( cpA, &ptrA->arrE ); 46 ptrA->expE = 0; 47 } 48 49 /* ------------------------------------------------------------------------- */ 50 51 void bts_Flt16Vec_exit( struct bbs_Context* cpA, 52 struct bts_Flt16Vec* ptrA ) 53 { 54 bbs_Int16Arr_exit( cpA, &ptrA->arrE ); 55 ptrA->expE = 0; 56 } 57 58 /* ------------------------------------------------------------------------- */ 59 60 /* ========================================================================= */ 61 /* */ 62 /* ---- \ghd{ operators } -------------------------------------------------- */ 63 /* */ 64 /* ========================================================================= */ 65 66 /* ------------------------------------------------------------------------- */ 67 68 void bts_Flt16Vec_copy( struct bbs_Context* cpA, 69 struct bts_Flt16Vec* ptrA, 70 const struct bts_Flt16Vec* srcPtrA ) 71 { 72 bbs_Int16Arr_copy( cpA, &ptrA->arrE, &srcPtrA->arrE ); 73 ptrA->expE = srcPtrA->expE; 74 } 75 76 /* ------------------------------------------------------------------------- */ 77 78 flag bts_Flt16Vec_equal( struct bbs_Context* cpA, 79 const struct bts_Flt16Vec* ptrA, 80 const struct bts_Flt16Vec* srcPtrA ) 81 { 82 if( !bbs_Int16Arr_equal( cpA, &ptrA->arrE, &srcPtrA->arrE ) ) return FALSE; 83 if( ptrA->expE != srcPtrA->expE ) return FALSE; 84 return TRUE; 85 } 86 87 /* ------------------------------------------------------------------------- */ 88 89 /* ========================================================================= */ 90 /* */ 91 /* ---- \ghd{ query functions } -------------------------------------------- */ 92 /* */ 93 /* ========================================================================= */ 94 95 /* ------------------------------------------------------------------------- */ 96 97 int16 bts_Flt16Vec_avg( struct bbs_Context* cpA, const struct bts_Flt16Vec* ptrA ) 98 { 99 uint16 iL; 100 uint16 sizeL = ptrA->arrE.sizeE; 101 int32 sumL = 0; 102 const int16* srcL = ptrA->arrE.arrPtrE; 103 for( iL = 0; iL < sizeL; iL++ ) 104 { 105 sumL += srcL[ iL ]; 106 } 107 return sumL / ( int32 )sizeL; 108 } 109 110 /* ------------------------------------------------------------------------- */ 111 112 uint32 bts_Flt16Vec_norm( struct bbs_Context* cpA, const struct bts_Flt16Vec* ptrA ) 113 { 114 return bbs_vecNorm16( ptrA->arrE.arrPtrE, ptrA->arrE.sizeE ); 115 } 116 117 /* ------------------------------------------------------------------------- */ 118 119 uint16 bts_Flt16Vec_maxAbs( struct bbs_Context* cpA, const struct bts_Flt16Vec* ptrA ) 120 { 121 uint16 iL; 122 uint16 sizeL = ptrA->arrE.sizeE; 123 uint16 maxL = 0; 124 const int16* srcL = ptrA->arrE.arrPtrE; 125 for( iL = 0; iL < sizeL; iL++ ) 126 { 127 uint16 vL = srcL[ iL ] > 0 ? srcL[ iL ] : -srcL[ iL ]; 128 maxL = vL > maxL ? vL : maxL; 129 } 130 return maxL; 131 } 132 133 /* ------------------------------------------------------------------------- */ 134 135 /* ========================================================================= */ 136 /* */ 137 /* ---- \ghd{ modify functions } ------------------------------------------- */ 138 /* */ 139 /* ========================================================================= */ 140 141 /* ------------------------------------------------------------------------- */ 142 143 void bts_Flt16Vec_create( struct bbs_Context* cpA, 144 struct bts_Flt16Vec* ptrA, 145 uint32 sizeA, 146 struct bbs_MemSeg* mspA ) 147 { 148 bbs_Int16Arr_create( cpA, &ptrA->arrE, sizeA, mspA ); 149 } 150 151 /* ------------------------------------------------------------------------- */ 152 153 void bts_Flt16Vec_size( struct bbs_Context* cpA, 154 struct bts_Flt16Vec* ptrA, 155 uint32 sizeA ) 156 { 157 bbs_Int16Arr_size( cpA, &ptrA->arrE, sizeA ); 158 } 159 160 /* ------------------------------------------------------------------------- */ 161 162 /* ========================================================================= */ 163 /* */ 164 /* ---- \ghd{ I/O } -------------------------------------------------------- */ 165 /* */ 166 /* ========================================================================= */ 167 168 /* ------------------------------------------------------------------------- */ 169 170 uint32 bts_Flt16Vec_memSize( struct bbs_Context* cpA, 171 const struct bts_Flt16Vec *ptrA ) 172 { 173 return bbs_SIZEOF16( uint32 ) /* mem size */ 174 + bbs_Int16Arr_memSize( cpA, &ptrA->arrE ) 175 + bbs_SIZEOF16( ptrA->expE ); 176 } 177 178 /* ------------------------------------------------------------------------- */ 179 180 uint32 bts_Flt16Vec_memWrite( struct bbs_Context* cpA, 181 const struct bts_Flt16Vec* ptrA, 182 uint16* memPtrA ) 183 { 184 uint32 memSizeL = bts_Flt16Vec_memSize( cpA, ptrA ); 185 memPtrA += bbs_memWrite32( &memSizeL, memPtrA ); 186 memPtrA += bbs_Int16Arr_memWrite( cpA, &ptrA->arrE, memPtrA ); 187 memPtrA += bbs_memWrite16( &ptrA->expE, memPtrA ); 188 return memSizeL; 189 } 190 191 /* ------------------------------------------------------------------------- */ 192 193 uint32 bts_Flt16Vec_memRead( struct bbs_Context* cpA, 194 struct bts_Flt16Vec* ptrA, 195 const uint16* memPtrA, 196 struct bbs_MemSeg* mspA ) 197 { 198 uint32 memSizeL; 199 if( bbs_Context_error( cpA ) ) return 0; 200 memPtrA += bbs_memRead32( &memSizeL, memPtrA ); 201 memPtrA += bbs_Int16Arr_memRead( cpA, &ptrA->arrE, memPtrA, mspA ); 202 memPtrA += bbs_memRead16( &ptrA->expE, memPtrA ); 203 204 if( memSizeL != bts_Flt16Vec_memSize( cpA, ptrA ) ) 205 { 206 bbs_ERR0( bbs_ERR_CORRUPT_DATA, "uint32 bts_Flt16Vec_memRead( const struct bts_Flt16Vec* ptrA, const void* memPtrA ):\n" 207 "size mismatch" ); 208 return 0; 209 } 210 return memSizeL; 211 } 212 213 /* ------------------------------------------------------------------------- */ 214 215 /* ========================================================================= */ 216 /* */ 217 /* ---- \ghd{ exec functions } --------------------------------------------- */ 218 /* */ 219 /* ========================================================================= */ 220 221 /* ------------------------------------------------------------------------- */ 222 223 void bts_Flt16Vec_maximizeMantisse( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA ) 224 { 225 uint32 maxAbsL = bts_Flt16Vec_maxAbs( cpA, ptrA ); 226 int16 shlL = 0; 227 228 if( maxAbsL == 0 ) return; /* cannot maximize 0 */ 229 230 while( maxAbsL < 0x4000 ) 231 { 232 shlL++; 233 maxAbsL <<= 1; 234 } 235 236 if( shlL > 0 ) 237 { 238 uint32 iL; 239 uint32 sizeL = ptrA->arrE.sizeE; 240 int16* dstL = ptrA->arrE.arrPtrE; 241 for( iL = 0; iL < sizeL; iL++ ) dstL[ iL ] <<= shlL; 242 ptrA->expE -= shlL; 243 } 244 } 245 246 /* ------------------------------------------------------------------------- */ 247 248 uint32 bts_Flt16Vec_maximizeAbsValue( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA ) 249 { 250 int32 maxAbsL = bts_Flt16Vec_maxAbs( cpA, ptrA ); 251 int32 fL; 252 if( maxAbsL == 0 ) return 0; /* vector is zero */ 253 254 fL = ( int32 )0x7FFF0000 / maxAbsL; 255 256 { 257 uint32 iL; 258 uint32 sizeL = ptrA->arrE.sizeE; 259 int16* dstL = ptrA->arrE.arrPtrE; 260 for( iL = 0; iL < sizeL; iL++ ) dstL[ iL ] = ( ( int32 )dstL[ iL ] * fL + 32768 ) >> 16; 261 } 262 263 return fL; 264 } 265 266 /* ------------------------------------------------------------------------- */ 267 268 void bts_Flt16Vec_zeroAverage( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA ) 269 { 270 uint16 iL; 271 uint16 sizeL = ptrA->arrE.sizeE; 272 int16* dstL = ptrA->arrE.arrPtrE; 273 int16 avgL = bts_Flt16Vec_avg( cpA, ptrA ); 274 for( iL = 0; iL < sizeL; iL++ ) dstL[ iL ] -= avgL; 275 } 276 277 /* ------------------------------------------------------------------------- */ 278 279 void bts_Flt16Vec_normalize( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA ) 280 { 281 uint32 normL = bts_Flt16Vec_norm( cpA, ptrA ); 282 283 if( normL == 0 ) 284 { 285 /* vector is zero - do nothing */ 286 return; 287 } 288 else 289 { 290 int16* dstL = ptrA->arrE.arrPtrE; 291 uint16 iL; 292 uint16 sizeL = ptrA->arrE.sizeE; 293 int16 expL = 0; 294 int32 fL; 295 296 /* let norm occupy 17 bits */ 297 if( ( normL & 0xFFFE0000 ) != 0 ) 298 { 299 while( ( ( normL >> -expL ) & 0xFFFE0000 ) != 0 ) expL--; 300 normL >>= -expL; 301 } 302 else 303 { 304 while( ( ( normL << expL ) & 0xFFFF0000 ) == 0 ) expL++; 305 normL <<= expL; 306 } 307 308 /* fL is positive and occupies only 16 bits - a product with int16 fits in int32 */ 309 fL = ( uint32 )0xFFFFFFFF / normL; 310 311 /* multiply with factor */ 312 for( iL = 0; iL < sizeL; iL++ ) dstL[ iL ] = ( ( ( ( int32 )dstL[ iL ] * fL ) >> 15 ) + 1 ) >> 1; 313 314 /* set exponent */ 315 ptrA->expE = expL - 16; 316 } 317 /* 318 { 319 uint32 testNormL = bts_Flt16Vec_norm( cpA, ptrA ); 320 printf( "test norm %f\n", ( float )testNormL / ( 1 << -ptrA->expE ) ); 321 } 322 */ 323 } 324 325 /* ------------------------------------------------------------------------- */ 326 327 void bts_Flt16Vec_setZero( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA ) 328 { 329 bbs_Int16Arr_fill( cpA, &ptrA->arrE, 0 ); 330 ptrA->expE = 0; 331 } 332 333 /* ------------------------------------------------------------------------- */ 334 335 void bts_Flt16Vec_mul( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA, int32 valA, int16 expA ) 336 { 337 int32 valL = valA; 338 int16 expL = expA; 339 340 if( valL == 0 ) 341 { 342 bts_Flt16Vec_setZero( cpA, ptrA ); 343 return; 344 } 345 else 346 { 347 uint32 iL; 348 uint32 sizeL = ptrA->arrE.sizeE; 349 int16* dstL = ptrA->arrE.arrPtrE; 350 351 /* adjust valL to maximum 16 bit accuracy */ 352 uint32 absValL = valL > 0 ? valL : -valL; 353 if( ( absValL & 0xFFFF8000 ) != 0 ) 354 { 355 int32 shrL = 0; 356 while( ( absValL & 0xFFFF8000 ) != 0 ) 357 { 358 absValL >>= 1; 359 shrL++; 360 } 361 362 if( shrL > 0 ) 363 { 364 valL = ( ( valL >> ( shrL - 1 ) ) + 1 ) >> 1; 365 expL += shrL; 366 if( valL >= 0x08000 ) valL = 0x07FFF; /* saturate */ 367 } 368 } 369 else 370 { 371 int32 shlL = 0; 372 while( ( absValL & 0xFFFFC000 ) == 0 ) 373 { 374 absValL <<= 1; 375 shlL++; 376 } 377 378 valL <<= shlL; 379 expL -= shlL; 380 } 381 382 for( iL = 0; iL < sizeL; iL++ ) 383 { 384 dstL[ iL ] = ( ( ( ( int32 )dstL[ iL ] * valL ) >> 15 ) + 1 ) >> 1; 385 } 386 ptrA->expE += expL + 16; 387 } 388 } 389 390 /* ------------------------------------------------------------------------- */ 391 392 void bts_Flt16Vec_dotPtrd( struct bbs_Context* cpA, struct bts_Flt16Vec* vp1A, struct bts_Flt16Vec* vp2A, int32* manPtrA, int32* expPtrA ) 393 { 394 bbs_DEF_fNameL( "void bts_Flt16Vec_dotPtrd( struct bbs_Context* cpA, struct bts_Flt16Vec* vp1A, struct bts_Flt16Vec* vp2A, int32* matPtrA, int32* expPtrA )" ) 395 uint16 iL; 396 uint16 sizeL = vp1A->arrE.sizeE; 397 const int16* arr1L = vp1A->arrE.arrPtrE; 398 const int16* arr2L = vp2A->arrE.arrPtrE; 399 int16 shrm1L = -1; /* shift minus 1 */ 400 int32 sumL; 401 402 if( vp1A->arrE.sizeE != vp2A->arrE.sizeE ) 403 { 404 bbs_ERROR1( "%s:\nVectors have different size", fNameL ); 405 return; 406 } 407 408 sumL = 0; 409 /* shrm1L == -1 */ 410 for( iL = 0; iL < sizeL; iL++ ) 411 { 412 sumL += ( int32 )arr1L[ iL ] * ( int32 )arr2L[ iL ]; 413 if( ( ( ( sumL > 0 ) ? sumL : -sumL ) & 0xC0000000 ) != 0 ) break; 414 } 415 416 if( iL < sizeL ) 417 { 418 /* danger of overflow: increase shift; adjust sum */ 419 shrm1L++; 420 sumL = ( ( sumL >> 1 ) + 1 ) >> 1; 421 422 /* shrm1L == 0 */ 423 for( iL = 0; iL < sizeL; iL++ ) 424 { 425 sumL += ( int32 )( ( arr1L[ iL ] + 1 ) >> 1 ) * ( int32 )( ( arr2L[ iL ] + 1 ) >> 1 ); 426 if( ( ( ( sumL > 0 ) ? sumL : -sumL ) & 0xC0000000 ) != 0 ) break; 427 } 428 429 for( iL = 0; iL < sizeL; iL++ ) 430 { 431 if( ( ( ( sumL > 0 ) ? sumL : -sumL ) & 0xC0000000 ) != 0 ) 432 { 433 /* danger of overflow: increase shift; adjust sum */ 434 shrm1L++; 435 sumL = ( ( sumL >> 1 ) + 1 ) >> 1; 436 } 437 438 sumL += ( int32 )( ( ( arr1L[ iL ] >> shrm1L ) + 1 ) >> 1 ) * ( int32 )( ( ( arr2L[ iL ] >> shrm1L ) + 1 ) >> 1 ); 439 } 440 } 441 442 if( manPtrA != NULL ) *manPtrA = sumL; 443 if( expPtrA != NULL ) *expPtrA = vp1A->expE + vp2A->expE + ( ( shrm1L + 1 ) << 1 ); 444 } 445 446 /* ------------------------------------------------------------------------- */ 447 448 void bts_Flt16Vec_append( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA, struct bts_Flt16Vec* srcPtrA ) 449 { 450 if( ptrA->arrE.sizeE == 0 ) 451 { 452 bts_Flt16Vec_copy( cpA, ptrA, srcPtrA ); 453 } 454 else 455 { 456 uint32 idxL = ptrA->arrE.sizeE; 457 bts_Flt16Vec_size( cpA, ptrA, idxL + srcPtrA->arrE.sizeE ); 458 459 /* copy data */ 460 bbs_memcpy16( ptrA->arrE.arrPtrE + idxL, srcPtrA->arrE.arrPtrE, srcPtrA->arrE.sizeE ); 461 462 /* equalize exponent */ 463 if( ptrA->expE > srcPtrA->expE ) 464 { 465 uint32 iL; 466 uint32 sizeL = srcPtrA->arrE.sizeE; 467 uint32 shrL = ptrA->expE - srcPtrA->expE; 468 int16* dstL = ptrA->arrE.arrPtrE + idxL; 469 for( iL = 0; iL < sizeL; iL++ ) dstL[ iL ] = ( ( dstL[ iL ] >> ( shrL - 1 ) ) + 1 ) >> 1; 470 } 471 else if( ptrA->expE < srcPtrA->expE ) 472 { 473 uint32 iL; 474 uint32 sizeL = idxL; 475 uint32 shrL = srcPtrA->expE - ptrA->expE; 476 int16* dstL = ptrA->arrE.arrPtrE; 477 for( iL = 0; iL < sizeL; iL++ ) dstL[ iL ] = ( ( dstL[ iL ] >> ( shrL - 1 ) ) + 1 ) >> 1; 478 ptrA->expE = srcPtrA->expE; 479 } 480 } 481 } 482 483 /* ------------------------------------------------------------------------- */ 484 485 /* ========================================================================= */ 486 487