Home | History | Annotate | Download | only in b_TensorEm
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 /* ---- includes ----------------------------------------------------------- */
     18 
     19 #include "b_TensorEm/Flt16Vec.h"
     20 #include "b_BasicEm/Memory.h"
     21 #include "b_BasicEm/Math.h"
     22 #include "b_BasicEm/Functions.h"
     23 
     24 /* ------------------------------------------------------------------------- */
     25 
     26 /* ========================================================================= */
     27 /*                                                                           */
     28 /* ---- \ghd{ auxiliary functions } ---------------------------------------- */
     29 /*                                                                           */
     30 /* ========================================================================= */
     31 
     32 /* ------------------------------------------------------------------------- */
     33 
     34 /* ========================================================================= */
     35 /*                                                                           */
     36 /* ---- \ghd{ constructor / destructor } ----------------------------------- */
     37 /*                                                                           */
     38 /* ========================================================================= */
     39 
     40 /* ------------------------------------------------------------------------- */
     41 
     42 void bts_Flt16Vec_init( struct bbs_Context* cpA,
     43 						struct bts_Flt16Vec* ptrA )
     44 {
     45 	bbs_Int16Arr_init( cpA, &ptrA->arrE );
     46 	ptrA->expE = 0;
     47 }
     48 
     49 /* ------------------------------------------------------------------------- */
     50 
     51 void bts_Flt16Vec_exit( struct bbs_Context* cpA,
     52 						struct bts_Flt16Vec* ptrA )
     53 {
     54 	bbs_Int16Arr_exit( cpA, &ptrA->arrE );
     55 	ptrA->expE = 0;
     56 }
     57 
     58 /* ------------------------------------------------------------------------- */
     59 
     60 /* ========================================================================= */
     61 /*                                                                           */
     62 /* ---- \ghd{ operators } -------------------------------------------------- */
     63 /*                                                                           */
     64 /* ========================================================================= */
     65 
     66 /* ------------------------------------------------------------------------- */
     67 
     68 void bts_Flt16Vec_copy( struct bbs_Context* cpA,
     69 						struct bts_Flt16Vec* ptrA,
     70 						const struct bts_Flt16Vec* srcPtrA )
     71 {
     72 	bbs_Int16Arr_copy( cpA, &ptrA->arrE, &srcPtrA->arrE );
     73 	ptrA->expE = srcPtrA->expE;
     74 }
     75 
     76 /* ------------------------------------------------------------------------- */
     77 
     78 flag bts_Flt16Vec_equal( struct bbs_Context* cpA,
     79 						 const struct bts_Flt16Vec* ptrA,
     80 						 const struct bts_Flt16Vec* srcPtrA )
     81 {
     82 	if( !bbs_Int16Arr_equal( cpA, &ptrA->arrE, &srcPtrA->arrE ) ) return FALSE;
     83 	if( ptrA->expE != srcPtrA->expE ) return FALSE;
     84 	return TRUE;
     85 }
     86 
     87 /* ------------------------------------------------------------------------- */
     88 
     89 /* ========================================================================= */
     90 /*                                                                           */
     91 /* ---- \ghd{ query functions } -------------------------------------------- */
     92 /*                                                                           */
     93 /* ========================================================================= */
     94 
     95 /* ------------------------------------------------------------------------- */
     96 
     97 int16 bts_Flt16Vec_avg( struct bbs_Context* cpA, const struct bts_Flt16Vec* ptrA )
     98 {
     99 	uint16 iL;
    100 	uint16 sizeL = ptrA->arrE.sizeE;
    101 	int32 sumL = 0;
    102 	const int16* srcL = ptrA->arrE.arrPtrE;
    103 	for( iL = 0; iL < sizeL; iL++ )
    104 	{
    105 		sumL += srcL[ iL ];
    106 	}
    107 	return sumL / ( int32 )sizeL;
    108 }
    109 
    110 /* ------------------------------------------------------------------------- */
    111 
    112 uint32 bts_Flt16Vec_norm( struct bbs_Context* cpA, const struct bts_Flt16Vec* ptrA )
    113 {
    114 	return bbs_vecNorm16( ptrA->arrE.arrPtrE, ptrA->arrE.sizeE );
    115 }
    116 
    117 /* ------------------------------------------------------------------------- */
    118 
    119 uint16 bts_Flt16Vec_maxAbs( struct bbs_Context* cpA, const struct bts_Flt16Vec* ptrA )
    120 {
    121 	uint16 iL;
    122 	uint16 sizeL = ptrA->arrE.sizeE;
    123 	uint16 maxL = 0;
    124 	const int16* srcL = ptrA->arrE.arrPtrE;
    125 	for( iL = 0; iL < sizeL; iL++ )
    126 	{
    127 		uint16 vL = srcL[ iL ] > 0 ? srcL[ iL ] : -srcL[ iL ];
    128 		maxL = vL > maxL ? vL : maxL;
    129 	}
    130 	return maxL;
    131 }
    132 
    133 /* ------------------------------------------------------------------------- */
    134 
    135 /* ========================================================================= */
    136 /*                                                                           */
    137 /* ---- \ghd{ modify functions } ------------------------------------------- */
    138 /*                                                                           */
    139 /* ========================================================================= */
    140 
    141 /* ------------------------------------------------------------------------- */
    142 
    143 void bts_Flt16Vec_create( struct bbs_Context* cpA,
    144 						  struct bts_Flt16Vec* ptrA,
    145 						  uint32 sizeA,
    146 						  struct bbs_MemSeg* mspA )
    147 {
    148 	bbs_Int16Arr_create( cpA, &ptrA->arrE, sizeA, mspA );
    149 }
    150 
    151 /* ------------------------------------------------------------------------- */
    152 
    153 void bts_Flt16Vec_size( struct bbs_Context* cpA,
    154 						struct bts_Flt16Vec* ptrA,
    155 						uint32 sizeA )
    156 {
    157 	bbs_Int16Arr_size( cpA, &ptrA->arrE, sizeA );
    158 }
    159 
    160 /* ------------------------------------------------------------------------- */
    161 
    162 /* ========================================================================= */
    163 /*                                                                           */
    164 /* ---- \ghd{ I/O } -------------------------------------------------------- */
    165 /*                                                                           */
    166 /* ========================================================================= */
    167 
    168 /* ------------------------------------------------------------------------- */
    169 
    170 uint32 bts_Flt16Vec_memSize( struct bbs_Context* cpA,
    171 							  const struct bts_Flt16Vec *ptrA )
    172 {
    173 	return  bbs_SIZEOF16( uint32 ) /* mem size */
    174 		+ bbs_Int16Arr_memSize( cpA, &ptrA->arrE )
    175 		+ bbs_SIZEOF16( ptrA->expE );
    176 }
    177 
    178 /* ------------------------------------------------------------------------- */
    179 
    180 uint32 bts_Flt16Vec_memWrite( struct bbs_Context* cpA,
    181 							   const struct bts_Flt16Vec* ptrA,
    182 							   uint16* memPtrA )
    183 {
    184 	uint32 memSizeL = bts_Flt16Vec_memSize( cpA, ptrA );
    185 	memPtrA += bbs_memWrite32( &memSizeL, memPtrA );
    186 	memPtrA += bbs_Int16Arr_memWrite( cpA, &ptrA->arrE, memPtrA );
    187 	memPtrA += bbs_memWrite16( &ptrA->expE, memPtrA );
    188 	return memSizeL;
    189 }
    190 
    191 /* ------------------------------------------------------------------------- */
    192 
    193 uint32 bts_Flt16Vec_memRead( struct bbs_Context* cpA,
    194 							  struct bts_Flt16Vec* ptrA,
    195 							  const uint16* memPtrA,
    196 							  struct bbs_MemSeg* mspA )
    197 {
    198 	uint32 memSizeL;
    199 	if( bbs_Context_error( cpA ) ) return 0;
    200 	memPtrA += bbs_memRead32( &memSizeL, memPtrA );
    201 	memPtrA += bbs_Int16Arr_memRead( cpA, &ptrA->arrE, memPtrA, mspA );
    202 	memPtrA += bbs_memRead16( &ptrA->expE, memPtrA );
    203 
    204 	if( memSizeL != bts_Flt16Vec_memSize( cpA, ptrA ) )
    205 	{
    206 		bbs_ERR0( bbs_ERR_CORRUPT_DATA, "uint32 bts_Flt16Vec_memRead( const struct bts_Flt16Vec* ptrA, const void* memPtrA ):\n"
    207                    "size mismatch" );
    208 		return 0;
    209 	}
    210 	return memSizeL;
    211 }
    212 
    213 /* ------------------------------------------------------------------------- */
    214 
    215 /* ========================================================================= */
    216 /*                                                                           */
    217 /* ---- \ghd{ exec functions } --------------------------------------------- */
    218 /*                                                                           */
    219 /* ========================================================================= */
    220 
    221 /* ------------------------------------------------------------------------- */
    222 
    223 void bts_Flt16Vec_maximizeMantisse( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA )
    224 {
    225     uint32 maxAbsL = bts_Flt16Vec_maxAbs( cpA, ptrA );
    226 	int16 shlL = 0;
    227 
    228 	if( maxAbsL == 0 ) return; /* cannot maximize 0 */
    229 
    230 	while( maxAbsL < 0x4000 )
    231 	{
    232 		shlL++;
    233 		maxAbsL <<= 1;
    234 	}
    235 
    236 	if( shlL > 0 )
    237 	{
    238 		uint32 iL;
    239 		uint32 sizeL = ptrA->arrE.sizeE;
    240 		int16* dstL = ptrA->arrE.arrPtrE;
    241 		for( iL = 0; iL < sizeL; iL++ ) dstL[ iL ] <<= shlL;
    242 		ptrA->expE -= shlL;
    243 	}
    244 }
    245 
    246 /* ------------------------------------------------------------------------- */
    247 
    248 uint32 bts_Flt16Vec_maximizeAbsValue( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA )
    249 {
    250     int32 maxAbsL = bts_Flt16Vec_maxAbs( cpA, ptrA );
    251 	int32 fL;
    252 	if( maxAbsL == 0 ) return 0; /* vector is zero */
    253 
    254 	fL = ( int32 )0x7FFF0000 / maxAbsL;
    255 
    256 	{
    257 		uint32 iL;
    258 		uint32 sizeL = ptrA->arrE.sizeE;
    259 		int16* dstL = ptrA->arrE.arrPtrE;
    260 		for( iL = 0; iL < sizeL; iL++ ) dstL[ iL ] = ( ( int32 )dstL[ iL ] * fL + 32768 ) >> 16;
    261 	}
    262 
    263 	return fL;
    264 }
    265 
    266 /* ------------------------------------------------------------------------- */
    267 
    268 void bts_Flt16Vec_zeroAverage( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA )
    269 {
    270 	uint16 iL;
    271 	uint16 sizeL = ptrA->arrE.sizeE;
    272 	int16* dstL = ptrA->arrE.arrPtrE;
    273 	int16 avgL = bts_Flt16Vec_avg( cpA, ptrA );
    274 	for( iL = 0; iL < sizeL; iL++ ) dstL[ iL ] -= avgL;
    275 }
    276 
    277 /* ------------------------------------------------------------------------- */
    278 
    279 void bts_Flt16Vec_normalize( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA )
    280 {
    281 	uint32 normL = bts_Flt16Vec_norm( cpA, ptrA );
    282 
    283 	if( normL == 0 )
    284 	{
    285 		/* vector is zero - do nothing */
    286 		return;
    287 	}
    288 	else
    289 	{
    290 		int16* dstL = ptrA->arrE.arrPtrE;
    291 		uint16 iL;
    292 		uint16 sizeL = ptrA->arrE.sizeE;
    293 	    int16 expL = 0;
    294 		int32 fL;
    295 
    296 		/* let norm occupy 17 bits */
    297 		if( ( normL & 0xFFFE0000 ) != 0 )
    298 		{
    299 			while( ( ( normL >> -expL ) & 0xFFFE0000 ) != 0 ) expL--;
    300 			normL >>= -expL;
    301 		}
    302 		else
    303 		{
    304 			while( ( ( normL <<  expL ) & 0xFFFF0000 ) == 0 ) expL++;
    305 			normL <<=  expL;
    306 		}
    307 
    308 		/* fL is positive and occupies only 16 bits - a product with int16 fits in int32 */
    309 		fL = ( uint32 )0xFFFFFFFF / normL;
    310 
    311 		/* multiply with factor */
    312 		for( iL = 0; iL < sizeL; iL++ ) dstL[ iL ] = ( ( ( ( int32 )dstL[ iL ] * fL ) >> 15 ) + 1 ) >> 1;
    313 
    314 		/* set exponent */
    315 		ptrA->expE = expL - 16;
    316 	}
    317 /*
    318 	{
    319 		uint32 testNormL = bts_Flt16Vec_norm( cpA, ptrA );
    320 		printf( "test norm %f\n", ( float )testNormL / ( 1 << -ptrA->expE ) );
    321 	}
    322 */
    323 }
    324 
    325 /* ------------------------------------------------------------------------- */
    326 
    327 void bts_Flt16Vec_setZero( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA )
    328 {
    329 	bbs_Int16Arr_fill( cpA, &ptrA->arrE, 0 );
    330 	ptrA->expE = 0;
    331 }
    332 
    333 /* ------------------------------------------------------------------------- */
    334 
    335 void bts_Flt16Vec_mul( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA, int32 valA, int16 expA )
    336 {
    337 	int32 valL = valA;
    338 	int16 expL = expA;
    339 
    340 	if( valL == 0 )
    341 	{
    342 		bts_Flt16Vec_setZero( cpA, ptrA );
    343 		return;
    344 	}
    345 	else
    346 	{
    347 		uint32 iL;
    348 		uint32 sizeL = ptrA->arrE.sizeE;
    349 		int16* dstL = ptrA->arrE.arrPtrE;
    350 
    351 		/* adjust valL to maximum 16 bit accuracy  */
    352 		uint32 absValL = valL > 0 ? valL : -valL;
    353 		if( ( absValL & 0xFFFF8000 ) != 0 )
    354 		{
    355 			int32 shrL = 0;
    356 			while( ( absValL & 0xFFFF8000 ) != 0 )
    357 			{
    358 				absValL >>= 1;
    359 				shrL++;
    360 			}
    361 
    362 			if( shrL > 0 )
    363 			{
    364 				valL = ( ( valL >> ( shrL - 1 ) ) + 1 ) >> 1;
    365 				expL += shrL;
    366 				if( valL >= 0x08000 ) valL = 0x07FFF; /* saturate */
    367 			}
    368 		}
    369 		else
    370 		{
    371 			int32 shlL = 0;
    372 			while( ( absValL & 0xFFFFC000 ) == 0 )
    373 			{
    374 				absValL <<= 1;
    375 				shlL++;
    376 			}
    377 
    378 			valL <<= shlL;
    379 			expL -= shlL;
    380 		}
    381 
    382 		for( iL = 0; iL < sizeL; iL++ )
    383 		{
    384 			dstL[ iL ] = ( ( ( ( int32 )dstL[ iL ] * valL ) >> 15 ) + 1 ) >> 1;
    385 		}
    386 		ptrA->expE += expL + 16;
    387 	}
    388 }
    389 
    390 /* ------------------------------------------------------------------------- */
    391 
    392 void bts_Flt16Vec_dotPtrd( struct bbs_Context* cpA, struct bts_Flt16Vec* vp1A, struct bts_Flt16Vec* vp2A, int32* manPtrA, int32* expPtrA )
    393 {
    394 	bbs_DEF_fNameL( "void bts_Flt16Vec_dotPtrd( struct bbs_Context* cpA, struct bts_Flt16Vec* vp1A, struct bts_Flt16Vec* vp2A, int32* matPtrA, int32* expPtrA )" )
    395 	uint16 iL;
    396 	uint16 sizeL = vp1A->arrE.sizeE;
    397 	const int16* arr1L = vp1A->arrE.arrPtrE;
    398 	const int16* arr2L = vp2A->arrE.arrPtrE;
    399 	int16 shrm1L = -1; /* shift minus 1 */
    400 	int32 sumL;
    401 
    402 	if( vp1A->arrE.sizeE != vp2A->arrE.sizeE )
    403 	{
    404 		bbs_ERROR1( "%s:\nVectors have different size", fNameL );
    405 		return;
    406 	}
    407 
    408 	sumL = 0;
    409 	/* shrm1L == -1 */
    410 	for( iL = 0; iL < sizeL; iL++ )
    411 	{
    412 		sumL += ( int32 )arr1L[ iL ] * ( int32 )arr2L[ iL ];
    413 		if( ( ( ( sumL > 0 ) ? sumL : -sumL ) & 0xC0000000 ) != 0 ) break;
    414 	}
    415 
    416 	if( iL < sizeL )
    417 	{
    418 		/* danger of overflow: increase shift; adjust sum */
    419 		shrm1L++;
    420 		sumL = ( ( sumL >> 1 ) + 1 ) >> 1;
    421 
    422 		/* shrm1L == 0 */
    423 		for( iL = 0; iL < sizeL; iL++ )
    424 		{
    425 			sumL += ( int32 )( ( arr1L[ iL ] + 1 ) >> 1 ) * ( int32 )( ( arr2L[ iL ] + 1 ) >> 1 );
    426 			if( ( ( ( sumL > 0 ) ? sumL : -sumL ) & 0xC0000000 ) != 0 ) break;
    427 		}
    428 
    429 		for( iL = 0; iL < sizeL; iL++ )
    430 		{
    431 			if( ( ( ( sumL > 0 ) ? sumL : -sumL ) & 0xC0000000 ) != 0 )
    432 			{
    433 				/* danger of overflow: increase shift; adjust sum */
    434 				shrm1L++;
    435 				sumL = ( ( sumL >> 1 ) + 1 ) >> 1;
    436 			}
    437 
    438 			sumL += ( int32 )( ( ( arr1L[ iL ] >> shrm1L ) + 1 ) >> 1 ) * ( int32 )( ( ( arr2L[ iL ] >> shrm1L ) + 1 ) >> 1 );
    439 		}
    440 	}
    441 
    442 	if( manPtrA != NULL ) *manPtrA = sumL;
    443 	if( expPtrA != NULL ) *expPtrA = vp1A->expE + vp2A->expE + ( ( shrm1L + 1 ) << 1 );
    444 }
    445 
    446 /* ------------------------------------------------------------------------- */
    447 
    448 void bts_Flt16Vec_append( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA, struct bts_Flt16Vec* srcPtrA )
    449 {
    450 	if( ptrA->arrE.sizeE == 0 )
    451 	{
    452 		bts_Flt16Vec_copy( cpA, ptrA, srcPtrA );
    453 	}
    454 	else
    455 	{
    456 		uint32 idxL = ptrA->arrE.sizeE;
    457 		bts_Flt16Vec_size( cpA, ptrA, idxL + srcPtrA->arrE.sizeE );
    458 
    459 		/* copy data */
    460 		bbs_memcpy16( ptrA->arrE.arrPtrE + idxL, srcPtrA->arrE.arrPtrE, srcPtrA->arrE.sizeE );
    461 
    462 		/* equalize exponent */
    463 		if( ptrA->expE > srcPtrA->expE )
    464 		{
    465 			uint32 iL;
    466 			uint32 sizeL = srcPtrA->arrE.sizeE;
    467 			uint32 shrL = ptrA->expE - srcPtrA->expE;
    468 			int16* dstL = ptrA->arrE.arrPtrE + idxL;
    469 			for( iL = 0; iL < sizeL; iL++ ) dstL[ iL ] = ( ( dstL[ iL ] >> ( shrL - 1 ) ) + 1 ) >> 1;
    470 		}
    471 		else if( ptrA->expE < srcPtrA->expE )
    472 		{
    473 			uint32 iL;
    474 			uint32 sizeL = idxL;
    475 			uint32 shrL = srcPtrA->expE - ptrA->expE;
    476 			int16* dstL = ptrA->arrE.arrPtrE;
    477 			for( iL = 0; iL < sizeL; iL++ ) dstL[ iL ] = ( ( dstL[ iL ] >> ( shrL - 1 ) ) + 1 ) >> 1;
    478 			ptrA->expE = srcPtrA->expE;
    479 		}
    480 	}
    481 }
    482 
    483 /* ------------------------------------------------------------------------- */
    484 
    485 /* ========================================================================= */
    486 
    487