1 /*---------------------------------------------------------------------------* 2 * himul32.h * 3 * * 4 * Copyright 2007, 2008 Nuance Communciations, Inc. * 5 * * 6 * Licensed under the Apache License, Version 2.0 (the 'License'); * 7 * you may not use this file except in compliance with the License. * 8 * * 9 * You may obtain a copy of the License at * 10 * http://www.apache.org/licenses/LICENSE-2.0 * 11 * * 12 * Unless required by applicable law or agreed to in writing, software * 13 * distributed under the License is distributed on an 'AS IS' BASIS, * 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * 15 * See the License for the specific language governing permissions and * 16 * limitations under the License. * 17 * * 18 *---------------------------------------------------------------------------*/ 19 20 /* 21 //////////////////////////////////////////////////////////////////////////// 22 // 23 // FILE: himul32.cpp 24 // 25 // CREATED: 11-September-99 26 // 27 // DESCRIPTION: A multiplier returns most-significant 32 bits of the 64-bit 28 // product of its two signed 32-bit integers 29 // 30 // 31 // 32 // 33 // MODIFICATIONS: 34 // Revision history log 35 VSS revision history. Do not edit by hand. 36 37 $NoKeywords: $ 38 39 */ 40 41 /* do not use PPC. VxWorks defines the PPC in vxcpu.h */ 42 #if defined(_PPC_) 43 44 /* Reads timebase register for a higher precision clock */ 45 46 asm PINLINE int32 himul32(asr_int32_t factor1, asr_int32_T factor2) 47 { 48 % reg factor1; 49 reg factor2; 50 51 mulhw r3, factor1, factor2 # place the high order 32 bits of the product in the return register r3 52 } 53 54 #else 55 56 /****************************************************************** 57 himul32 returns the most-significant 32 bits of the 64-bit 58 product of its two signed 32-bit integer arguments. 59 In other words, it's the exact value of the mathematical expression 60 floor( (factor1 * factor2) / 2**32 ) 61 This is a platform-independent definition that needs to be 62 implemented in platform-specific ways. 63 64 Parameters: 65 factor1 -- first signed 32 bit integer 66 factor2 -- second signed 32 bit integer 67 68 Returns: 69 the most-significant 32 bits of the multiplication results 70 *********************************************************************/ 71 72 #if COMPILER == C_MICROSOFT 73 74 #if TARGET_CPU == CPU_I86 75 76 PINLINE asr_int32_t himul32(asr_int32_t factor1, asr_int32_t factor2) 77 { 78 asr_int32_t retval; 79 /* 80 // The x86 imul instruction, given a single 32-bit operand, computes 81 // the signed 64-bit product of register EAX and that operand, into 82 // the register pair EDX:EAX. So we have to move the first factor into 83 // EAX, then IMUL, then take the high 32 bits (in EDX) and move them 84 // back to EAX (because that's where a function's return value is 85 // taken from). 86 */ 87 __asm { 88 mov eax, factor1 89 imul factor2 90 mov retval, edx 91 } 92 return retval; 93 } 94 95 #else /* TARGET_CPU != CPU_I86 */ 96 97 PINLINE asr_int32_t himul32(asr_int32_t factor1, asr_int32_t factor2) 98 { 99 union { 100 __int64 full; 101 struct 102 { 103 asr_int32_t lo; 104 asr_int32_t hi; 105 } 106 pieces; 107 } result; 108 109 __int64 x = factor1; 110 __int64 y = factor2; 111 result.full = x * y; 112 return result.pieces.hi; 113 } 114 115 #endif /* TARGET_CPU == CPU_I86 */ 116 117 #else /* ~ COMPILER != C_MICROSOFT */ 118 119 /*** ANSI C ***/ 120 121 PINLINE asr_int32_t himul32(asr_int32_t factor1, asr_int32_t factor2) 122 { 123 124 asr_uint32_t x = (asr_uint32_t)factor1; 125 asr_uint32_t y = (asr_uint32_t)factor2; 126 asr_uint32_t xhi, xlo, yhi, ylo; 127 asr_uint32_t hi, lo, mid; 128 asr_uint32_t oldlo, carry; 129 int sign = 0; 130 131 if (factor1 < 0) 132 { 133 x = (asr_uint32_t) - factor1; 134 sign = 1; 135 } 136 if (factor2 < 0) 137 { 138 y = (asr_uint32_t) - factor2; 139 sign = 1 - sign; 140 } 141 xhi = x >> 16; /* <= 2**15 */ 142 xlo = x & 0xffff; /* < 2**16 */ 143 yhi = y >> 16; /* <= 2**15 */ 144 ylo = y & 0xffff; /* < 2**16 */ 145 146 lo = xlo * ylo; 147 /* 148 // xhi <= 2**15 and ylo <= 2**16-1, so 149 // xhi * ylo <= 2**31 - 2**15. 150 // Ditto for yhi * xlo, so their sum is 151 // <= 2*32 - 2**16, and so the next line can't overflow. 152 */ 153 mid = xhi * ylo + yhi * xlo; 154 hi = xhi * yhi; 155 156 /* 157 // Now add the low part of mid to the high part of lo, and the 158 // high part of mid to the low part of hi: 159 // xxxxxxxx xxxxxxxx lo 160 // xxxxxxxx xxxxxxxx mid 161 // xxxxxxxx xxxxxxxx hi 162 // ----------------------------------- 163 // xxxxxxxx xxxxxxxx lo 164 // xxxxxxxx xxxxxxxx hi 165 // Note that folding mid into lo can cause a carry. An old trick 166 // for portable carry-detection applies: if a and b are unsigned, 167 // their sum overflows if and only if it's less than a (or b; can 168 // check either one). 169 */ 170 171 oldlo = lo; 172 lo += mid << 16; 173 carry = lo < oldlo; 174 175 hi += carry + (mid >> 16); 176 177 if (sign) 178 { 179 /* 180 // Result must be negated, which is the same as taking the 181 // complement and adding 1. So there's a carry out of the low 182 // half if and only if it's 0 now. 183 */ 184 hi = ~hi; 185 hi += lo == 0; 186 } 187 188 return (asr_int32_t)hi; 189 } 190 191 #endif /* ~ COMPILER == C_MICROSOFT */ 192 193 194 #endif 195