/* APPLE LOCAL file PPC_INTRINSICS */

/* Definitions for PowerPC intrinsic instructions
   Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */

/* As a special exception, if you include this header file into source
   files compiled by GCC, this header file does not by itself cause
   the resulting executable to be covered by the GNU General Public
   License.  This exception does not however invalidate any other
   reasons why the executable file might be covered by the GNU General
   Public License.  */
/*
 * The following PowerPC intrinsics are provided by this header:
 *
 * Low-Level Processor Synchronization
 *   __eieio    - Enforce In-Order Execution of I/O
 *   __isync    - Instruction Synchronize
 *   __sync     - Synchronize
 *   __lwsync   - Lightweight Synchronize
 *
 * Manipulating the Contents of a Variable or Register
 *   __cntlzw   - Count Leading Zeros Word
 *   __cntlzd   - Count Leading Zeros Double Word
 *   __rlwimi   - Rotate Left Word Immediate then Mask Insert
 *   __rlwinm   - Rotate Left Word Immediate then AND with Mask
 *   __rlwnm    - Rotate Left Word then AND with Mask
 *
 * Byte-Reversing Functions
 *   __lhbrx    - Load Half Word Byte-Reverse Indexed
 *   __lwbrx    - Load Word Byte-Reverse Indexed
 *   __sthbrx   - Store Half Word Byte-Reverse Indexed
 *   __stwbrx   - Store Word Byte-Reverse Indexed
 *
 * Data Cache Manipulation
 *   __dcba     - Data Cache Block Allocate
 *   __dcbf     - Data Cache Block Flush
 *   __dcbst    - Data Cache Block Store
 *   __dcbt     - Data Cache Block Touch
 *   __dcbtst   - Data Cache Block Touch for Store
 *   __dcbzl    - Data Cache Block Set to Zero
 *   __dcbz     - Data Cache Block Set to Zero (32-bytes only)
 *
 * Setting the Floating-Point Environment
 *   __setflm   - Set Floating-point Mode
 *
 * Math Functions
 *   __fabs     - Floating-Point Absolute Value
 *   __fnabs    - Floating Negative Absolute Value
 *   __fctiw    - Floating Convert to Integer Word
 *   __fctiwz   - Floating Convert to Integer Word with Round toward Zero
 *   __fctidz   - Floating Convert to Integer Doubleword with Round toward Zero
 *   __fctid    - Floating Convert to Integer Doubleword
 *   __fcfid    - Floating Convert From Integer Doubleword
 *   __fmadd    - Floating Multiply-Add (Double-Precision)
 *   __fmadds   - Floating Multiply-Add Single
 *   __fmsub    - Floating Multiply-Subtract (Double-Precision)
 *   __fmsubs   - Floating Multiply-Subtract Single
 *   __fmul     - Floating Multiply (Double-Precision)
 *   __fmuls    - Floating Multiply Single
 *   __fnmadd   - Floating Negative Multiply-Add (Double-Precision)
 *   __fnmadds  - Floating Negative Multiply-Add Single
 *   __fnmsub   - Floating Negative Multiply-Subtract (Double-Precision)
 *   __fnmsubs  - Floating Negative Multiply-Subtract Single
 *   __fres     - Floating Reciprocal Estimate
 *   __frsp     - Floating Round to Single-Precision
 *   __frsqrte  - Floating Reciprocal Square Root Estimate
 *   __frsqrtes - Floating Reciprocal Square Root Estimate Single
 *   __fsel     - Floating Select
 *   __fsels    - Floating Select (Single-Precision variant)
 *   __fsqrt    - Floating-Point Square Root (Double-Precision)
 *   __fsqrts   - Floating-Point Square Root Single-Precision
 *   __mulhw    - Multiply High Word
 *   __mulhwu   - Multiply High Word Unsigned
 *   __stfiwx   - Store Floating-Point as Integer Word Indexed
 *
 * Miscellaneous Functions
 *   __nop      - PPC preferred form of no operation
 *   __astrcmp  - assembly strcmp
 *   __icbi     - Instruction Cache Block Invalidate
 *   __mffs     - Move from FPSCR
 *   __mfspr    - Move from Special Purpose Register
 *   __mtfsf    - Move to FPSCR Fields
 *   __mtspr    - Move to Special Purpose Register
 *   __OSReadSwapSInt16 - lhbrx for signed shorts
 *   __OSReadSwapUInt16 - lhbrx for unsigned shorts
 *
 * TO DO:
 * - Desired:
 *   mullw
 * - Available in CodeWarrior, not yet implemented here:
 *   abs, labs, fabsf, fnabsf
 *
 * NOTES:
 * - Some of the intrinsics need to be macros because certain
 *   parameters MUST be integer constants and not values in registers.
 * - The declarations use __asm__ instead of asm and __inline__ instead
 *   of inline to prevent errors when -ansi is specified.
 * - Some of the intrinsic definitions use the "volatile" specifier on
 *   the "asm" statements in order to work around what appears to be
 *   a bug in the compiler/optimizer.  In general we have avoided the
 *   use of "volatile" because it suppresses optimization on the
 *   generated instructions.  The instructions to which "volatile"
 *   has been added where it appears that it should not be needed are
 *   lhbrx and lwbrx.
 *
 * Contributors: Fred Forsman (editor), Turly O'Connor, Ian Ollmann, Sanjay Patel
 * Last modified: October 6, 2004
 */

#ifndef _PPC_INTRINSICS_H_
#define _PPC_INTRINSICS_H_

#if (defined(__ppc__) || defined(__ppc64__)) && ! defined(__MWERKS__)

/*******************************************************************
 *                 Special Purpose Registers (SPRs)                *
 *******************************************************************/

#define __SPR_MQR       0               /* PPC 601 only */
#define __SPR_XER       1
#define __SPR_RTCU      4               /* Real time clock upper. PPC 601 only.*/
#define __SPR_RTCL      5               /* Real time clock lower. PPC 601 only.*/
#define __SPR_LR        8
#define __SPR_CTR       9
#define __SPR_VRSAVE    256             /* AltiVec */
#define __SPR_TBL       268             /* Time-base Lower. Not on PPC 601 */
#define __SPR_TBU       269             /* Time-base Upper. Not on PPC 601 */
#define __SPR_UMMCR2    928             /* PPC 74xx */
#define __SPR_UPMC5     929             /* PPC 745x */
#define __SPR_UPMC6     930             /* PPC 745x */
#define __SPR_UBAMR     935             /* PPC 7400 and 7410 */
#define __SPR_UMMCR0    936             /* PPC 74xx and 750 */
#define __SPR_UPMC1     937             /* PPC 74xx and 750 */
#define __SPR_UPMC2     938             /* PPC 74xx and 750 */
#define __SPR_USIAR     939             /* PPC 74xx and 750 */
#define __SPR_UMMCR1    940             /* PPC 74xx and 750 */
#define __SPR_UPMC3     941             /* PPC 74xx and 750 */
#define __SPR_UPMC4     942             /* PPC 74xx and 750 */
#define __SPR_PIR       1023            /* supervisor level only! */

/*
 * Shorthand macros for some commonly used SPRs.
 */
#define __mfxer()               __mfspr(__SPR_XER)
#define __mflr()                __mfspr(__SPR_LR)
#define __mfctr()               __mfspr(__SPR_CTR)
#define __mfvrsave()            __mfspr(__SPR_VRSAVE)
#define __mftb()                __mfspr(__SPR_TBL)
#define __mftbu()               __mfspr(__SPR_TBU)

#define __mtlr(value)           __mtspr(__SPR_LR, value)
#define __mtxer(value)          __mtspr(__SPR_XER, value)
#define __mtctr(value)          __mtspr(__SPR_CTR, value)
#define __mtvrsave(value)       __mtspr(__SPR_VRSAVE, value)
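
/*
 * Usage sketch (illustrative, not part of this header): reading the
 * 64-bit timebase coherently on 32-bit PowerPC.  TBU is re-read until
 * it is stable, guarding against TBL rolling over between the reads.
 * (The PPC 601 lacks the timebase registers; use RTCU/RTCL there.)
 *
 *   static unsigned long long __read_timebase (void)
 *   {
 *     unsigned long upper, lower, upper2;
 *     do {
 *       upper  = __mftbu ();
 *       lower  = __mftb ();
 *       upper2 = __mftbu ();
 *     } while (upper != upper2);
 *     return ((unsigned long long) upper << 32) | lower;
 *   }
 */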


/*******************************************************************
 *               Low-Level Processor Synchronization               *
 *******************************************************************/

/*
 * __eieio - Enforce In-Order Execution of I/O
 *
 *   void __eieio (void);
 */
#define __eieio() __asm__ ("eieio" : : : "memory")

/*
 * __isync - Instruction Synchronize
 *
 *   void __isync (void);
 */
#define __isync()       \
  __asm__ volatile ("isync")

/*
 * __sync - Synchronize
 *
 *   void __sync (void);
 */
#define __sync() __asm__ volatile ("sync")

/*
 * __lwsync - Lightweight Synchronize, see PPC2.01, Book 2
 *
 *   void __lwsync (void);
 */
#define __lwsync() __asm__ volatile ("sync 1")
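
/*
 * Usage sketch (illustrative, not part of this header): publishing data
 * to another processor.  The __lwsync orders the payload store ahead of
 * the flag store in system memory; a reader pairs the flag load with
 * __isync (after a dependent branch) or __sync before using the payload.
 *
 *   extern int payload;
 *   extern volatile int ready;
 *
 *   static void publish (int value)
 *   {
 *     payload = value;
 *     __lwsync ();
 *     ready = 1;
 *   }
 */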


/*******************************************************************
 *                     Byte-Reversing Functions                    *
 *******************************************************************/

/*
 * __lhbrx - Load Half Word Byte-Reverse Indexed
 *
 *   unsigned short __lhbrx(void *, int);
 */
#define __lhbrx(base, index)   \
  ({ unsigned short __ppc_i_lhbrxResult;       \
     __asm__ volatile ("lhbrx %0, %1, %2" : "=r" (__ppc_i_lhbrxResult) : "b%" (index), "r" (base) : "memory"); \
     /*return*/ __ppc_i_lhbrxResult; })

/*
 * __lwbrx - Load Word Byte-Reverse Indexed
 *
 *   unsigned int __lwbrx(void *, int);
 */
#define __lwbrx(base, index)    \
  ({ unsigned int __ppc_i_lwbrxResult; \
     __asm__ volatile ("lwbrx %0, %1, %2" : "=r" (__ppc_i_lwbrxResult) : "b%" (index), "r" (base) : "memory"); \
     /*return*/ __ppc_i_lwbrxResult; })

/*
 * __sthbrx - Store Half Word Byte-Reverse Indexed
 *
 *   void __sthbrx(unsigned short, void *, int);
 */
#define __sthbrx(value, base, index)    \
  __asm__ ("sthbrx %0, %1, %2" : : "r" (value), "b%" (index), "r" (base) : "memory")

/*
 * __stwbrx - Store Word Byte-Reverse Indexed
 *
 *   void __stwbrx(unsigned int, void *, int);
 */
#define __stwbrx(value, base, index)    \
  __asm__ ("stwbrx %0, %1, %2" : : "r" (value), "b%" (index), "r" (base) : "memory")
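
/*
 * Usage sketch (illustrative, not part of this header): reading and
 * writing little-endian 32-bit values on big-endian PowerPC.
 *
 *   static unsigned int read_le32 (void *p)
 *   {
 *     return __lwbrx (p, 0);
 *   }
 *
 *   static void write_le32 (void *p, unsigned int v)
 *   {
 *     __stwbrx (v, p, 0);
 *   }
 */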


/*******************************************************************
 *       Manipulating the Contents of a Variable or Register       *
 *******************************************************************/

/*
 * __cntlzw - Count Leading Zeros Word
 * __cntlzd - Count Leading Zeros Double Word
 */

#define __cntlzw(a)     __builtin_clz(a)
#define __cntlzd(a)     __builtin_clzll(a)
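
/*
 * Usage sketch (illustrative, not part of this header): floor(log2(x))
 * for a nonzero 32-bit value via the leading-zero count.  (The result
 * is undefined for x == 0, as __builtin_clz is.)
 *
 *   static int ilog2 (unsigned int x)
 *   {
 *     return 31 - __cntlzw (x);
 *   }
 */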

/*
 * __rlwimi - Rotate Left Word Immediate then Mask Insert
 *
 *   int __rlwimi(int, long, int, int, int);
 *
 * We don't mention "%1" below: operand[1] needs to be skipped as
 * it's just a placeholder to let the compiler know that rA is read
 * from as well as written to.
 */
#define __rlwimi(rA, rS, cnt, mb, me)                               \
  ({ __asm__ ("rlwimi %0,%2,%3,%4,%5" : "=r" (rA)                   \
              : "0" (rA), "r" (rS), "n" (cnt), "n" (mb), "n" (me)); \
     /*return*/ rA;})

/*
 * __rlwinm - Rotate Left Word Immediate then AND with Mask
 *
 *   int __rlwinm(long, int, int, int);
 */
#define __rlwinm(rS, cnt, mb, me)                                       \
  ({ unsigned int __ppc_i_val;                                          \
     __asm__ ("rlwinm %0,%1,%2,%3,%4" : "=r" (__ppc_i_val)              \
              : "r" (rS), "n" (cnt), "n" (mb), "n" (me));               \
     /*return*/ __ppc_i_val;})

/*
 * __rlwnm - Rotate Left Word then AND with Mask
 *
 *   int __rlwnm(long, int, int, int);
 */
#define __rlwnm(value, leftRotateBits, maskStart, maskEnd)              \
  ({ unsigned int __ppc_i_result;                                       \
     __asm__ ("rlwnm %0, %1, %2, %3, %4" : "=r" (__ppc_i_result) :      \
              "r" (value), "r" (leftRotateBits), "n" (maskStart), "n" (maskEnd)); \
     /*return*/ __ppc_i_result; })
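
/*
 * Usage sketch (illustrative, not part of this header): extracting the
 * byte in bits 8-15 (big-endian bit numbering) of a word.  Rotating
 * left by 16 moves the field to bits 24-31, and the mask keeps only
 * those bits.  The rotate count and mask bounds must be constants.
 *
 *   static unsigned int extract_byte1 (unsigned int w)
 *   {
 *     return __rlwinm (w, 16, 24, 31);
 *   }
 */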


/*******************************************************************
 *                     Data Cache Manipulation                     *
 *******************************************************************/

/*
 * --- Data Cache Block instructions ---
 *
 * Please see Motorola's "The Programming Environments for 32-Bit
 * Microprocessors" for a description of what these do.
 *
 *   Parameter descriptions:
 *
 *     base             starting address for figuring out where the
 *                      cacheline is
 *
 *     index            byte count to be added to the base address for
 *                      purposes of calculating the effective address
 *                      of the cacheline to be operated on.
 *
 *   Effective Address of cacheline to be manipulated =
 *     (char*) base + index
 *
 *   WARNING: The size and alignment of cachelines are subject to
 *     change on future processors!  Cachelines are 32 bytes in
 *     size and are aligned to 32 bytes on PowerPC 601, 603, 604,
 *     750, 7400, 7410, 7450, and 7455.
 *
 */

/*
 * __dcba - Data Cache Block Allocate
 *
 *   void __dcba(void *, int);
 *
 * WARNING: dcba is a valid instruction only on PowerPC 7400, 7410,
 *          7450 and 7455.
 */
#define __dcba(base, index)     \
  __asm__ ("dcba %0, %1" : /*no result*/ : "b%" (index), "r" (base) : "memory")

/*
 * __dcbf - Data Cache Block Flush
 *
 *   void __dcbf(void *, int);
 */
#define __dcbf(base, index)     \
  __asm__ ("dcbf %0, %1" : /*no result*/ : "b%" (index), "r" (base) : "memory")

/*
 * __dcbst - Data Cache Block Store
 *
 *   void __dcbst(void *, int);
 */
#define __dcbst(base, index)    \
  __asm__ ("dcbst %0, %1" : /*no result*/ : "b%" (index), "r" (base) : "memory")

/*
 * __dcbt - Data Cache Block Touch
 *
 *   void __dcbt(void *, int);
 */
#define __dcbt(base, index)     \
  __asm__ ("dcbt %0, %1" : /*no result*/ : "b%" (index), "r" (base) : "memory")

/*
 * __dcbtst - Data Cache Block Touch for Store
 *
 *   void __dcbtst(void *, int);
 */
#define __dcbtst(base, index)   \
  __asm__ ("dcbtst %0, %1" : /*no result*/ : "b%" (index), "r" (base) : "memory")

/*
 * __dcbzl - Data Cache Block Set to Zero
 *
 *   void __dcbzl(void *, int);
 */
#define __dcbzl(base, index)     \
  __asm__ ("dcbzl %0, %1" : /*no result*/ : "b%" (index), "r" (base) : "memory")

/*
 * __dcbz - Data Cache Block Set to Zero (32-bytes only)
 *
 * WARNING: this is for legacy purposes only
 *
 *   void __dcbz(void *, int);
 */
#define __dcbz(base, index)     \
  __asm__ ("dcbz %0, %1" : /*no result*/ : "b%" (index), "r" (base) : "memory")
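
/*
 * Usage sketch (illustrative, not part of this header): software
 * prefetching ahead of a loop with __dcbt.  Touching past the end of
 * the array is harmless, since dcbt is only a hint and never faults.
 * The look-ahead distance of 8 elements is an arbitrary example value.
 *
 *   static long sum_array (const long *a, int n)
 *   {
 *     long total = 0;
 *     int i;
 *     for (i = 0; i < n; i++) {
 *       __dcbt ((void *) a, (i + 8) * sizeof (long));
 *       total += a[i];
 *     }
 *     return total;
 *   }
 */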


/*******************************************************************
 *              Setting the Floating-Point Environment             *
 *******************************************************************/

/*
 * __setflm - Set Floating-point Mode
 *
 * Sets the FPSCR (floating-point status and control register),
 * returning the original value.
 *
 *   ??? CW: float __setflm(float);
 */
static inline double __setflm (double newflm) __attribute__((always_inline));
static inline double
__setflm(double newflm)
{
  double original;

  __asm__ ("mffs %0"
           /* outputs:  */ : "=f" (original));
  __asm__ ("mtfsf 255,%0"
           /* outputs:  */ : /* none */
           /* inputs:   */ : "f" (newflm));
  return original;
}
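
/*
 * Usage sketch (illustrative, not part of this header): running a
 * computation in a default floating-point environment (FPSCR all
 * zeros: round to nearest, exceptions disabled), then restoring the
 * caller's FPSCR.  The FPSCR bit pattern travels in a double, as
 * returned by a previous __setflm or __mffs.
 *
 *   static double compute_quietly (double x)
 *   {
 *     double saved = __setflm (0.0);
 *     double r = x * x;
 *     (void) __setflm (saved);
 *     return r;
 *   }
 */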


/*******************************************************************
 *                          Math Functions                         *
 *******************************************************************/

/*
 * __fabs - Floating-Point Absolute Value
 */
static inline double __fabs (double value) __attribute__((always_inline));
static inline double
__fabs (double value)
{
  double result;
  __asm__ ("fabs %0, %1"
           /* outputs:  */ : "=f" (result)
           /* inputs:   */ : "f" (value));
  return result;
}

/*
 * __fnabs - Floating Negative Absolute Value
 */
static inline double __fnabs (double b) __attribute__((always_inline));
static inline double
__fnabs (double b)
{
  double result;
  __asm__ ("fnabs %0, %1"
           /* outputs:  */ : "=f" (result)
           /* inputs:   */ : "f" (b));
  return result;
}

/*
 * __fctiw - Floating Convert to Integer Word
 *
 * Convert the input value to a signed long and place it in the low 32
 * bits of the FP destination register.  Clip to LONG_MIN or LONG_MAX
 * if the FP value exceeds the range representable by a long.  Use the
 * rounding mode indicated in the FPSCR.
 */
static inline double __fctiw (double b) __attribute__((always_inline));
static inline double
__fctiw (double b)
{
  double result;
  __asm__ ("fctiw %0, %1"
           /* outputs:  */ : "=f" (result)
           /* inputs:   */ : "f" (b));
  return result;
}

/*
 * __fctiwz - Floating Convert to Integer Word with Round toward Zero
 *
 * Convert the input value to a signed long and place it in the low 32
 * bits of the FP destination register.  Clip to LONG_MIN or LONG_MAX
 * if the FP value exceeds the range representable by a long.
 */
static inline double __fctiwz (double b) __attribute__((always_inline));
static inline double
__fctiwz (double b)
{
  double result;
  __asm__ ("fctiwz %0, %1"
           /* outputs:  */ : "=f" (result)
           /* inputs:   */ : "f" (b));
  return result;
}

/*
 * __fctidz - Floating Convert to Integer Double Word with Round toward Zero
 *
 * Convert the input value to a signed 64-bit int and place it in the FP
 * destination register.  Clip to LLONG_MIN (-2**63) or LLONG_MAX (2**63-1)
 * if the FP value exceeds the range representable by an int64_t.
 *
 * WARNING: fctidz is a valid instruction only on 64-bit PowerPC
 */
static inline double __fctidz (double b) __attribute__((always_inline));
static inline double
__fctidz (double b)
{
  double result;
  __asm__ ("fctidz %0, %1"
           /* outputs:  */ : "=f" (result)
           /* inputs:   */ : "f" (b));
  return result;
}

/*
 * __fctid - Floating Convert to Integer Double Word
 *
 * Convert the input value to a signed 64-bit int and place it in the FP
 * destination register.  Clip to LLONG_MIN (-2**63) or LLONG_MAX (2**63-1)
 * if the FP value exceeds the range representable by an int64_t.  Use the
 * rounding mode indicated in the FPSCR.
 *
 * WARNING: fctid is a valid instruction only on 64-bit PowerPC
 */
static inline double __fctid (double b) __attribute__((always_inline));
static inline double
__fctid (double b)
{
  double result;
  __asm__ ("fctid %0, %1"
           /* outputs:  */ : "=f" (result)
           /* inputs:   */ : "f" (b));
  return result;
}

/*
 * __fcfid - Floating Convert From Integer Double Word
 *
 * Convert the 64-bit signed integer input value to a 64-bit FP value.
 * Use the rounding mode indicated in the FPSCR if the integer is out of
 * double precision range.
 *
 * WARNING: fcfid is a valid instruction only on 64-bit PowerPC
 */
static inline double __fcfid (double b) __attribute__((always_inline));
static inline double
__fcfid (double b)
{
  double result;
  __asm__ ("fcfid %0, %1"
           /* outputs:  */ : "=f" (result)
           /* inputs:   */ : "f" (b));
  return result;
}

/*
 * __fmadd - Floating Multiply-Add (Double-Precision)
 *
 *   (a * c + b) double precision
 */
static inline double __fmadd (double a, double c, double b) __attribute__((always_inline));
static inline double
__fmadd (double a, double c, double b)
{
  double result;
  __asm__ ("fmadd %0, %1, %2, %3"
           /* outputs:  */ : "=f" (result)
           /* inputs:   */ : "f" (a), "f" (c), "f" (b));
  return result;
}

/*
 * __fmadds - Floating Multiply-Add Single
 *
 *   (a * c + b) single precision
 *
 * Double precision arguments are used to prevent the compiler from
 * issuing frsp instructions upstream.
 */
static inline float __fmadds (double a, double c, double b) __attribute__((always_inline));
static inline float
__fmadds (double a, double c, double b)
{
  float result;
  __asm__ ("fmadds %0, %1, %2, %3"
           /* outputs:  */ : "=f" (result)
           /* inputs:   */ : "f" (a), "f" (c), "f" (b));
  return result;
}

/*
 * __fmsub - Floating Multiply-Subtract (Double-Precision)
 *
 *   (a * c - b) double precision
 */
static inline double __fmsub (double a, double c, double b) __attribute__((always_inline));
static inline double
__fmsub (double a, double c, double b)
{
  double result;
  __asm__ ("fmsub %0, %1, %2, %3"
           /* outputs:  */ : "=f" (result)
           /* inputs:   */ : "f" (a), "f" (c), "f" (b));
  return result;
}

/*
 * __fmsubs - Floating Multiply-Subtract Single
 *
 *   (a * c - b) single precision
 *
 * Double precision arguments are used to prevent the compiler from
 * issuing frsp instructions upstream.
 */
static inline float __fmsubs (double a, double c, double b) __attribute__((always_inline));
static inline float
__fmsubs (double a, double c, double b)
{
  float result;
  __asm__ ("fmsubs %0, %1, %2, %3"
           /* outputs:  */ : "=f" (result)
           /* inputs:   */ : "f" (a), "f" (c), "f" (b));
  return result;
}

/*
 * __fmul - Floating Multiply (Double-Precision)
 *
 *   (a * c) double precision
 */
static inline double __fmul (double a, double c) __attribute__((always_inline));
static inline double
__fmul (double a, double c)
{
  double result;
  __asm__ ("fmul %0, %1, %2"
           /* outputs:  */ : "=f" (result)
           /* inputs:   */ : "f" (a), "f" (c));
  return result;
}

/*
 * __fmuls - Floating Multiply Single
 *
 *   (a * c) single precision
 *
 * Double precision arguments are used to prevent the compiler from
 * issuing frsp instructions upstream.
 */
static inline float __fmuls (double a, double c) __attribute__((always_inline));
static inline float
__fmuls (double a, double c)
{
  float result;
  __asm__ ("fmuls %0, %1, %2"
           /* outputs:  */ : "=f" (result)
           /* inputs:   */ : "f" (a), "f" (c));
  return result;
}

/*
 * __fnmadd - Floating Negative Multiply-Add (Double-Precision)
 *
 *   -(a * c + b) double precision
 */
static inline double __fnmadd (double a, double c, double b) __attribute__((always_inline));
static inline double
__fnmadd (double a, double c, double b)
{
  double result;
  __asm__ ("fnmadd %0, %1, %2, %3"
           /* outputs:  */ : "=f" (result)
           /* inputs:   */ : "f" (a), "f" (c), "f" (b));
  return result;
}

/*
 * __fnmadds - Floating Negative Multiply-Add Single
 *
 *   -(a * c + b) single precision
 *
 * Double precision arguments are used to prevent the compiler from
 * issuing frsp instructions upstream.
 */
static inline float __fnmadds (double a, double c, double b) __attribute__((always_inline));
static inline float
__fnmadds (double a, double c, double b)
{
  float result;
  __asm__ ("fnmadds %0, %1, %2, %3"
           /* outputs:  */ : "=f" (result)
           /* inputs:   */ : "f" (a), "f" (c), "f" (b));
  return result;
}

/*
 * __fnmsub - Floating Negative Multiply-Subtract (Double-Precision)
 *
 *   -(a * c - b) double precision
 */
static inline double __fnmsub (double a, double c, double b) __attribute__((always_inline));
static inline double
__fnmsub (double a, double c, double b)
{
  double result;
  __asm__ ("fnmsub %0, %1, %2, %3"
           /* outputs:  */ : "=f" (result)
           /* inputs:   */ : "f" (a), "f" (c), "f" (b));
  return result;
}

/*
 * __fnmsubs - Floating Negative Multiply-Subtract Single
 *
 *   -(a * c - b) single precision
 *
 * Double precision arguments are used to prevent the compiler from
 * issuing frsp instructions upstream.
 */
static inline float __fnmsubs (double a, double c, double b) __attribute__((always_inline));
static inline float
__fnmsubs (double a, double c, double b)
{
  float result;
  __asm__ ("fnmsubs %0, %1, %2, %3"
           /* outputs:  */ : "=f" (result)
           /* inputs:   */ : "f" (a), "f" (c), "f" (b));
  return result;
}

/*
 * __fres - Floating Reciprocal Estimate
 *
 * Produces a single-precision estimate with 5 bits of accuracy.
 * Note: not valid on the PowerPC 601.
 *
 * ??? CW: float __fres(float)
 */
static inline float __fres (float val) __attribute__((always_inline));
static inline float
__fres (float val)
{
  float estimate;
  __asm__ ("fres %0,%1"
           /* outputs:  */ : "=f" (estimate)
           /* inputs:   */ : "f" (val));
  return estimate;
}
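
/*
 * Usage sketch (illustrative, not part of this header): refining the
 * fres estimate with one Newton-Raphson step, which roughly doubles
 * the number of accurate bits:
 *
 *   y1 = y0 + y0 * (1 - x * y0)
 *
 *   static float reciprocal (float x)
 *   {
 *     float y = __fres (x);
 *     return __fmadds (y, __fnmsubs (x, y, 1.0f), y);
 *   }
 */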

/*
 * __frsp - Floating Round to Single-Precision
 */
static inline float __frsp (double d) __attribute__((always_inline));
static inline float
__frsp (double d)
{
  float result;
  __asm__ ("frsp %0, %1"
           /* outputs:  */ : "=f" (result)
           /* inputs:   */ : "f" (d));
  return result;
}

/*
 * __frsqrte - Floating Reciprocal Square Root Estimate
 *
 * Note: not valid on the PowerPC 601.
 */
static inline double __frsqrte (double val) __attribute__((always_inline));
static inline double
__frsqrte (double val)
{
  double estimate;

  __asm__ ("frsqrte %0,%1"
           /* outputs:  */ : "=f" (estimate)
           /* inputs:   */ : "f" (val));
  return estimate;
}
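
/*
 * Usage sketch (illustrative, not part of this header): refining the
 * frsqrte estimate with one Newton-Raphson step:
 *
 *   y1 = y0 + 0.5 * y0 * (1 - x * y0 * y0)
 *
 *   static double rsqrt (double x)
 *   {
 *     double y = __frsqrte (x);
 *     double e = __fnmsub (x, __fmul (y, y), 1.0);
 *     return __fmadd (__fmul (0.5, y), e, y);
 *   }
 */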

/*
 * __frsqrtes - Floating Reciprocal Square Root Estimate Single
 *
 * Note: implemented with the double-precision frsqrte instruction;
 * the estimate is simply returned as a float.
 */
static inline float __frsqrtes (double f) __attribute__((always_inline));
static inline float
__frsqrtes (double f)
{
  float result;
  __asm__ ("frsqrte %0, %1"
           /* outputs:  */ : "=f" (result)
           /* inputs:   */ : "f" (f));
  return result;
}

/*
 * __fsel - Floating Select
 *
 *   if (test >= 0) return a; else return b;
 *
 * Note: not valid on the PowerPC 601.
 */
static inline double __fsel (double test, double a, double b) __attribute__((always_inline));
static inline double
__fsel (double test, double a, double b)
{
  double result;
  __asm__ ("fsel %0,%1,%2,%3"
           /* outputs:  */ : "=f" (result)
           /* inputs:   */ : "f" (test), "f" (a), "f" (b));
  return result;
}

/*
 * __fsels - Floating Select (Single-Precision variant)
 *
 * An artificial single precision variant of fsel.  This produces the
 * same results as fsel, but is useful because the result is cast as
 * a float, discouraging the compiler from issuing an frsp instruction
 * afterward.
 */
static inline float __fsels (double test, double a, double b) __attribute__((always_inline));
static inline float
__fsels (double test, double a, double b)
{
  float result;
  __asm__ ("fsel %0,%1,%2,%3"
           /* outputs:  */ : "=f" (result)
           /* inputs:   */ : "f" (test), "f" (a), "f" (b));
  return result;
}
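
/*
 * Usage sketch (illustrative, not part of this header): a branch-free
 * maximum built on __fsel.  Because fsel tests (a - b) >= 0, the result
 * is not IEEE-correct when the difference is a NaN, and it does not
 * distinguish -0.0 from +0.0.
 *
 *   static double fast_fmax (double a, double b)
 *   {
 *     return __fsel (a - b, a, b);
 *   }
 */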

/*
 * __fsqrt - Floating-Point Square Root (Double-Precision)
 *
 * WARNING: Illegal instruction on PowerPC 603, 604, 750, 7400, 7410,
 * 7450, and 7455
 */
static inline double __fsqrt (double d) __attribute__((always_inline));
static inline double
__fsqrt (double d)
{
  double result;
  __asm__ ("fsqrt %0, %1"
           /* outputs:  */ : "=f" (result)
           /* inputs:   */ : "f" (d));
  return result;
}

/*
 * __fsqrts - Floating-Point Square Root Single-Precision
 *
 * WARNING: Illegal instruction on PowerPC 603, 604, 750, 7400, 7410,
 * 7450, and 7455
 */
static inline float __fsqrts (float f) __attribute__((always_inline));
static inline float
__fsqrts (float f)
{
  float result;
  __asm__ ("fsqrts %0, %1"
           /* outputs:  */ : "=f" (result)
           /* inputs:   */ : "f" (f));
  return result;
}

/*
 * __mulhw - Multiply High Word
 */
static inline int __mulhw (int a, int b) __attribute__((always_inline));
static inline int
__mulhw (int a, int b)
{
  int result;
  __asm__ ("mulhw %0, %1, %2"
           /* outputs:  */ : "=r" (result)
           /* inputs:   */ : "r" (a), "r" (b));
  return result;
}

/*
 * __mulhwu - Multiply High Word Unsigned
 */
static inline unsigned int __mulhwu (unsigned int a, unsigned int b) __attribute__((always_inline));
static inline unsigned int
__mulhwu (unsigned int a, unsigned int b)
{
  unsigned int result;
  __asm__ ("mulhwu %0, %1, %2"
           /* outputs:  */ : "=r" (result)
           /* inputs:   */ : "r" (a), "r" (b));
  return result;
}
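
/*
 * Usage sketch (illustrative, not part of this header): forming the
 * full 64-bit product of two 32-bit values.  __mulhwu supplies the
 * high word; an ordinary C multiply supplies the low word (the mullw
 * listed under "TO DO" above).
 *
 *   static unsigned long long mul64 (unsigned int a, unsigned int b)
 *   {
 *     unsigned int hi = __mulhwu (a, b);
 *     unsigned int lo = a * b;
 *     return ((unsigned long long) hi << 32) | lo;
 *   }
 */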

/*
 * __stfiwx - Store Floating-Point as Integer Word Indexed
 *
 *   void __stfiwx(double, void *, int);
 */
#define __stfiwx(value, base, index)    \
  __asm__ ("stfiwx %0, %1, %2" : /*no result*/  \
           : "f" (value), "b%" (index), "r" (base) : "memory")
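
/*
 * Usage sketch (illustrative, not part of this header): the classic
 * truncating double-to-int conversion on 32-bit PowerPC.  __fctiwz
 * leaves the integer in the low word of an FP register, and __stfiwx
 * stores that word to memory untouched.
 *
 *   static int double_to_int (double d)
 *   {
 *     int result;
 *     double t = __fctiwz (d);
 *     __stfiwx (t, &result, 0);
 *     return result;
 *   }
 */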


/*******************************************************************
 *                     Miscellaneous Functions                     *
 *******************************************************************/

/*
 * __nop - no operation (PowerPC preferred form)
 *
 *   void __nop(void);
 */
#define __nop()    \
  __asm__ ("ori 0,0,0")

/*
 * __icbi - Instruction Cache Block Invalidate
 *
 *   void __icbi(void *, int);
 */
#define __icbi(base, index)    \
  __asm__ ("icbi %0, %1" : /*no result*/ : "b%" (index), "r" (base) : "memory")

/*
 * __mffs - Move from FPSCR
 */
static inline double __mffs (void) __attribute__((always_inline));
static inline double
__mffs (void)
{
  double result;
  __asm__ volatile ("mffs %0"
                    /* outputs:  */ : "=f" (result));
  return result;
}

/*
 * __mfspr - Move from Special Purpose Register
 *
 *   int __mfspr(int);
 */
#define __mfspr(spr)                                                    \
  __extension__ ({ long __ppc_i_mfsprResult;                            \
     __asm__ volatile ("mfspr %0, %1" : "=r" (__ppc_i_mfsprResult) : "n" (spr)); \
     /*return*/ __ppc_i_mfsprResult; })

/*
 * __mtfsf - Move to FPSCR Fields
 *
 *   void __mtfsf(int, int);
 */
#define __mtfsf(mask, newValue) \
  __asm__ volatile ("mtfsf %0, %1" : : "n" (mask), "f" (newValue))

/*
 * __mtspr - Move to Special Purpose Register
 *
 *   void __mtspr(int, int);
 */
#define __mtspr(spr, value)     \
  __asm__ volatile ("mtspr %0, %1" : : "n" (spr), "r" (value))

/*
 * __OSReadSwapSInt16
 *
 * lhbrx for signed shorts.  This will do the required sign
 * extension after load and byteswap.
 */
static inline signed short __OSReadSwapSInt16 (signed short *base, int index) __attribute__((always_inline));
static inline signed short
__OSReadSwapSInt16 (signed short *base, int index)
{
  signed long result;
  __asm__ volatile ("lhbrx %0, %1, %2"
                    /* outputs:  */ : "=r" (result)
                    /* inputs:   */ : "b%" (index), "r" (base)
                    /* clobbers: */ : "memory");
  return result;
}

/*
 * __OSReadSwapUInt16
 */
static inline unsigned short __OSReadSwapUInt16 (volatile void *base, int index) __attribute__((always_inline));
static inline unsigned short
__OSReadSwapUInt16 (volatile void *base, int index)
{
  unsigned long result;
  __asm__ volatile ("lhbrx %0, %1, %2"
                    /* outputs:  */ : "=r" (result)
                    /* inputs:   */ : "b" (index), "r" (base)
                    /* clobbers: */ : "memory");
  return result;
}

/*
 * __astrcmp - assembly strcmp
 */
static inline int __astrcmp (const char *in_s1, const char *in_s2) __attribute__((always_inline));
static inline int
__astrcmp (const char *in_s1, const char *in_s2)
{
  int result, temp;
  register const char *s1 = in_s1 - 1;
  register const char *s2 = in_s2 - 1;

  __asm__ ("1:lbzu %0,1(%1)\n"
           "\tcmpwi cr1,%0,0\n"
           "\tlbzu %3,1(%2)\n"
           "\tsubf. %0,%3,%0\n"
           "\tbeq- cr1,2f\n"
           "\tbeq+ 1b\n2:"
            /* outputs: */  : "=&r" (result), "+b" (s1), "+b" (s2), "=r" (temp)
            /* inputs: */   :
            /* clobbers: */ : "cr0", "cr1", "memory");

  return result;

  /*
   * "=&r" (result)     means: 'result' is written to (the '='), it's any GP
   *                    register (the 'r'), and it must not be the same as
   *                    any of the input registers (the '&').
   * "+b" (s1)          means: 's1' is read from and written to (the '+'),
   *                    and it must be a base GP register (i.e., not R0.)
   * "=r" (temp)        means: 'temp' is any GP reg and it's only written to.
   *
   * "memory"           in the 'clobbers' section means that gcc will make
   *                    sure that anything that should be in memory IS there
   *                    before calling this routine.
   */
}

#endif  /* (defined(__ppc__) || defined(__ppc64__)) && ! defined(__MWERKS__) */

#endif /* _PPC_INTRINSICS_H_ */