/* APPLE LOCAL file PPC_INTRINSICS */

/* Definitions for PowerPC intrinsic instructions
   Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 2, or (at your option) any later
   version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING.  If not, write to the Free
   Software Foundation, 59 Temple Place - Suite 330, Boston, MA
   02111-1307, USA.  */

/* As a special exception, if you include this header file into source
   files compiled by GCC, this header file does not by itself cause
   the resulting executable to be covered by the GNU General Public
   License.  This exception does not however invalidate any other
   reasons why the executable file might be covered by the GNU General
   Public License.  */

/*
 * The following PowerPC intrinsics are provided by this header:
 *
 * Low-Level Processor Synchronization
 *   __eieio    - Enforce In-Order Execution of I/O
 *   __isync    - Instruction Synchronize
 *   __sync     - Synchronize
 *   __lwsync   - Lightweight Synchronize
 *
 * Manipulating the Contents of a Variable or Register
 *   __cntlzw   - Count Leading Zeros Word
 *   __cntlzd   - Count Leading Zeros Double Word
 *   __rlwimi   - Rotate Left Word Immediate then Mask Insert
 *   __rlwinm   - Rotate Left Word Immediate then AND with Mask
 *   __rlwnm    - Rotate Left Word then AND with Mask
 *
 * Byte-Reversing Functions
 *   __lhbrx    - Load Half Word Byte-Reverse Indexed
 *   __lwbrx    - Load Word Byte-Reverse Indexed
 *   __sthbrx   - Store Half Word Byte-Reverse Indexed
 *   __stwbrx   - Store Word Byte-Reverse Indexed
 *
 * Data Cache Manipulation
 *   __dcba     - Data Cache Block Allocate
 *   __dcbf     - Data Cache Block Flush
 *   __dcbst    - Data Cache Block Store
 *   __dcbt     - Data Cache Block Touch
 *   __dcbtst   - Data Cache Block Touch for Store
 *   __dcbzl    - Data Cache Block Set to Zero
 *   __dcbz     - Data Cache Block Set to Zero (32-bytes only)
 *
 * Setting the Floating-Point Environment
 *   __setflm   - Set Floating-point Mode
 *
 * Math Functions
 *   __fabs     - Floating-Point Absolute Value
 *   __fnabs    - Floating Negative Absolute Value
 *   __fctiw    - Floating Convert to Integer Word
 *   __fctiwz   - Floating Convert to Integer Word with Round toward Zero
 *   __fctidz   - Floating Convert to Integer Doubleword with Round toward Zero
 *   __fctid    - Floating Convert to Integer Doubleword
 *   __fcfid    - Floating Convert From Integer Doubleword
 *   __fmadd    - Floating Multiply-Add (Double-Precision)
 *   __fmadds   - Floating Multiply-Add Single
 *   __fmsub    - Floating Multiply-Subtract (Double-Precision)
 *   __fmsubs   - Floating Multiply-Subtract Single
 *   __fmul     - Floating Multiply (Double-Precision)
 *   __fmuls    - Floating Multiply Single
 *   __fnmadd   - Floating Negative Multiply-Add (Double-Precision)
 *   __fnmadds  - Floating Negative Multiply-Add Single
 *   __fnmsub   - Floating Negative Multiply-Subtract (Double-Precision)
 *   __fnmsubs  - Floating Negative Multiply-Subtract Single
 *   __fres     - Floating Reciprocal Estimate
 *   __frsp     - Floating Round to Single-Precision
 *   __frsqrte  - Floating Reciprocal Square Root Estimate
 *   __frsqrtes - Floating Reciprocal Square Root Estimate Single
 *   __fsel     - Floating Select
 *   __fsels    - Floating Select (Single-Precision variant)
 *   __fsqrt    - Floating-Point Square Root (Double-Precision)
 *   __fsqrts   - Floating-Point Square Root Single-Precision
 *   __mulhw    - Multiply High Word
 *   __mulhwu   - Multiply High Word Unsigned
 *   __stfiwx   - Store Floating-Point as Integer Word Indexed
 *
 * Miscellaneous Functions
 *   __nop      - PPC preferred form of no operation
 *   __astrcmp  - assembly strcmp
 *   __icbi     - Instruction Cache Block Invalidate
 *   __mffs     - Move from FPSCR
 *   __mfspr    - Move from Special Purpose Register
 *   __mtfsf    - Move to FPSCR Fields
 *   __mtspr    - Move to Special Purpose Register
 *   __OSReadSwapSInt16 - lhbrx for signed shorts
 *   __OSReadSwapUInt16 - lhbrx for unsigned shorts
 *
 * TO DO:
 * - Desired:
 *     mullw
 * - Available in CodeWarrior, not yet implemented here:
 *     abs, labs, fabsf, fnabsf
 *
 * NOTES:
 * - Some of the intrinsics need to be macros because certain
 *   parameters MUST be integer constants and not values in registers.
 * - The declarations use __asm__ instead of asm and __inline__ instead
 *   of inline to prevent errors when -ansi is specified.
 * - Some of the intrinsic definitions use the "volatile" specifier on
 *   the "asm" statements in order to work around what appears to be
 *   a bug in the compiler/optimizer.  In general we have avoided the
 *   use of "volatile" because it suppresses optimization on the
 *   generated instructions.  The instructions to which "volatile"
 *   has been added where it appears that it should not be needed are
 *   lhbrx and lwbrx.
 *
 * Contributors: Fred Forsman (editor), Turly O'Connor, Ian Ollmann, Sanjay Patel
 * Last modified: October 6, 2004
 */

#ifndef _PPC_INTRINSICS_H_
#define _PPC_INTRINSICS_H_

#if (defined(__ppc__) || defined(__ppc64__)) && ! defined(__MWERKS__)

/*******************************************************************
 *                Special Purpose Registers (SPRs)                 *
 *******************************************************************/

#define __SPR_MQR      0     /* PPC 601 only */
#define __SPR_XER      1
#define __SPR_RTCU     4     /* Real time clock upper.  PPC 601 only. */
#define __SPR_RTCL     5     /* Real time clock lower.  PPC 601 only. */
#define __SPR_LR       8
#define __SPR_CTR      9
#define __SPR_VRSAVE   256   /* AltiVec */
#define __SPR_TBL      268   /* Time-base Lower.  Not on PPC 601. */
#define __SPR_TBU      269   /* Time-base Upper.  Not on PPC 601. */
#define __SPR_UMMCR2   928   /* PPC 74xx */
#define __SPR_UPMC5    929   /* PPC 745x */
#define __SPR_UPMC6    930   /* PPC 745x */
#define __SPR_UBAMR    935   /* PPC 7400 and 7410 */
#define __SPR_UMMCR0   936   /* PPC 74xx and 750 */
#define __SPR_UPMC1    937   /* PPC 74xx and 750 */
#define __SPR_UPMC2    938   /* PPC 74xx and 750 */
#define __SPR_USIAR    939   /* PPC 74xx and 750 */
#define __SPR_UMMCR1   940   /* PPC 74xx and 750 */
#define __SPR_UPMC3    941   /* PPC 74xx and 750 */
#define __SPR_UPMC4    942   /* PPC 74xx and 750 */
#define __SPR_PIR      1023  /* supervisor level only! */

/*
 * Shorthand macros for some commonly used SPRs.
 */
#define __mfxer()           __mfspr(__SPR_XER)
#define __mflr()            __mfspr(__SPR_LR)
#define __mfctr()           __mfspr(__SPR_CTR)
#define __mfvrsave()        __mfspr(__SPR_VRSAVE)
#define __mftb()            __mfspr(__SPR_TBL)
#define __mftbu()           __mfspr(__SPR_TBU)

#define __mtlr(value)       __mtspr(__SPR_LR, value)
#define __mtxer(value)      __mtspr(__SPR_XER, value)
#define __mtctr(value)      __mtspr(__SPR_CTR, value)
#define __mtvrsave(value)   __mtspr(__SPR_VRSAVE, value)


/*******************************************************************
 *             Low-Level Processor Synchronization                 *
 *******************************************************************/

/*
 * __eieio - Enforce In-Order Execution of I/O
 *
 * void __eieio (void);
 */
#define __eieio() __asm__ ("eieio" : : : "memory")

/*
 * __isync - Instruction Synchronize
 *
 * void __isync (void);
 */
#define __isync() \
  __asm__ volatile ("isync")

/*
 * __sync - Synchronize
 *
 * void __sync (void);
 */
#define __sync() __asm__ volatile ("sync")

/*
 * __lwsync - Lightweight Synchronize, see PPC2.01, Book 2
 *
 * void __lwsync (void);
 */
#define __lwsync() __asm__ volatile ("sync 1")


/*******************************************************************
 *                   Byte-Reversing Functions                      *
 *******************************************************************/

/*
 * __lhbrx - Load Half Word Byte-Reverse Indexed
 *
 * unsigned short __lhbrx(void *, int);
 */
#define __lhbrx(base, index) \
  ({ unsigned short __ppc_i_lhbrxResult; \
     __asm__ volatile ("lhbrx %0, %1, %2" : "=r" (__ppc_i_lhbrxResult) : "b%" (index), "r" (base) : "memory"); \
     /*return*/ __ppc_i_lhbrxResult; })

/*
 * __lwbrx - Load Word Byte-Reverse Indexed
 *
 * unsigned int __lwbrx(void *, int);
 */
#define __lwbrx(base, index) \
  ({ unsigned int __ppc_i_lwbrxResult; \
     __asm__ volatile ("lwbrx %0, %1, %2" : "=r" (__ppc_i_lwbrxResult) : "b%" (index), "r" (base) : "memory"); \
     /*return*/ __ppc_i_lwbrxResult; })

/*
 * __sthbrx - Store Half Word Byte-Reverse Indexed
 *
 * void __sthbrx(unsigned short, void *, int);
 */
#define __sthbrx(value, base, index) \
  __asm__ ("sthbrx %0, %1, %2" : : "r" (value), "b%" (index), "r" (base) : "memory")

/*
 * __stwbrx - Store Word Byte-Reverse Indexed
 *
 * void __stwbrx(unsigned int, void *, int);
 */
#define __stwbrx(value, base, index) \
  __asm__ ("stwbrx %0, %1, %2" : : "r" (value), "b%" (index), "r" (base) : "memory")


/*******************************************************************
 *      Manipulating the Contents of a Variable or Register        *
 *******************************************************************/

/*
 * __cntlzw - Count Leading Zeros Word
 * __cntlzd - Count Leading Zeros Double Word
 */

#define __cntlzw(a) __builtin_clz(a)
#define __cntlzd(a) __builtin_clzll(a)
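/*
 * Usage sketch (illustrative, not part of the original API; the
 * __example_* name is ours): for a non-zero word, floor(log2(x))
 * falls out of the leading-zero count as 31 - __cntlzw(x).
 */
static __inline__ int
__example_ilog2 (unsigned int x)
{
  /* Undefined for x == 0, just like __builtin_clz.  */
  return 31 - __cntlzw (x);
}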
/*
 * __rlwimi - Rotate Left Word Immediate then Mask Insert
 *
 * int __rlwimi(int, long, int, int, int);
 *
 * We don't mention "%1" below: operand[1] needs to be skipped as
 * it's just a placeholder to let the compiler know that rA is read
 * from as well as written to.
 */
#define __rlwimi(rA, rS, cnt, mb, me) \
  ({ __asm__ ("rlwimi %0,%2,%3,%4,%5" : "=r" (rA) \
              : "0" (rA), "r" (rS), "n" (cnt), "n" (mb), "n" (me)); \
     /*return*/ rA; })

/*
 * __rlwinm - Rotate Left Word Immediate then AND with Mask
 *
 * unsigned int __rlwinm(unsigned int, int, int, int);
 */
#define __rlwinm(rS, cnt, mb, me) \
  ({ unsigned int __ppc_i_val; \
     __asm__ ("rlwinm %0,%1,%2,%3,%4" : "=r" (__ppc_i_val) \
              : "r" (rS), "n" (cnt), "n" (mb), "n" (me)); \
     /*return*/ __ppc_i_val; })

/*
 * __rlwnm - Rotate Left Word then AND with Mask
 *
 * unsigned int __rlwnm(unsigned int, int, int, int);
 */
#define __rlwnm(value, leftRotateBits, maskStart, maskEnd) \
  ({ unsigned int __ppc_i_result; \
     __asm__ ("rlwnm %0, %1, %2, %3, %4" : "=r" (__ppc_i_result) : \
              "r" (value), "r" (leftRotateBits), "n" (maskStart), "n" (maskEnd)); \
     /*return*/ __ppc_i_result; })
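/*
 * Usage sketch (illustrative, not part of the original API): rlwinm
 * as a bit-field extractor.  Rotating left by 8 moves the high byte
 * (bits 0-7 in IBM numbering, where bit 0 is the MSB) into bits
 * 24-31, and the mask 24..31 keeps only those bits, so this is
 * equivalent to (x >> 24).
 */
static __inline__ unsigned int
__example_high_byte (unsigned int x)
{
  return __rlwinm (x, 8, 24, 31);
}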
/*******************************************************************
 *                    Data Cache Manipulation                      *
 *******************************************************************/

/*
 * --- Data Cache Block instructions ---
 *
 * Please see Motorola's "The Programming Environments for 32-Bit
 * Microprocessors" for a description of what these do.
 *
 * Parameter descriptions:
 *
 *   base    starting address for figuring out where the
 *           cacheline is
 *
 *   index   byte count to be added to the base address for
 *           purposes of calculating the effective address
 *           of the cacheline to be operated on.
 *
 * Effective Address of cacheline to be manipulated =
 *   (char*) base + index
 *
 * WARNING: The size and alignment of cachelines are subject to
 *          change on future processors!  Cachelines are 32 bytes in
 *          size and are aligned to 32 bytes on PowerPC 601, 603, 604,
 *          750, 7400, 7410, 7450, and 7455.
 */

/*
 * __dcba - Data Cache Block Allocate
 *
 * void __dcba(void *, int);
 *
 * WARNING: dcba is a valid instruction only on PowerPC 7400, 7410,
 *          7450 and 7455.
 */
#define __dcba(base, index) \
  __asm__ ("dcba %0, %1" : /*no result*/ : "b%" (index), "r" (base) : "memory")

/*
 * __dcbf - Data Cache Block Flush
 *
 * void __dcbf(void *, int);
 */
#define __dcbf(base, index) \
  __asm__ ("dcbf %0, %1" : /*no result*/ : "b%" (index), "r" (base) : "memory")

/*
 * __dcbst - Data Cache Block Store
 *
 * void __dcbst(void *, int);
 */
#define __dcbst(base, index) \
  __asm__ ("dcbst %0, %1" : /*no result*/ : "b%" (index), "r" (base) : "memory")

/*
 * __dcbt - Data Cache Block Touch
 *
 * void __dcbt(void *, int);
 */
#define __dcbt(base, index) \
  __asm__ ("dcbt %0, %1" : /*no result*/ : "b%" (index), "r" (base) : "memory")

/*
 * __dcbtst - Data Cache Block Touch for Store
 *
 * void __dcbtst(void *, int);
 */
#define __dcbtst(base, index) \
  __asm__ ("dcbtst %0, %1" : /*no result*/ : "b%" (index), "r" (base) : "memory")

/*
 * __dcbzl - Data Cache Block Set to Zero
 *
 * void __dcbzl(void *, int);
 */
#define __dcbzl(base, index) \
  __asm__ ("dcbzl %0, %1" : /*no result*/ : "b%" (index), "r" (base) : "memory")

/*
 * __dcbz - Data Cache Block Set to Zero (32-bytes only)
 *
 * WARNING: this is for legacy purposes only
 *
 * void __dcbz(void *, int);
 */
#define __dcbz(base, index) \
  __asm__ ("dcbz %0, %1" : /*no result*/ : "b%" (index), "r" (base) : "memory")


/*******************************************************************
 *            Setting the Floating-Point Environment               *
 *******************************************************************/

/*
 * __setflm - Set Floating-point Mode
 *
 * Sets the FPSCR (floating-point status and control register),
 * returning the original value.
 *
 * ??? CW: float __setflm(float);
 */
static __inline__ double __setflm (double newflm) __attribute__((always_inline));
static __inline__ double
__setflm (double newflm)
{
  double original;

  __asm__ ("mffs %0"
           /* outputs: */ : "=f" (original));
  __asm__ ("mtfsf 255,%0"
           /* outputs: */ : /* none */
           /* inputs:  */ : "f" (newflm));
  return original;
}


/*******************************************************************
 *                        Math Functions                           *
 *******************************************************************/

/*
 * __fabs - Floating-Point Absolute Value
 */
static __inline__ double __fabs (double value) __attribute__((always_inline));
static __inline__ double
__fabs (double value)
{
  double result;
  __asm__ ("fabs %0, %1"
           /* outputs: */ : "=f" (result)
           /* inputs:  */ : "f" (value));
  return result;
}

/*
 * __fnabs - Floating Negative Absolute Value
 */
static __inline__ double __fnabs (double b) __attribute__((always_inline));
static __inline__ double
__fnabs (double b)
{
  double result;
  __asm__ ("fnabs %0, %1"
           /* outputs: */ : "=f" (result)
           /* inputs:  */ : "f" (b));
  return result;
}
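/*
 * Usage sketch (illustrative, not part of the original API): since
 * __setflm returns the previous FPSCR image, it supports a classic
 * save/modify/restore pattern.  Passing 0.0 (an all-zero bit
 * pattern) selects round-to-nearest with all exceptions disabled.
 */
static __inline__ double
__example_with_default_fpscr (double x)
{
  double saved = __setflm (0.0);   /* clear FPSCR, remember old value */
  double r = __fabs (x);           /* ... work under the default mode */
  (void) __setflm (saved);         /* restore the caller's FPSCR */
  return r;
}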
/*
 * __fctiw - Floating Convert to Integer Word
 *
 * Convert the input value to a signed long and place in the low 32
 * bits of the FP register.  Clip to LONG_MIN or LONG_MAX if the FP
 * value exceeds the range representable by a long.  Use the rounding
 * mode indicated in the FPSCR.
 */
static __inline__ double __fctiw (double b) __attribute__((always_inline));
static __inline__ double
__fctiw (double b)
{
  double result;
  __asm__ ("fctiw %0, %1"
           /* outputs: */ : "=f" (result)
           /* inputs:  */ : "f" (b));
  return result;
}

/*
 * __fctiwz - Floating Convert to Integer Word with Round toward Zero
 *
 * Convert the input value to a signed long and place in the low 32
 * bits of the FP register.  Clip to LONG_MIN or LONG_MAX if the FP
 * value exceeds the range representable by a long.
 */
static __inline__ double __fctiwz (double b) __attribute__((always_inline));
static __inline__ double
__fctiwz (double b)
{
  double result;
  __asm__ ("fctiwz %0, %1"
           /* outputs: */ : "=f" (result)
           /* inputs:  */ : "f" (b));
  return result;
}

/*
 * __fctidz - Floating Convert to Integer Double Word with Round toward Zero
 *
 * Convert the input value to a signed 64-bit int and place in the FP
 * destination register.  Clip to LLONG_MIN (-2**63) or LLONG_MAX (2**63-1)
 * if the FP value exceeds the range representable by an int64_t.
 *
 * WARNING: fctidz is a valid instruction only on 64-bit PowerPC
 */
static __inline__ double __fctidz (double b) __attribute__((always_inline));
static __inline__ double
__fctidz (double b)
{
  double result;
  __asm__ ("fctidz %0, %1"
           /* outputs: */ : "=f" (result)
           /* inputs:  */ : "f" (b));
  return result;
}

/*
 * __fctid - Floating Convert to Integer Double Word
 *
 * Convert the input value to a signed 64-bit int and place in the FP
 * destination register.  Clip to LLONG_MIN (-2**63) or LLONG_MAX (2**63-1)
 * if the FP value exceeds the range representable by an int64_t.  Use the
 * rounding mode indicated in the FPSCR.
 *
 * WARNING: fctid is a valid instruction only on 64-bit PowerPC
 */
static __inline__ double __fctid (double b) __attribute__((always_inline));
static __inline__ double
__fctid (double b)
{
  double result;
  __asm__ ("fctid %0, %1"
           /* outputs: */ : "=f" (result)
           /* inputs:  */ : "f" (b));
  return result;
}

/*
 * __fcfid - Floating Convert From Integer Double Word
 *
 * Convert the 64-bit signed integer input value to a 64-bit FP value.
 * Use the rounding mode indicated in the FPSCR if the integer is out of
 * double precision range.
 *
 * WARNING: fcfid is a valid instruction only on 64-bit PowerPC
 */
static __inline__ double __fcfid (double b) __attribute__((always_inline));
static __inline__ double
__fcfid (double b)
{
  double result;
  __asm__ ("fcfid %0, %1"
           /* outputs: */ : "=f" (result)
           /* inputs:  */ : "f" (b));
  return result;
}

/*
 * __fmadd - Floating Multiply-Add (Double-Precision)
 *
 * (a * c + b) double precision
 */
static __inline__ double __fmadd (double a, double c, double b) __attribute__((always_inline));
static __inline__ double
__fmadd (double a, double c, double b)
{
  double result;
  __asm__ ("fmadd %0, %1, %2, %3"
           /* outputs: */ : "=f" (result)
           /* inputs:  */ : "f" (a), "f" (c), "f" (b));
  return result;
}
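/*
 * Usage sketch (illustrative, not part of the original API): because
 * fmadd rounds only once, the rounding error of a double product can
 * be recovered exactly as err = fmadd(a, c, -(a*c)), the standard
 * two-product trick.
 */
static __inline__ double
__example_mul_error (double a, double c)
{
  double p = a * c;            /* rounded product */
  return __fmadd (a, c, -p);   /* exact rounding error of p */
}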
/*
 * __fmadds - Floating Multiply-Add Single
 *
 * (a * c + b) single precision
 *
 * Double precision arguments are used to prevent the compiler from
 * issuing frsp instructions upstream.
 */
static __inline__ float __fmadds (double a, double c, double b) __attribute__((always_inline));
static __inline__ float
__fmadds (double a, double c, double b)
{
  float result;
  __asm__ ("fmadds %0, %1, %2, %3"
           /* outputs: */ : "=f" (result)
           /* inputs:  */ : "f" (a), "f" (c), "f" (b));
  return result;
}

/*
 * __fmsub - Floating Multiply-Subtract (Double-Precision)
 *
 * (a * c - b) double precision
 */
static __inline__ double __fmsub (double a, double c, double b) __attribute__((always_inline));
static __inline__ double
__fmsub (double a, double c, double b)
{
  double result;
  __asm__ ("fmsub %0, %1, %2, %3"
           /* outputs: */ : "=f" (result)
           /* inputs:  */ : "f" (a), "f" (c), "f" (b));
  return result;
}

/*
 * __fmsubs - Floating Multiply-Subtract Single
 *
 * (a * c - b) single precision
 *
 * Double precision arguments are used to prevent the compiler from
 * issuing frsp instructions upstream.
 */
static __inline__ float __fmsubs (double a, double c, double b) __attribute__((always_inline));
static __inline__ float
__fmsubs (double a, double c, double b)
{
  float result;
  __asm__ ("fmsubs %0, %1, %2, %3"
           /* outputs: */ : "=f" (result)
           /* inputs:  */ : "f" (a), "f" (c), "f" (b));
  return result;
}

/*
 * __fmul - Floating Multiply (Double-Precision)
 *
 * (a * c) double precision
 */
static __inline__ double __fmul (double a, double c) __attribute__((always_inline));
static __inline__ double
__fmul (double a, double c)
{
  double result;
  __asm__ ("fmul %0, %1, %2"
           /* outputs: */ : "=f" (result)
           /* inputs:  */ : "f" (a), "f" (c));
  return result;
}

/*
 * __fmuls - Floating Multiply Single
 *
 * (a * c) single precision
 *
 * Double precision arguments are used to prevent the compiler from
 * issuing frsp instructions upstream.
 */
static __inline__ float __fmuls (double a, double c) __attribute__((always_inline));
static __inline__ float
__fmuls (double a, double c)
{
  float result;
  __asm__ ("fmuls %0, %1, %2"
           /* outputs: */ : "=f" (result)
           /* inputs:  */ : "f" (a), "f" (c));
  return result;
}

/*
 * __fnmadd - Floating Negative Multiply-Add (Double-Precision)
 *
 * -(a * c + b) double precision
 */
static __inline__ double __fnmadd (double a, double c, double b) __attribute__((always_inline));
static __inline__ double
__fnmadd (double a, double c, double b)
{
  double result;
  __asm__ ("fnmadd %0, %1, %2, %3"
           /* outputs: */ : "=f" (result)
           /* inputs:  */ : "f" (a), "f" (c), "f" (b));
  return result;
}
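/*
 * Usage sketch (illustrative, not part of the original API): a short
 * single-precision multiply-add chain.  Keeping the accumulator in a
 * double variable, per the notes above, avoids intermediate frsp
 * instructions between the fmadds operations (the float-to-double
 * widening is exact, so no rounding is introduced).
 */
static __inline__ float
__example_dot3 (const float *a, const float *b)
{
  double acc = __fmuls (a[0], b[0]);
  acc = __fmadds (a[1], b[1], acc);
  return __fmadds (a[2], b[2], acc);
}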
/*
 * __fnmadds - Floating Negative Multiply-Add Single
 *
 * -(a * c + b) single precision
 *
 * Double precision arguments are used to prevent the compiler from
 * issuing frsp instructions upstream.
 */
static __inline__ float __fnmadds (double a, double c, double b) __attribute__((always_inline));
static __inline__ float
__fnmadds (double a, double c, double b)
{
  float result;
  __asm__ ("fnmadds %0, %1, %2, %3"
           /* outputs: */ : "=f" (result)
           /* inputs:  */ : "f" (a), "f" (c), "f" (b));
  return result;
}

/*
 * __fnmsub - Floating Negative Multiply-Subtract (Double-Precision)
 *
 * -(a * c - b) double precision
 */
static __inline__ double __fnmsub (double a, double c, double b) __attribute__((always_inline));
static __inline__ double
__fnmsub (double a, double c, double b)
{
  double result;
  __asm__ ("fnmsub %0, %1, %2, %3"
           /* outputs: */ : "=f" (result)
           /* inputs:  */ : "f" (a), "f" (c), "f" (b));
  return result;
}

/*
 * __fnmsubs - Floating Negative Multiply-Subtract Single
 *
 * -(a * c - b) single precision
 *
 * Double precision arguments are used to prevent the compiler from
 * issuing frsp instructions upstream.
 */
static __inline__ float __fnmsubs (double a, double c, double b) __attribute__((always_inline));
static __inline__ float
__fnmsubs (double a, double c, double b)
{
  float result;
  __asm__ ("fnmsubs %0, %1, %2, %3"
           /* outputs: */ : "=f" (result)
           /* inputs:  */ : "f" (a), "f" (c), "f" (b));
  return result;
}

/*
 * __fres - Floating Reciprocal Estimate
 *
 * Produces a single precision result with 5 bits of accuracy.
 * Note: not valid on the PowerPC 601.
 *
 * ??? CW: float __fres(float)
 */
static __inline__ float __fres (float val) __attribute__((always_inline));
static __inline__ float
__fres (float val)
{
  float estimate;
  __asm__ ("fres %0,%1"
           /* outputs: */ : "=f" (estimate)
           /* inputs:  */ : "f" (val));
  return estimate;
}

/*
 * __frsp - Floating Round to Single-Precision
 */
static __inline__ float __frsp (double d) __attribute__((always_inline));
static __inline__ float
__frsp (double d)
{
  float result;
  __asm__ ("frsp %0, %1"
           /* outputs: */ : "=f" (result)
           /* inputs:  */ : "f" (d));
  return result;
}

/*
 * __frsqrte - Floating Reciprocal Square Root Estimate
 *
 * Note: not valid on the PowerPC 601.
 */
static __inline__ double __frsqrte (double val) __attribute__((always_inline));
static __inline__ double
__frsqrte (double val)
{
  double estimate;

  __asm__ ("frsqrte %0,%1"
           /* outputs: */ : "=f" (estimate)
           /* inputs:  */ : "f" (val));
  return estimate;
}

/*
 * __frsqrtes - Floating Reciprocal Square Root Estimate Single
 *
 * An artificial single precision variant of frsqrte (hence the
 * frsqrte mnemonic below); the float result discourages the compiler
 * from issuing an frsp afterward.
 */
static __inline__ float __frsqrtes (double f) __attribute__((always_inline));
static __inline__ float
__frsqrtes (double f)
{
  float result;
  __asm__ ("frsqrte %0, %1"
           /* outputs: */ : "=f" (result)
           /* inputs:  */ : "f" (f));
  return result;
}
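/*
 * Usage sketch (illustrative, not part of the original API): the
 * rough frsqrte estimate is usually refined with Newton-Raphson
 * steps, y' = y * (1.5 - 0.5 * x * y * y), each of which roughly
 * doubles the number of accurate bits.
 */
static __inline__ double
__example_rsqrt_refined (double x)
{
  double y = __frsqrte (x);
  return y * (1.5 - 0.5 * x * y * y);
}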
/*
 * __fsel - Floating Select
 *
 * if (test >= 0) return a; else return b;
 *
 * Note: not valid on the PowerPC 601.
 */
static __inline__ double __fsel (double test, double a, double b) __attribute__((always_inline));
static __inline__ double
__fsel (double test, double a, double b)
{
  double result;
  __asm__ ("fsel %0,%1,%2,%3"
           /* outputs: */ : "=f" (result)
           /* inputs:  */ : "f" (test), "f" (a), "f" (b));
  return result;
}

/*
 * __fsels - Floating Select (Single-Precision variant)
 *
 * An artificial single precision variant of fsel.  This produces the
 * same results as fsel, but is useful because the result is cast as
 * a float, discouraging the compiler from issuing a frsp instruction
 * afterward.
 */
static __inline__ float __fsels (double test, double a, double b) __attribute__((always_inline));
static __inline__ float
__fsels (double test, double a, double b)
{
  float result;
  __asm__ ("fsel %0,%1,%2,%3"
           /* outputs: */ : "=f" (result)
           /* inputs:  */ : "f" (test), "f" (a), "f" (b));
  return result;
}

/*
 * __fsqrt - Floating-Point Square Root (Double-Precision)
 *
 * WARNING: Illegal instruction for PowerPC 603, 604, 750, 7400, 7410,
 *          7450, and 7455
 */
static __inline__ double __fsqrt (double d) __attribute__((always_inline));
static __inline__ double
__fsqrt (double d)
{
  double result;
  __asm__ ("fsqrt %0, %1"
           /* outputs: */ : "=f" (result)
           /* inputs:  */ : "f" (d));
  return result;
}

/*
 * __fsqrts - Floating-Point Square Root Single-Precision
 *
 * WARNING: Illegal instruction for PowerPC 603, 604, 750, 7400, 7410,
 *          7450, and 7455
 */
static __inline__ float __fsqrts (float f) __attribute__((always_inline));
static __inline__ float
__fsqrts (float f)
{
  float result;
  __asm__ ("fsqrts %0, %1"
           /* outputs: */ : "=f" (result)
           /* inputs:  */ : "f" (f));
  return result;
}

/*
 * __mulhw - Multiply High Word
 */
static __inline__ int __mulhw (int a, int b) __attribute__((always_inline));
static __inline__ int
__mulhw (int a, int b)
{
  int result;
  __asm__ ("mulhw %0, %1, %2"
           /* outputs: */ : "=r" (result)
           /* inputs:  */ : "r" (a), "r" (b));
  return result;
}

/*
 * __mulhwu - Multiply High Word Unsigned
 */
static __inline__ unsigned int __mulhwu (unsigned int a, unsigned int b) __attribute__((always_inline));
static __inline__ unsigned int
__mulhwu (unsigned int a, unsigned int b)
{
  unsigned int result;
  __asm__ ("mulhwu %0, %1, %2"
           /* outputs: */ : "=r" (result)
           /* inputs:  */ : "r" (a), "r" (b));
  return result;
}

/*
 * __stfiwx - Store Floating-Point as Integer Word Indexed
 *
 * void __stfiwx(double, void *, int);
 */
#define __stfiwx(value, base, index) \
  __asm__ ("stfiwx %0, %1, %2" : /*no result*/ \
           : "f" (value), "b%" (index), "r" (base) : "memory")


/*******************************************************************
 *                    Miscellaneous Functions                      *
 *******************************************************************/

/*
 * __nop - no operation (PowerPC preferred form)
 *
 * void __nop(void);
 */
#define __nop() \
  __asm__ ("ori 0,0,0")

/*
 * __icbi - Instruction Cache Block Invalidate
 *
 * void __icbi(void *, int);
 */
#define __icbi(base, index) \
  __asm__ ("icbi %0, %1" : /*no result*/ : "b%" (index), "r" (base) : "memory")
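/*
 * Usage sketch (illustrative, not part of the original API): the
 * classic sequence for exposing newly written instructions - push
 * each data cache line to memory, wait for the stores, invalidate
 * the matching instruction cache line, then resynchronize.  The
 * 32-byte stride assumes the cacheline size documented above.
 */
static __inline__ void
__example_flush_code (void *addr, unsigned long len)
{
  unsigned long i;
  for (i = 0; i < len; i += 32)
    {
      __dcbst (addr, (int) i);   /* push the line to memory */
      __sync ();                 /* wait for the store to complete */
      __icbi (addr, (int) i);    /* toss any stale instructions */
    }
  __isync ();                    /* discard prefetched instructions */
}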
/*
 * __mffs - Move from FPSCR
 */
static __inline__ double __mffs (void) __attribute__((always_inline));
static __inline__ double
__mffs (void)
{
  double result;
  __asm__ volatile ("mffs %0"
                    /* outputs: */ : "=f" (result));
  return result;
}

/*
 * __mfspr - Move from Special Purpose Register
 *
 * int __mfspr(int);
 */
#define __mfspr(spr) \
  __extension__ ({ long __ppc_i_mfsprResult; \
     __asm__ volatile ("mfspr %0, %1" : "=r" (__ppc_i_mfsprResult) : "n" (spr)); \
     /*return*/ __ppc_i_mfsprResult; })

/*
 * __mtfsf - Move to FPSCR Fields
 *
 * void __mtfsf(int, double);
 */
#define __mtfsf(mask, newValue) \
  __asm__ volatile ("mtfsf %0, %1" : : "n" (mask), "f" (newValue))

/*
 * __mtspr - Move to Special Purpose Register
 *
 * void __mtspr(int, int);
 */
#define __mtspr(spr, value) \
  __asm__ volatile ("mtspr %0, %1" : : "n" (spr), "r" (value))

/*
 * __OSReadSwapSInt16
 *
 * lhbrx for signed shorts.  This will do the required sign
 * extension after load and byteswap.
 */
static __inline__ signed short __OSReadSwapSInt16 (signed short *base, int index) __attribute__((always_inline));
static __inline__ signed short
__OSReadSwapSInt16 (signed short *base, int index)
{
  signed long result;
  __asm__ volatile ("lhbrx %0, %1, %2"
                    /* outputs:  */ : "=r" (result)
                    /* inputs:   */ : "b%" (index), "r" (base)
                    /* clobbers: */ : "memory");
  return result;
}

/*
 * __OSReadSwapUInt16
 */
static __inline__ unsigned short __OSReadSwapUInt16 (volatile void *base, int index) __attribute__((always_inline));
static __inline__ unsigned short
__OSReadSwapUInt16 (volatile void *base, int index)
{
  unsigned long result;
  __asm__ volatile ("lhbrx %0, %1, %2"
                    /* outputs:  */ : "=r" (result)
                    /* inputs:   */ : "b" (index), "r" (base)
                    /* clobbers: */ : "memory");
  return result;
}

/*
 * __astrcmp - assembly strcmp
 */
static __inline__ int __astrcmp (const char *in_s1, const char *in_s2) __attribute__((always_inline));
static __inline__ int
__astrcmp (const char *in_s1, const char *in_s2)
{
  int result, temp;
  register const char *s1 = in_s1 - 1;
  register const char *s2 = in_s2 - 1;

  __asm__ ("1:lbzu %0,1(%1)\n"
           "\tcmpwi cr1,%0,0\n"
           "\tlbzu %3,1(%2)\n"
           "\tsubf. %0,%3,%0\n"
           "\tbeq- cr1,2f\n"
           "\tbeq+ 1b\n2:"
           /* outputs:  */ : "=&r" (result), "+b" (s1), "+b" (s2), "=r" (temp)
           /* inputs:   */ :
           /* clobbers: */ : "cr0", "cr1", "memory");

  return result;

  /*
   * "=&r" (result) means: 'result' is written on (the '='), it's any GP
   *                       register (the 'r'), and it must not be the same as
   *                       any of the input registers (the '&').
   * "+b" (s1) means: 's1' is read from and written to (the '+'),
   *                  and it must be a base GP register (i.e., not R0.)
   * "=r" (temp) means: 'temp' is any GP reg and it's only written to.
   *
   * "memory" in the 'clobbers' section means that gcc will make
   * sure that anything that should be in memory IS there
   * before calling this routine.
   */
}

#endif /* (defined(__ppc__) || defined(__ppc64__)) && ! defined(__MWERKS__) */

#endif /* _PPC_INTRINSICS_H_ */