      1 /* APPLE LOCAL file mainline 2005-06-30 Radar 4131077 */
      2 /* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007
      3    Free Software Foundation, Inc.
      4 
      5    This file is part of GCC.
      6 
      7    GCC is free software; you can redistribute it and/or modify
      8    it under the terms of the GNU General Public License as published by
      9    the Free Software Foundation; either version 2, or (at your option)
     10    any later version.
     11 
     12    GCC is distributed in the hope that it will be useful,
     13    but WITHOUT ANY WARRANTY; without even the implied warranty of
     14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15    GNU General Public License for more details.
     16 
     17    You should have received a copy of the GNU General Public License
     18    along with GCC; see the file COPYING.  If not, write to
     19    the Free Software Foundation, 51 Franklin Street, Fifth Floor,
     20    Boston, MA 02110-1301, USA.  */
     21 
     22 /* As a special exception, if you include this header file into source
     23    files compiled by GCC, this header file does not by itself cause
     24    the resulting executable to be covered by the GNU General Public
     25    License.  This exception does not however invalidate any other
     26    reasons why the executable file might be covered by the GNU General
     27    Public License.  */
     28 
     29 /* Implemented from the specification included in the Intel C++ Compiler
     30    User Guide and Reference, version 9.0.  */
     31 
     32 #ifndef _XMMINTRIN_H_INCLUDED
     33 #define _XMMINTRIN_H_INCLUDED
     34 
     35 #ifndef __SSE__
     36 # error "SSE instruction set not enabled"
     37 #else
     38 
     39 /* We need type definitions from the MMX header file.  */
     40 #include <mmintrin.h>
     41 
     42 /* Get _mm_malloc () and _mm_free ().  */
     43 /* APPLE LOCAL begin xmmintrin.h for kernel 4123064 */
     44 #if __STDC_HOSTED__
     45 #include <mm_malloc.h>
     46 #endif
     47 /* APPLE LOCAL end xmmintrin.h for kernel 4123064 */
     48 
     49 /* The Intel API is flexible enough that we must allow aliasing with other
     50    vector types, and their scalar components.  */
     51 typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
     52 
     53 /* Internal data types for implementing the intrinsics.  */
     54 typedef float __v4sf __attribute__ ((__vector_size__ (16)));
     55 
     56 #if defined(__clang__) && defined(WITH_SYNTAX_CHECK)
      57 /* Workaround: "clang -fsyntax-only" may be run against this header, but it
      58  * can choke on GCC builtins it does not recognize, so declare them here.
      59  */
     60 int __builtin_ia32_cvtss2si (__v4sf);
     61 int __builtin_ia32_cvttss2si (__v4sf);
     62 __m128 __builtin_ia32_addps (__v4sf, __v4sf);
     63 __m128 __builtin_ia32_addss (__v4sf, __v4sf);
     66 __m128 __builtin_ia32_andnps (__m128, __m128);
     67 __m128 __builtin_ia32_andps (__m128, __m128);
     68 __m128 __builtin_ia32_cmpeqps (__v4sf, __v4sf);
     69 __m128 __builtin_ia32_cmpeqss (__v4sf, __v4sf);
     70 __m128 __builtin_ia32_cmpgeps (__v4sf, __v4sf);
     71 __m128 __builtin_ia32_cmpgtps (__v4sf, __v4sf);
     72 __m128 __builtin_ia32_cmpleps (__v4sf, __v4sf);
     73 __m128 __builtin_ia32_cmpless (__v4sf, __v4sf);
     74 __m128 __builtin_ia32_cmpltps (__v4sf, __v4sf);
     75 __m128 __builtin_ia32_cmpltss (__v4sf, __v4sf);
     76 __m128 __builtin_ia32_cmpneqps (__v4sf, __v4sf);
     77 __m128 __builtin_ia32_cmpneqss (__v4sf, __v4sf);
     78 __m128 __builtin_ia32_cmpngeps (__v4sf, __v4sf);
     79 __m128 __builtin_ia32_cmpngtps (__v4sf, __v4sf);
     80 __m128 __builtin_ia32_cmpnleps (__v4sf, __v4sf);
     81 __m128 __builtin_ia32_cmpnless (__v4sf, __v4sf);
     82 __m128 __builtin_ia32_cmpnltps (__v4sf, __v4sf);
     83 __m128 __builtin_ia32_cmpnltss (__v4sf, __v4sf);
     84 __m128 __builtin_ia32_cmpordps (__v4sf, __v4sf);
     85 __m128 __builtin_ia32_cmpordss (__v4sf, __v4sf);
     86 __m128 __builtin_ia32_cmpunordps (__v4sf, __v4sf);
     87 __m128 __builtin_ia32_cmpunordss (__v4sf, __v4sf);
     88 __m128 __builtin_ia32_cvtsi2ss (__v4sf, int);
     89 __m128 __builtin_ia32_divps (__v4sf, __v4sf);
     90 __m128 __builtin_ia32_divss (__v4sf, __v4sf);
     91 __m128 __builtin_ia32_movss (__v4sf, __v4sf);
     92 __m128 __builtin_ia32_mulps (__v4sf, __v4sf);
     94 __m128 __builtin_ia32_mulss (__v4sf, __v4sf);
     96 __m128 __builtin_ia32_orps (__m128, __m128);
     97 __m128 __builtin_ia32_subps (__v4sf, __v4sf);
     98 __m128 __builtin_ia32_subss (__v4sf, __v4sf);
    100 __m128 __builtin_ia32_xorps (__m128, __m128);
    101 __m128 __builtin_ia32_loadhps (__v4sf, const __v2si *);
    102 __m128 __builtin_ia32_loadlps (__v4sf, const __v2si *);
    103 __m128 __builtin_ia32_movhlps (__v4sf, __v4sf);
    104 __m128 __builtin_ia32_movlhps (__v4sf, __v4sf);
    105 __m128 __builtin_ia32_shufps (__v4sf, __v4sf, int const);
    106 __m128 __builtin_ia32_unpckhps (__v4sf, __v4sf);
    107 __m128 __builtin_ia32_unpcklps (__v4sf, __v4sf);
    108 __m128 __builtin_ia32_loadups (float const *);
    109 __m64 __builtin_ia32_vec_set_v4hi (__v4hi, int const, int const);
    110 float __builtin_ia32_vec_ext_v4sf (__v4sf, const int);
    111 int __builtin_ia32_vec_ext_v4hi (__v4hi, const int);
    112 long long __builtin_ia32_cvtss2si64 (__v4sf);
    113 long long __builtin_ia32_cvttss2si64 (__v4sf);
    114 __m128 __builtin_ia32_cvtsi642ss (__v4sf, long long);
    115 #endif
    116 
    117 /* Create a selector for use with the SHUFPS instruction.  */
    118 #define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
    119  (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
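         /* Example (illustrative only): _MM_SHUFFLE packs four 2-bit element
            indices, highest first, into a single immediate byte.  For instance
            _MM_SHUFFLE (3, 2, 1, 0) evaluates to 0xE4, the identity selector,
            while _MM_SHUFFLE (0, 0, 0, 0) is 0x00 and broadcasts element 0 when
            used with _mm_shuffle_ps (defined further down in this header):

              __m128 __dup0 = _mm_shuffle_ps (__a, __a, _MM_SHUFFLE (0, 0, 0, 0));
         */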
    120 
    121 /* Constants for use with _mm_prefetch.  */
    122 enum _mm_hint
    123 {
    124   _MM_HINT_T0 = 3,
    125   _MM_HINT_T1 = 2,
    126   _MM_HINT_T2 = 1,
    127   _MM_HINT_NTA = 0
    128 };
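         /* Usage sketch (illustrative only; the _mm_prefetch intrinsic itself is
            provided further down in this header):

              _mm_prefetch ((char const *) __p, _MM_HINT_T0);    into all cache levels
              _mm_prefetch ((char const *) __p, _MM_HINT_NTA);   non-temporal, minimise cache pollution
         */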
    129 
    130 /* Bits in the MXCSR.  */
    131 #define _MM_EXCEPT_MASK       0x003f
    132 #define _MM_EXCEPT_INVALID    0x0001
    133 #define _MM_EXCEPT_DENORM     0x0002
    134 #define _MM_EXCEPT_DIV_ZERO   0x0004
    135 #define _MM_EXCEPT_OVERFLOW   0x0008
    136 #define _MM_EXCEPT_UNDERFLOW  0x0010
    137 #define _MM_EXCEPT_INEXACT    0x0020
    138 
    139 #define _MM_MASK_MASK         0x1f80
    140 #define _MM_MASK_INVALID      0x0080
    141 #define _MM_MASK_DENORM       0x0100
    142 #define _MM_MASK_DIV_ZERO     0x0200
    143 #define _MM_MASK_OVERFLOW     0x0400
    144 #define _MM_MASK_UNDERFLOW    0x0800
    145 #define _MM_MASK_INEXACT      0x1000
    146 
    147 #define _MM_ROUND_MASK        0x6000
    148 #define _MM_ROUND_NEAREST     0x0000
    149 #define _MM_ROUND_DOWN        0x2000
    150 #define _MM_ROUND_UP          0x4000
    151 #define _MM_ROUND_TOWARD_ZERO 0x6000
    152 
    153 #define _MM_FLUSH_ZERO_MASK   0x8000
    154 #define _MM_FLUSH_ZERO_ON     0x8000
    155 #define _MM_FLUSH_ZERO_OFF    0x0000
    156 
    157 /* APPLE LOCAL begin nodebug inline 4152603 */
    158 #define __always_inline__ __always_inline__, __nodebug__
    159 /* APPLE LOCAL end nodebug inline 4152603 */
    160 
    161 /* APPLE LOCAL begin radar 5618945 */
    162 #undef __STATIC_INLINE
    163 #ifdef __GNUC_STDC_INLINE__
    164 #define __STATIC_INLINE __inline
    165 #else
    166 #define __STATIC_INLINE static __inline
    167 #endif
    168 /* APPLE LOCAL end radar 5618945 */
    169 
    170 /* Create a vector of zeros.  */
    171 /* APPLE LOCAL begin radar 4152603 */
    172 /* APPLE LOCAL begin radar 5618945 */
    173 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    174 /* APPLE LOCAL end radar 5618945 */
    175 _mm_setzero_ps (void)
    176 {
    177   return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f };
    178 }
    179 
    180 /* Perform the respective operation on the lower SPFP (single-precision
    181    floating-point) values of A and B; the upper three SPFP values are
    182    passed through from A.  */
    183 
    184 /* APPLE LOCAL begin radar 5618945 */
    185 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    186 /* APPLE LOCAL end radar 5618945 */
    187 _mm_add_ss (__m128 __A, __m128 __B)
    188 {
    189   return (__m128) __builtin_ia32_addss ((__v4sf)__A, (__v4sf)__B);
    190 }
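         /* Example (illustrative only): with __A = { 1, 2, 3, 4 } and
            __B = { 10, 20, 30, 40 }, _mm_add_ss (__A, __B) yields
            { 11, 2, 3, 4 }: only element 0 is summed, elements 1-3 are copied
            from __A.  */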
    191 
    192 /* APPLE LOCAL begin radar 5618945 */
    193 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    194 /* APPLE LOCAL end radar 5618945 */
    195 _mm_sub_ss (__m128 __A, __m128 __B)
    196 {
    197   return (__m128) __builtin_ia32_subss ((__v4sf)__A, (__v4sf)__B);
    198 }
    199 
    200 /* APPLE LOCAL begin radar 5618945 */
    201 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    202 /* APPLE LOCAL end radar 5618945 */
    203 _mm_mul_ss (__m128 __A, __m128 __B)
    204 {
    205   return (__m128) __builtin_ia32_mulss ((__v4sf)__A, (__v4sf)__B);
    206 }
    207 
    208 /* APPLE LOCAL begin radar 5618945 */
    209 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    210 /* APPLE LOCAL end radar 5618945 */
    211 _mm_div_ss (__m128 __A, __m128 __B)
    212 {
    213   return (__m128) __builtin_ia32_divss ((__v4sf)__A, (__v4sf)__B);
    214 }
    215 
    216 /* APPLE LOCAL begin radar 5618945 */
    217 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    218 /* APPLE LOCAL end radar 5618945 */
    219 _mm_sqrt_ss (__m128 __A)
    220 {
    221   return (__m128) __builtin_ia32_sqrtss ((__v4sf)__A);
    222 }
    223 
    224 /* APPLE LOCAL begin radar 5618945 */
    225 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    226 /* APPLE LOCAL end radar 5618945 */
    227 _mm_rcp_ss (__m128 __A)
    228 {
    229   return (__m128) __builtin_ia32_rcpss ((__v4sf)__A);
    230 }
    231 
    232 /* APPLE LOCAL begin radar 5618945 */
    233 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    234 /* APPLE LOCAL end radar 5618945 */
    235 _mm_rsqrt_ss (__m128 __A)
    236 {
    237   return (__m128) __builtin_ia32_rsqrtss ((__v4sf)__A);
    238 }
    239 
    240 /* APPLE LOCAL begin radar 5618945 */
    241 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    242 /* APPLE LOCAL end radar 5618945 */
    243 _mm_min_ss (__m128 __A, __m128 __B)
    244 {
    245   return (__m128) __builtin_ia32_minss ((__v4sf)__A, (__v4sf)__B);
    246 }
    247 
    248 /* APPLE LOCAL begin radar 5618945 */
    249 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    250 /* APPLE LOCAL end radar 5618945 */
    251 _mm_max_ss (__m128 __A, __m128 __B)
    252 {
    253   return (__m128) __builtin_ia32_maxss ((__v4sf)__A, (__v4sf)__B);
    254 }
    255 
    256 /* Perform the respective operation on the four SPFP values in A and B.  */
    257 
    258 /* APPLE LOCAL begin radar 5618945 */
    259 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    260 /* APPLE LOCAL end radar 5618945 */
    261 _mm_add_ps (__m128 __A, __m128 __B)
    262 {
    263   return (__m128) __builtin_ia32_addps ((__v4sf)__A, (__v4sf)__B);
    264 }
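         /* Example (illustrative only): with __A = { 1, 2, 3, 4 } and
            __B = { 10, 20, 30, 40 }, _mm_add_ps (__A, __B) yields
            { 11, 22, 33, 44 }; each of the four elements is operated on
            independently.  */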
    265 
    266 /* APPLE LOCAL begin radar 5618945 */
    267 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    268 /* APPLE LOCAL end radar 5618945 */
    269 _mm_sub_ps (__m128 __A, __m128 __B)
    270 {
    271   return (__m128) __builtin_ia32_subps ((__v4sf)__A, (__v4sf)__B);
    272 }
    273 
    274 /* APPLE LOCAL begin radar 5618945 */
    275 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    276 /* APPLE LOCAL end radar 5618945 */
    277 _mm_mul_ps (__m128 __A, __m128 __B)
    278 {
    279   return (__m128) __builtin_ia32_mulps ((__v4sf)__A, (__v4sf)__B);
    280 }
    281 
    282 /* APPLE LOCAL begin radar 5618945 */
    283 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    284 /* APPLE LOCAL end radar 5618945 */
    285 _mm_div_ps (__m128 __A, __m128 __B)
    286 {
    287   return (__m128) __builtin_ia32_divps ((__v4sf)__A, (__v4sf)__B);
    288 }
    289 
    290 /* APPLE LOCAL begin radar 5618945 */
    291 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    292 /* APPLE LOCAL end radar 5618945 */
    293 _mm_sqrt_ps (__m128 __A)
    294 {
    295   return (__m128) __builtin_ia32_sqrtps ((__v4sf)__A);
    296 }
    297 
    298 /* APPLE LOCAL begin radar 5618945 */
    299 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    300 /* APPLE LOCAL end radar 5618945 */
    301 _mm_rcp_ps (__m128 __A)
    302 {
    303   return (__m128) __builtin_ia32_rcpps ((__v4sf)__A);
    304 }
    305 
    306 /* APPLE LOCAL begin radar 5618945 */
    307 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    308 /* APPLE LOCAL end radar 5618945 */
    309 _mm_rsqrt_ps (__m128 __A)
    310 {
    311   return (__m128) __builtin_ia32_rsqrtps ((__v4sf)__A);
    312 }
    313 
    314 /* APPLE LOCAL begin radar 5618945 */
    315 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    316 /* APPLE LOCAL end radar 5618945 */
    317 _mm_min_ps (__m128 __A, __m128 __B)
    318 {
    319   return (__m128) __builtin_ia32_minps ((__v4sf)__A, (__v4sf)__B);
    320 }
    321 
    322 /* APPLE LOCAL begin radar 5618945 */
    323 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    324 /* APPLE LOCAL end radar 5618945 */
    325 _mm_max_ps (__m128 __A, __m128 __B)
    326 {
    327   return (__m128) __builtin_ia32_maxps ((__v4sf)__A, (__v4sf)__B);
    328 }
    329 
    330 /* Perform logical bit-wise operations on 128-bit values.  */
    331 
    332 /* APPLE LOCAL begin radar 5618945 */
    333 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    334 /* APPLE LOCAL end radar 5618945 */
    335 _mm_and_ps (__m128 __A, __m128 __B)
    336 {
    337   return __builtin_ia32_andps (__A, __B);
    338 }
    339 
    340 /* APPLE LOCAL begin radar 5618945 */
    341 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    342 /* APPLE LOCAL end radar 5618945 */
    343 _mm_andnot_ps (__m128 __A, __m128 __B)
    344 {
    345   return __builtin_ia32_andnps (__A, __B);
    346 }
    347 
    348 /* APPLE LOCAL begin radar 5618945 */
    349 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    350 /* APPLE LOCAL end radar 5618945 */
    351 _mm_or_ps (__m128 __A, __m128 __B)
    352 {
    353   return __builtin_ia32_orps (__A, __B);
    354 }
    355 
    356 /* APPLE LOCAL begin radar 5618945 */
    357 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    358 /* APPLE LOCAL end radar 5618945 */
    359 _mm_xor_ps (__m128 __A, __m128 __B)
    360 {
    361   return __builtin_ia32_xorps (__A, __B);
    362 }
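         /* Example (illustrative only): these work on the raw bit patterns, which
            makes them useful for sign manipulation.  Clearing the sign bit of every
            element gives an absolute value:

              __m128 __sign = _mm_set1_ps (-0.0f);           only the sign bit set
              __m128 __abs  = _mm_andnot_ps (__sign, __x);   ~sign & x  ==  |x|

            (_mm_set1_ps is defined further down in this header.)  */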
    363 
    364 /* Perform a comparison on the lower SPFP values of A and B.  If the
    365    comparison is true, place a mask of all ones in the result, otherwise a
    366    mask of zeros.  The upper three SPFP values are passed through from A.  */
    367 
    368 /* APPLE LOCAL begin radar 5618945 */
    369 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    370 /* APPLE LOCAL end radar 5618945 */
    371 _mm_cmpeq_ss (__m128 __A, __m128 __B)
    372 {
    373   return (__m128) __builtin_ia32_cmpeqss ((__v4sf)__A, (__v4sf)__B);
    374 }
    375 
    376 /* APPLE LOCAL begin radar 5618945 */
    377 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    378 /* APPLE LOCAL end radar 5618945 */
    379 _mm_cmplt_ss (__m128 __A, __m128 __B)
    380 {
    381   return (__m128) __builtin_ia32_cmpltss ((__v4sf)__A, (__v4sf)__B);
    382 }
    383 
    384 /* APPLE LOCAL begin radar 5618945 */
    385 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    386 /* APPLE LOCAL end radar 5618945 */
    387 _mm_cmple_ss (__m128 __A, __m128 __B)
    388 {
    389   return (__m128) __builtin_ia32_cmpless ((__v4sf)__A, (__v4sf)__B);
    390 }
    391 
    392 /* APPLE LOCAL begin radar 5618945 */
    393 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    394 /* APPLE LOCAL end radar 5618945 */
    395 _mm_cmpgt_ss (__m128 __A, __m128 __B)
    396 {
    397   return (__m128) __builtin_ia32_movss ((__v4sf) __A,
    398 					(__v4sf)
    399 					__builtin_ia32_cmpltss ((__v4sf) __B,
    400 								(__v4sf)
    401 								__A));
    402 }
    403 
    404 /* APPLE LOCAL begin radar 5618945 */
    405 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    406 /* APPLE LOCAL end radar 5618945 */
    407 _mm_cmpge_ss (__m128 __A, __m128 __B)
    408 {
    409   return (__m128) __builtin_ia32_movss ((__v4sf) __A,
    410 					(__v4sf)
    411 					__builtin_ia32_cmpless ((__v4sf) __B,
    412 								(__v4sf)
    413 								__A));
    414 }
    415 
    416 /* APPLE LOCAL begin radar 5618945 */
    417 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    418 /* APPLE LOCAL end radar 5618945 */
    419 _mm_cmpneq_ss (__m128 __A, __m128 __B)
    420 {
    421   return (__m128) __builtin_ia32_cmpneqss ((__v4sf)__A, (__v4sf)__B);
    422 }
    423 
    424 /* APPLE LOCAL begin radar 5618945 */
    425 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    426 /* APPLE LOCAL end radar 5618945 */
    427 _mm_cmpnlt_ss (__m128 __A, __m128 __B)
    428 {
    429   return (__m128) __builtin_ia32_cmpnltss ((__v4sf)__A, (__v4sf)__B);
    430 }
    431 
    432 /* APPLE LOCAL begin radar 5618945 */
    433 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    434 /* APPLE LOCAL end radar 5618945 */
    435 _mm_cmpnle_ss (__m128 __A, __m128 __B)
    436 {
    437   return (__m128) __builtin_ia32_cmpnless ((__v4sf)__A, (__v4sf)__B);
    438 }
    439 
    440 /* APPLE LOCAL begin radar 5618945 */
    441 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    442 /* APPLE LOCAL end radar 5618945 */
    443 _mm_cmpngt_ss (__m128 __A, __m128 __B)
    444 {
    445   return (__m128) __builtin_ia32_movss ((__v4sf) __A,
    446 					(__v4sf)
    447 					__builtin_ia32_cmpnltss ((__v4sf) __B,
    448 								 (__v4sf)
    449 								 __A));
    450 }
    451 
    452 /* APPLE LOCAL begin radar 5618945 */
    453 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    454 /* APPLE LOCAL end radar 5618945 */
    455 _mm_cmpnge_ss (__m128 __A, __m128 __B)
    456 {
    457   return (__m128) __builtin_ia32_movss ((__v4sf) __A,
    458 					(__v4sf)
    459 					__builtin_ia32_cmpnless ((__v4sf) __B,
    460 								 (__v4sf)
    461 								 __A));
    462 }
    463 
    464 /* APPLE LOCAL begin radar 5618945 */
    465 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    466 /* APPLE LOCAL end radar 5618945 */
    467 _mm_cmpord_ss (__m128 __A, __m128 __B)
    468 {
    469   return (__m128) __builtin_ia32_cmpordss ((__v4sf)__A, (__v4sf)__B);
    470 }
    471 
    472 /* APPLE LOCAL begin radar 5618945 */
    473 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    474 /* APPLE LOCAL end radar 5618945 */
    475 _mm_cmpunord_ss (__m128 __A, __m128 __B)
    476 {
    477   return (__m128) __builtin_ia32_cmpunordss ((__v4sf)__A, (__v4sf)__B);
    478 }
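         /* Example (illustrative only): with __A = { 1, 2, 3, 4 } and
            __B = { 5, 2, 3, 4 }, _mm_cmplt_ss (__A, __B) sets element 0 to the
            all-ones bit pattern (1 < 5 is true) and copies elements 1-3 from __A.
            The all-ones pattern reads back as a NaN when treated as a float; it
            is intended to feed the bit-wise operations above, not arithmetic.  */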
    479 
    480 /* Perform a comparison on the four SPFP values of A and B.  For each
    481    element, if the comparison is true, place a mask of all ones in the
    482    result, otherwise a mask of zeros.  */
    483 
    484 /* APPLE LOCAL begin radar 5618945 */
    485 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    486 /* APPLE LOCAL end radar 5618945 */
    487 _mm_cmpeq_ps (__m128 __A, __m128 __B)
    488 {
    489   return (__m128) __builtin_ia32_cmpeqps ((__v4sf)__A, (__v4sf)__B);
    490 }
    491 
    492 /* APPLE LOCAL begin radar 5618945 */
    493 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    494 /* APPLE LOCAL end radar 5618945 */
    495 _mm_cmplt_ps (__m128 __A, __m128 __B)
    496 {
    497   return (__m128) __builtin_ia32_cmpltps ((__v4sf)__A, (__v4sf)__B);
    498 }
    499 
    500 /* APPLE LOCAL begin radar 5618945 */
    501 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    502 /* APPLE LOCAL end radar 5618945 */
    503 _mm_cmple_ps (__m128 __A, __m128 __B)
    504 {
    505   return (__m128) __builtin_ia32_cmpleps ((__v4sf)__A, (__v4sf)__B);
    506 }
    507 
    508 /* APPLE LOCAL begin radar 5618945 */
    509 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    510 /* APPLE LOCAL end radar 5618945 */
    511 _mm_cmpgt_ps (__m128 __A, __m128 __B)
    512 {
    513   return (__m128) __builtin_ia32_cmpgtps ((__v4sf)__A, (__v4sf)__B);
    514 }
    515 
    516 /* APPLE LOCAL begin radar 5618945 */
    517 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    518 /* APPLE LOCAL end radar 5618945 */
    519 _mm_cmpge_ps (__m128 __A, __m128 __B)
    520 {
    521   return (__m128) __builtin_ia32_cmpgeps ((__v4sf)__A, (__v4sf)__B);
    522 }
    523 
    524 /* APPLE LOCAL begin radar 5618945 */
    525 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    526 /* APPLE LOCAL end radar 5618945 */
    527 _mm_cmpneq_ps (__m128 __A, __m128 __B)
    528 {
    529   return (__m128) __builtin_ia32_cmpneqps ((__v4sf)__A, (__v4sf)__B);
    530 }
    531 
    532 /* APPLE LOCAL begin radar 5618945 */
    533 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    534 /* APPLE LOCAL end radar 5618945 */
    535 _mm_cmpnlt_ps (__m128 __A, __m128 __B)
    536 {
    537   return (__m128) __builtin_ia32_cmpnltps ((__v4sf)__A, (__v4sf)__B);
    538 }
    539 
    540 /* APPLE LOCAL begin radar 5618945 */
    541 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    542 /* APPLE LOCAL end radar 5618945 */
    543 _mm_cmpnle_ps (__m128 __A, __m128 __B)
    544 {
    545   return (__m128) __builtin_ia32_cmpnleps ((__v4sf)__A, (__v4sf)__B);
    546 }
    547 
    548 /* APPLE LOCAL begin radar 5618945 */
    549 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    550 /* APPLE LOCAL end radar 5618945 */
    551 _mm_cmpngt_ps (__m128 __A, __m128 __B)
    552 {
    553   return (__m128) __builtin_ia32_cmpngtps ((__v4sf)__A, (__v4sf)__B);
    554 }
    555 
    556 /* APPLE LOCAL begin radar 5618945 */
    557 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    558 /* APPLE LOCAL end radar 5618945 */
    559 _mm_cmpnge_ps (__m128 __A, __m128 __B)
    560 {
    561   return (__m128) __builtin_ia32_cmpngeps ((__v4sf)__A, (__v4sf)__B);
    562 }
    563 
    564 /* APPLE LOCAL begin radar 5618945 */
    565 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    566 /* APPLE LOCAL end radar 5618945 */
    567 _mm_cmpord_ps (__m128 __A, __m128 __B)
    568 {
    569   return (__m128) __builtin_ia32_cmpordps ((__v4sf)__A, (__v4sf)__B);
    570 }
    571 
    572 /* APPLE LOCAL begin radar 5618945 */
    573 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    574 /* APPLE LOCAL end radar 5618945 */
    575 _mm_cmpunord_ps (__m128 __A, __m128 __B)
    576 {
    577   return (__m128) __builtin_ia32_cmpunordps ((__v4sf)__A, (__v4sf)__B);
    578 }
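         /* Example (illustrative only): the per-element masks drive branchless
            selects.  An element-wise minimum built from the operations above,
            shown only to illustrate the mask idiom (_mm_min_ps does this directly):

              __m128 __lt  = _mm_cmplt_ps (__a, __b);                all-ones where a < b
              __m128 __min = _mm_or_ps (_mm_and_ps (__lt, __a),
                                        _mm_andnot_ps (__lt, __b));  a where a < b, else b
         */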
    579 
    580 /* Compare the lower SPFP values of A and B and return 1 if true
    581    and 0 if false.  */
    582 
    583 /* APPLE LOCAL begin radar 5618945 */
    584 __STATIC_INLINE int __attribute__((__always_inline__))
    585 /* APPLE LOCAL end radar 5618945 */
    586 _mm_comieq_ss (__m128 __A, __m128 __B)
    587 {
    588   return __builtin_ia32_comieq ((__v4sf)__A, (__v4sf)__B);
    589 }
    590 
    591 /* APPLE LOCAL begin radar 5618945 */
    592 __STATIC_INLINE int __attribute__((__always_inline__))
    593 /* APPLE LOCAL end radar 5618945 */
    594 _mm_comilt_ss (__m128 __A, __m128 __B)
    595 {
    596   return __builtin_ia32_comilt ((__v4sf)__A, (__v4sf)__B);
    597 }
    598 
    599 /* APPLE LOCAL begin radar 5618945 */
    600 __STATIC_INLINE int __attribute__((__always_inline__))
    601 /* APPLE LOCAL end radar 5618945 */
    602 _mm_comile_ss (__m128 __A, __m128 __B)
    603 {
    604   return __builtin_ia32_comile ((__v4sf)__A, (__v4sf)__B);
    605 }
    606 
    607 /* APPLE LOCAL begin radar 5618945 */
    608 __STATIC_INLINE int __attribute__((__always_inline__))
    609 /* APPLE LOCAL end radar 5618945 */
    610 _mm_comigt_ss (__m128 __A, __m128 __B)
    611 {
    612   return __builtin_ia32_comigt ((__v4sf)__A, (__v4sf)__B);
    613 }
    614 
    615 /* APPLE LOCAL begin radar 5618945 */
    616 __STATIC_INLINE int __attribute__((__always_inline__))
    617 /* APPLE LOCAL end radar 5618945 */
    618 _mm_comige_ss (__m128 __A, __m128 __B)
    619 {
    620   return __builtin_ia32_comige ((__v4sf)__A, (__v4sf)__B);
    621 }
    622 
    623 /* APPLE LOCAL begin radar 5618945 */
    624 __STATIC_INLINE int __attribute__((__always_inline__))
    625 /* APPLE LOCAL end radar 5618945 */
    626 _mm_comineq_ss (__m128 __A, __m128 __B)
    627 {
    628   return __builtin_ia32_comineq ((__v4sf)__A, (__v4sf)__B);
    629 }
    630 
    631 /* APPLE LOCAL begin radar 5618945 */
    632 __STATIC_INLINE int __attribute__((__always_inline__))
    633 /* APPLE LOCAL end radar 5618945 */
    634 _mm_ucomieq_ss (__m128 __A, __m128 __B)
    635 {
    636   return __builtin_ia32_ucomieq ((__v4sf)__A, (__v4sf)__B);
    637 }
    638 
    639 /* APPLE LOCAL begin radar 5618945 */
    640 __STATIC_INLINE int __attribute__((__always_inline__))
    641 /* APPLE LOCAL end radar 5618945 */
    642 _mm_ucomilt_ss (__m128 __A, __m128 __B)
    643 {
    644   return __builtin_ia32_ucomilt ((__v4sf)__A, (__v4sf)__B);
    645 }
    646 
    647 /* APPLE LOCAL begin radar 5618945 */
    648 __STATIC_INLINE int __attribute__((__always_inline__))
    649 /* APPLE LOCAL end radar 5618945 */
    650 _mm_ucomile_ss (__m128 __A, __m128 __B)
    651 {
    652   return __builtin_ia32_ucomile ((__v4sf)__A, (__v4sf)__B);
    653 }
    654 
    655 /* APPLE LOCAL begin radar 5618945 */
    656 __STATIC_INLINE int __attribute__((__always_inline__))
    657 /* APPLE LOCAL end radar 5618945 */
    658 _mm_ucomigt_ss (__m128 __A, __m128 __B)
    659 {
    660   return __builtin_ia32_ucomigt ((__v4sf)__A, (__v4sf)__B);
    661 }
    662 
    663 /* APPLE LOCAL begin radar 5618945 */
    664 __STATIC_INLINE int __attribute__((__always_inline__))
    665 /* APPLE LOCAL end radar 5618945 */
    666 _mm_ucomige_ss (__m128 __A, __m128 __B)
    667 {
    668   return __builtin_ia32_ucomige ((__v4sf)__A, (__v4sf)__B);
    669 }
    670 
    671 /* APPLE LOCAL begin radar 5618945 */
    672 __STATIC_INLINE int __attribute__((__always_inline__))
    673 /* APPLE LOCAL end radar 5618945 */
    674 _mm_ucomineq_ss (__m128 __A, __m128 __B)
    675 {
    676   return __builtin_ia32_ucomineq ((__v4sf)__A, (__v4sf)__B);
    677 }
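         /* Example (illustrative only): these return a plain int and can be used
            directly in control flow:

              if (_mm_comilt_ss (__a, __b))
                __count++;              taken when element 0 of __a < element 0 of __b

            The _mm_comi* forms use COMISS, which raises the invalid-operation
            exception for any NaN operand; the _mm_ucomi* forms use UCOMISS, which
            signals invalid only for signalling NaNs.  */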
    678 
    679 /* Convert the lower SPFP value to a 32-bit integer according to the current
    680    rounding mode.  */
    681 /* APPLE LOCAL begin radar 5618945 */
    682 __STATIC_INLINE int __attribute__((__always_inline__))
    683 /* APPLE LOCAL end radar 5618945 */
    684 _mm_cvtss_si32 (__m128 __A)
    685 {
    686   return __builtin_ia32_cvtss2si ((__v4sf) __A);
    687 }
    688 
    689 /* APPLE LOCAL begin radar 5618945 */
    690 __STATIC_INLINE int __attribute__((__always_inline__))
    691 /* APPLE LOCAL end radar 5618945 */
    692 _mm_cvt_ss2si (__m128 __A)
    693 {
    694   return _mm_cvtss_si32 (__A);
    695 }
    696 
    697 #ifdef __x86_64__
    698 /* Convert the lower SPFP value to a 32-bit integer according to the
    699    current rounding mode.  */
    700 
    701 /* Intel intrinsic.  */
    702 /* APPLE LOCAL begin radar 5618945 */
    703 __STATIC_INLINE long long __attribute__((__always_inline__))
    704 /* APPLE LOCAL end radar 5618945 */
    705 _mm_cvtss_si64 (__m128 __A)
    706 {
    707   return __builtin_ia32_cvtss2si64 ((__v4sf) __A);
    708 }
    709 
    710 /* Microsoft intrinsic.  */
    711 /* APPLE LOCAL begin radar 5618945 */
    712 __STATIC_INLINE long long __attribute__((__always_inline__))
    713 /* APPLE LOCAL end radar 5618945 */
    714 _mm_cvtss_si64x (__m128 __A)
    715 {
    716   return __builtin_ia32_cvtss2si64 ((__v4sf) __A);
    717 }
    718 #endif
    719 
    720 /* Convert the two lower SPFP values to 32-bit integers according to the
    721    current rounding mode.  Return the integers in packed form.  */
    722 /* APPLE LOCAL begin radar 5618945 */
    723 __STATIC_INLINE __m64 __attribute__((__always_inline__))
    724 /* APPLE LOCAL end radar 5618945 */
    725 _mm_cvtps_pi32 (__m128 __A)
    726 {
    727   return (__m64) __builtin_ia32_cvtps2pi ((__v4sf) __A);
    728 }
    729 
    730 /* APPLE LOCAL begin radar 5618945 */
    731 __STATIC_INLINE __m64 __attribute__((__always_inline__))
    732 /* APPLE LOCAL end radar 5618945 */
    733 _mm_cvt_ps2pi (__m128 __A)
    734 {
    735   return _mm_cvtps_pi32 (__A);
    736 }
    737 
    738 /* Truncate the lower SPFP value to a 32-bit integer.  */
    739 /* APPLE LOCAL begin radar 5618945 */
    740 __STATIC_INLINE int __attribute__((__always_inline__))
    741 /* APPLE LOCAL end radar 5618945 */
    742 _mm_cvttss_si32 (__m128 __A)
    743 {
    744   return __builtin_ia32_cvttss2si ((__v4sf) __A);
    745 }
    746 
    747 /* APPLE LOCAL begin radar 5618945 */
    748 __STATIC_INLINE int __attribute__((__always_inline__))
    749 /* APPLE LOCAL end radar 5618945 */
    750 _mm_cvtt_ss2si (__m128 __A)
    751 {
    752   return _mm_cvttss_si32 (__A);
    753 }
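         /* Example (illustrative only): _mm_cvtss_si32 honours the current MXCSR
            rounding mode (round-to-nearest-even by default), while _mm_cvttss_si32
            always truncates toward zero.  Under the default mode:

              _mm_cvtss_si32  (_mm_set_ss (2.5f))    ->  2   nearest even
              _mm_cvtss_si32  (_mm_set_ss (3.5f))    ->  4   nearest even
              _mm_cvttss_si32 (_mm_set_ss (-2.7f))   -> -2   truncated toward zero

            (_mm_set_ss is defined further down in this header.)  */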
    754 
    755 #ifdef __x86_64__
    756 /* Truncate the lower SPFP value to a 32-bit integer.  */
    757 
    758 /* Intel intrinsic.  */
    759 /* APPLE LOCAL begin radar 5618945 */
    760 __STATIC_INLINE long long __attribute__((__always_inline__))
    761 /* APPLE LOCAL end radar 5618945 */
    762 _mm_cvttss_si64 (__m128 __A)
    763 {
    764   return __builtin_ia32_cvttss2si64 ((__v4sf) __A);
    765 }
    766 
    767 /* Microsoft intrinsic.  */
    768 /* APPLE LOCAL begin radar 5618945 */
    769 __STATIC_INLINE long long __attribute__((__always_inline__))
    770 /* APPLE LOCAL end radar 5618945 */
    771 _mm_cvttss_si64x (__m128 __A)
    772 {
    773   return __builtin_ia32_cvttss2si64 ((__v4sf) __A);
    774 }
    775 #endif
    776 
    777 /* Truncate the two lower SPFP values to 32-bit integers.  Return the
    778    integers in packed form.  */
    779 /* APPLE LOCAL begin radar 5618945 */
    780 __STATIC_INLINE __m64 __attribute__((__always_inline__))
    781 /* APPLE LOCAL end radar 5618945 */
    782 _mm_cvttps_pi32 (__m128 __A)
    783 {
    784   return (__m64) __builtin_ia32_cvttps2pi ((__v4sf) __A);
    785 }
    786 
    787 /* APPLE LOCAL begin radar 5618945 */
    788 __STATIC_INLINE __m64 __attribute__((__always_inline__))
    789 /* APPLE LOCAL end radar 5618945 */
    790 _mm_cvtt_ps2pi (__m128 __A)
    791 {
    792   return _mm_cvttps_pi32 (__A);
    793 }
    794 
    795 /* Convert B to a SPFP value and insert it as element zero in A.  */
    796 /* APPLE LOCAL begin radar 5618945 */
    797 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    798 /* APPLE LOCAL end radar 5618945 */
    799 _mm_cvtsi32_ss (__m128 __A, int __B)
    800 {
    801   return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B);
    802 }
    803 
    804 /* APPLE LOCAL begin radar 5618945 */
    805 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    806 /* APPLE LOCAL end radar 5618945 */
    807 _mm_cvt_si2ss (__m128 __A, int __B)
    808 {
    809   return _mm_cvtsi32_ss (__A, __B);
    810 }
    811 
    812 #ifdef __x86_64__
    813 /* Convert B to a SPFP value and insert it as element zero in A.  */
    814 
    815 /* Intel intrinsic.  */
    816 /* APPLE LOCAL begin radar 5618945 */
    817 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    818 /* APPLE LOCAL end radar 5618945 */
    819 _mm_cvtsi64_ss (__m128 __A, long long __B)
    820 {
    821   return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
    822 }
    823 
    824 /* Microsoft intrinsic.  */
    825 /* APPLE LOCAL begin radar 5618945 */
    826 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    827 /* APPLE LOCAL end radar 5618945 */
    828 _mm_cvtsi64x_ss (__m128 __A, long long __B)
    829 {
    830   return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
    831 }
    832 #endif
    833 
    834 /* Convert the two 32-bit values in B to SPFP form and insert them
    835    as the two lower elements in A.  */
    836 /* APPLE LOCAL begin radar 5618945 */
    837 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    838 /* APPLE LOCAL end radar 5618945 */
    839 _mm_cvtpi32_ps (__m128 __A, __m64 __B)
    840 {
    841   return (__m128) __builtin_ia32_cvtpi2ps ((__v4sf) __A, (__v2si)__B);
    842 }
    843 
    844 /* APPLE LOCAL begin radar 5618945 */
    845 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    846 /* APPLE LOCAL end radar 5618945 */
    847 _mm_cvt_pi2ps (__m128 __A, __m64 __B)
    848 {
    849   return _mm_cvtpi32_ps (__A, __B);
    850 }
    851 
    852 /* Convert the four signed 16-bit values in A to SPFP form.  */
    853 /* APPLE LOCAL begin radar 5618945 */
    854 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    855 /* APPLE LOCAL end radar 5618945 */
    856 _mm_cvtpi16_ps (__m64 __A)
    857 {
    858   __v4hi __sign;
    859   __v2si __hisi, __losi;
    860   __v4sf __r;
    861 
    862   /* This comparison against zero gives us a mask that can be used to
    863      fill in the missing sign bits in the unpack operations below, so
    864      that we get signed values after unpacking.  */
    865   __sign = __builtin_ia32_pcmpgtw ((__v4hi)0LL, (__v4hi)__A);
    866 
    867   /* Convert the four words to doublewords.  */
    868   __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, __sign);
    869   __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, __sign);
    870 
    871   /* Convert the doublewords to floating point two at a time.  */
    872   __r = (__v4sf) _mm_setzero_ps ();
    873   __r = __builtin_ia32_cvtpi2ps (__r, __hisi);
    874   __r = __builtin_ia32_movlhps (__r, __r);
    875   __r = __builtin_ia32_cvtpi2ps (__r, __losi);
    876 
    877   return (__m128) __r;
    878 }
    879 
    880 /* Convert the four unsigned 16-bit values in A to SPFP form.  */
    881 /* APPLE LOCAL begin radar 5618945 */
    882 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    883 /* APPLE LOCAL end radar 5618945 */
    884 _mm_cvtpu16_ps (__m64 __A)
    885 {
    886   __v2si __hisi, __losi;
    887   __v4sf __r;
    888 
    889   /* Convert the four words to doublewords.  */
    890   __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, (__v4hi)0LL);
    891   __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, (__v4hi)0LL);
    892 
    893   /* Convert the doublewords to floating point two at a time.  */
    894   __r = (__v4sf) _mm_setzero_ps ();
    895   __r = __builtin_ia32_cvtpi2ps (__r, __hisi);
    896   __r = __builtin_ia32_movlhps (__r, __r);
    897   __r = __builtin_ia32_cvtpi2ps (__r, __losi);
    898 
    899   return (__m128) __r;
    900 }
    901 
    902 /* Convert the low four signed 8-bit values in A to SPFP form.  */
    903 /* APPLE LOCAL begin radar 5618945 */
    904 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    905 /* APPLE LOCAL end radar 5618945 */
    906 _mm_cvtpi8_ps (__m64 __A)
    907 {
    908   __v8qi __sign;
    909 
    910   /* This comparison against zero gives us a mask that can be used to
    911      fill in the missing sign bits in the unpack operations below, so
    912      that we get signed values after unpacking.  */
    913   __sign = __builtin_ia32_pcmpgtb ((__v8qi)0LL, (__v8qi)__A);
    914 
    915   /* Convert the four low bytes to words.  */
    916   __A = (__m64) __builtin_ia32_punpcklbw ((__v8qi)__A, __sign);
    917 
    918   return _mm_cvtpi16_ps(__A);
    919 }
    920 
    921 /* Convert the low four unsigned 8-bit values in A to SPFP form.  */
    922 /* APPLE LOCAL begin radar 5618945 */
    923 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    924 /* APPLE LOCAL end radar 5618945 */
    925 _mm_cvtpu8_ps(__m64 __A)
    926 {
    927   __A = (__m64) __builtin_ia32_punpcklbw ((__v8qi)__A, (__v8qi)0LL);
    928   return _mm_cvtpu16_ps(__A);
    929 }
    930 
    931 /* Convert the four signed 32-bit values in A and B to SPFP form.  */
    932 /* APPLE LOCAL begin radar 5618945 */
    933 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    934 /* APPLE LOCAL end radar 5618945 */
    935 _mm_cvtpi32x2_ps(__m64 __A, __m64 __B)
    936 {
    937   __v4sf __zero = (__v4sf) _mm_setzero_ps ();
    938   __v4sf __sfa = __builtin_ia32_cvtpi2ps (__zero, (__v2si)__A);
    939   __v4sf __sfb = __builtin_ia32_cvtpi2ps (__zero, (__v2si)__B);
    940   return (__m128) __builtin_ia32_movlhps (__sfa, __sfb);
    941 }
    942 
    943 /* Convert the four SPFP values in A to four signed 16-bit integers.  */
    944 /* APPLE LOCAL begin radar 5618945 */
    945 __STATIC_INLINE __m64 __attribute__((__always_inline__))
    946 /* APPLE LOCAL end radar 5618945 */
    947 _mm_cvtps_pi16(__m128 __A)
    948 {
    949   __v4sf __hisf = (__v4sf)__A;
    950   __v4sf __losf = __builtin_ia32_movhlps (__hisf, __hisf);
    951   __v2si __hisi = __builtin_ia32_cvtps2pi (__hisf);
    952   __v2si __losi = __builtin_ia32_cvtps2pi (__losf);
    953   return (__m64) __builtin_ia32_packssdw (__hisi, __losi);
    954 }
    955 
    956 /* Convert the four SPFP values in A to four signed 8-bit integers.  */
    957 /* APPLE LOCAL begin radar 5618945 */
    958 __STATIC_INLINE __m64 __attribute__((__always_inline__))
    959 /* APPLE LOCAL end radar 5618945 */
    960 _mm_cvtps_pi8(__m128 __A)
    961 {
    962   __v4hi __tmp = (__v4hi) _mm_cvtps_pi16 (__A);
    963   return (__m64) __builtin_ia32_packsswb (__tmp, (__v4hi)0LL);
    964 }
    965 
    966 /* Selects four specific SPFP values from A and B based on MASK.  */
    967 #if 0
    968 /* APPLE LOCAL begin radar 5618945 */
    969 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    970 /* APPLE LOCAL end radar 5618945 */
    971 _mm_shuffle_ps (__m128 __A, __m128 __B, int __mask)
    972 {
    973   return (__m128) __builtin_ia32_shufps ((__v4sf)__A, (__v4sf)__B, __mask);
    974 }
    975 #else
    976 #define _mm_shuffle_ps(A, B, MASK) \
    977  ((__m128) __builtin_ia32_shufps ((__v4sf)(A), (__v4sf)(B), (MASK)))
    978 #endif
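         /* Example (illustrative only): the two low result elements come from A and
            the two high result elements from B, each picked by a 2-bit index in
            MASK.  With __a = { a0, a1, a2, a3 } and __b = { b0, b1, b2, b3 }:

              _mm_shuffle_ps (__a, __b, _MM_SHUFFLE (3, 2, 1, 0))  ->  { a0, a1, b2, b3 }
              _mm_shuffle_ps (__a, __a, _MM_SHUFFLE (0, 1, 2, 3))  ->  { a3, a2, a1, a0 }

            The second form reverses a vector, as _mm_loadr_ps below does.  */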
    979 
    980 
    981 /* Selects and interleaves the upper two SPFP values from A and B.  */
    982 /* APPLE LOCAL begin radar 5618945 */
    983 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    984 /* APPLE LOCAL end radar 5618945 */
    985 _mm_unpackhi_ps (__m128 __A, __m128 __B)
    986 {
    987   return (__m128) __builtin_ia32_unpckhps ((__v4sf)__A, (__v4sf)__B);
    988 }
    989 
    990 /* Selects and interleaves the lower two SPFP values from A and B.  */
    991 /* APPLE LOCAL begin radar 5618945 */
    992 __STATIC_INLINE __m128 __attribute__((__always_inline__))
    993 /* APPLE LOCAL end radar 5618945 */
    994 _mm_unpacklo_ps (__m128 __A, __m128 __B)
    995 {
    996   return (__m128) __builtin_ia32_unpcklps ((__v4sf)__A, (__v4sf)__B);
    997 }
    998 
     999 /* Sets the upper two SPFP values with 64 bits of data loaded from P;
   1000    the lower two values are passed through from A.  */
   1001 /* APPLE LOCAL begin radar 5618945 */
   1002 __STATIC_INLINE __m128 __attribute__((__always_inline__))
   1003 /* APPLE LOCAL end radar 5618945 */
   1004 _mm_loadh_pi (__m128 __A, __m64 const *__P)
   1005 {
   1006   return (__m128) __builtin_ia32_loadhps ((__v4sf)__A, (__v2si *)__P);
   1007 }
   1008 
   1009 /* Stores the upper two SPFP values of A into P.  */
   1010 /* APPLE LOCAL begin radar 5618945 */
   1011 __STATIC_INLINE void __attribute__((__always_inline__))
   1012 /* APPLE LOCAL end radar 5618945 */
   1013 _mm_storeh_pi (__m64 *__P, __m128 __A)
   1014 {
   1015   __builtin_ia32_storehps ((__v2si *)__P, (__v4sf)__A);
   1016 }
   1017 
   1018 /* Moves the upper two values of B into the lower two values of A.  */
   1019 /* APPLE LOCAL begin radar 5618945 */
   1020 __STATIC_INLINE __m128 __attribute__((__always_inline__))
   1021 /* APPLE LOCAL end radar 5618945 */
   1022 _mm_movehl_ps (__m128 __A, __m128 __B)
   1023 {
   1024   return (__m128) __builtin_ia32_movhlps ((__v4sf)__A, (__v4sf)__B);
   1025 }
   1026 
   1027 /* Moves the lower two values of B into the upper two values of A.  */
   1028 /* APPLE LOCAL begin radar 5618945 */
   1029 __STATIC_INLINE __m128 __attribute__((__always_inline__))
   1030 /* APPLE LOCAL end radar 5618945 */
   1031 _mm_movelh_ps (__m128 __A, __m128 __B)
   1032 {
   1033   return (__m128) __builtin_ia32_movlhps ((__v4sf)__A, (__v4sf)__B);
   1034 }
   1035 
    1036 /* Sets the lower two SPFP values with 64 bits of data loaded from P;
   1037    the upper two values are passed through from A.  */
   1038 /* APPLE LOCAL begin radar 5618945 */
   1039 __STATIC_INLINE __m128 __attribute__((__always_inline__))
   1040 /* APPLE LOCAL end radar 5618945 */
   1041 _mm_loadl_pi (__m128 __A, __m64 const *__P)
   1042 {
   1043   return (__m128) __builtin_ia32_loadlps ((__v4sf)__A, (__v2si *)__P);
   1044 }
   1045 
   1046 /* Stores the lower two SPFP values of A into P.  */
   1047 /* APPLE LOCAL begin radar 5618945 */
   1048 __STATIC_INLINE void __attribute__((__always_inline__))
   1049 /* APPLE LOCAL end radar 5618945 */
   1050 _mm_storel_pi (__m64 *__P, __m128 __A)
   1051 {
   1052   __builtin_ia32_storelps ((__v2si *)__P, (__v4sf)__A);
   1053 }
   1054 
   1055 /* Creates a 4-bit mask from the most significant bits of the SPFP values.  */
   1056 /* APPLE LOCAL begin radar 5618945 */
   1057 __STATIC_INLINE int __attribute__((__always_inline__))
   1058 /* APPLE LOCAL end radar 5618945 */
   1059 _mm_movemask_ps (__m128 __A)
   1060 {
   1061   return __builtin_ia32_movmskps ((__v4sf)__A);
   1062 }
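         /* Example (illustrative only): combined with a packed comparison this
            gives a cheap any/all test:

              int __m   = _mm_movemask_ps (_mm_cmplt_ps (__a, __b));
              int __any = (__m != 0);      some element of __a is below __b
              int __all = (__m == 0xf);    every element of __a is below __b
         */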
   1063 
   1064 /* Return the contents of the control register.  */
   1065 /* APPLE LOCAL begin radar 5618945 */
   1066 __STATIC_INLINE unsigned int __attribute__((__always_inline__))
   1067 /* APPLE LOCAL end radar 5618945 */
   1068 _mm_getcsr (void)
   1069 {
   1070   return __builtin_ia32_stmxcsr ();
   1071 }
   1072 
   1073 /* Read exception bits from the control register.  */
   1074 /* APPLE LOCAL begin radar 5618945 */
   1075 __STATIC_INLINE unsigned int __attribute__((__always_inline__))
   1076 /* APPLE LOCAL end radar 5618945 */
   1077 _MM_GET_EXCEPTION_STATE (void)
   1078 {
   1079   return _mm_getcsr() & _MM_EXCEPT_MASK;
   1080 }
   1081 
   1082 /* APPLE LOCAL begin radar 5618945 */
   1083 __STATIC_INLINE unsigned int __attribute__((__always_inline__))
   1084 /* APPLE LOCAL end radar 5618945 */
   1085 _MM_GET_EXCEPTION_MASK (void)
   1086 {
   1087   return _mm_getcsr() & _MM_MASK_MASK;
   1088 }
   1089 
   1090 /* APPLE LOCAL begin radar 5618945 */
   1091 __STATIC_INLINE unsigned int __attribute__((__always_inline__))
   1092 /* APPLE LOCAL end radar 5618945 */
   1093 _MM_GET_ROUNDING_MODE (void)
   1094 {
   1095   return _mm_getcsr() & _MM_ROUND_MASK;
   1096 }
   1097 
   1098 /* APPLE LOCAL begin radar 5618945 */
   1099 __STATIC_INLINE unsigned int __attribute__((__always_inline__))
   1100 /* APPLE LOCAL end radar 5618945 */
   1101 _MM_GET_FLUSH_ZERO_MODE (void)
   1102 {
   1103   return _mm_getcsr() & _MM_FLUSH_ZERO_MASK;
   1104 }
   1105 
   1106 /* Set the control register to I.  */
   1107 /* APPLE LOCAL begin radar 5618945 */
   1108 __STATIC_INLINE void __attribute__((__always_inline__))
   1109 /* APPLE LOCAL end radar 5618945 */
   1110 _mm_setcsr (unsigned int __I)
   1111 {
   1112   __builtin_ia32_ldmxcsr (__I);
   1113 }
   1114 
   1115 /* Set exception bits in the control register.  */
   1116 /* APPLE LOCAL begin radar 5618945 */
   1117 __STATIC_INLINE void __attribute__((__always_inline__))
   1118 /* APPLE LOCAL end radar 5618945 */
   1119 _MM_SET_EXCEPTION_STATE(unsigned int __mask)
   1120 {
   1121   _mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | __mask);
   1122 }
   1123 
   1124 /* APPLE LOCAL begin radar 5618945 */
   1125 __STATIC_INLINE void __attribute__((__always_inline__))
   1126 /* APPLE LOCAL end radar 5618945 */
   1127 _MM_SET_EXCEPTION_MASK (unsigned int __mask)
   1128 {
   1129   _mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | __mask);
   1130 }
   1131 
   1132 /* APPLE LOCAL begin radar 5618945 */
   1133 __STATIC_INLINE void __attribute__((__always_inline__))
   1134 /* APPLE LOCAL end radar 5618945 */
   1135 _MM_SET_ROUNDING_MODE (unsigned int __mode)
   1136 {
   1137   _mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | __mode);
   1138 }
   1139 
   1140 /* APPLE LOCAL begin radar 5618945 */
   1141 __STATIC_INLINE void __attribute__((__always_inline__))
   1142 /* APPLE LOCAL end radar 5618945 */
   1143 _MM_SET_FLUSH_ZERO_MODE (unsigned int __mode)
   1144 {
   1145   _mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | __mode);
   1146 }
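         /* Example (illustrative only): the helpers above read-modify-write the
            MXCSR through _mm_getcsr/_mm_setcsr.  A typical pattern saves the
            register, changes a field for a region of code, then restores it:

              unsigned int __saved = _mm_getcsr ();
              _MM_SET_ROUNDING_MODE (_MM_ROUND_TOWARD_ZERO);
              _MM_SET_FLUSH_ZERO_MODE (_MM_FLUSH_ZERO_ON);
              ...SSE arithmetic here...
              _mm_setcsr (__saved);
         */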
   1147 
   1148 /* Create a vector with element 0 as F and the rest zero.  */
   1149 /* APPLE LOCAL begin radar 5618945 */
   1150 __STATIC_INLINE __m128 __attribute__((__always_inline__))
   1151 /* APPLE LOCAL end radar 5618945 */
   1152 _mm_set_ss (float __F)
   1153 {
   1154   return __extension__ (__m128)(__v4sf){ __F, 0, 0, 0 };
   1155 }
   1156 
   1157 /* Create a vector with all four elements equal to F.  */
   1158 /* APPLE LOCAL begin radar 5618945 */
   1159 __STATIC_INLINE __m128 __attribute__((__always_inline__))
   1160 /* APPLE LOCAL end radar 5618945 */
   1161 _mm_set1_ps (float __F)
   1162 {
   1163   return __extension__ (__m128)(__v4sf){ __F, __F, __F, __F };
   1164 }
   1165 
   1166 /* APPLE LOCAL begin radar 5618945 */
   1167 __STATIC_INLINE __m128 __attribute__((__always_inline__))
   1168 /* APPLE LOCAL end radar 5618945 */
   1169 _mm_set_ps1 (float __F)
   1170 {
   1171   return _mm_set1_ps (__F);
   1172 }
   1173 
   1174 /* Create a vector with element 0 as *P and the rest zero.  */
   1175 /* APPLE LOCAL begin radar 5618945 */
   1176 __STATIC_INLINE __m128 __attribute__((__always_inline__))
   1177 /* APPLE LOCAL end radar 5618945 */
   1178 _mm_load_ss (float const *__P)
   1179 {
   1180   return _mm_set_ss (*__P);
   1181 }
   1182 
   1183 /* Create a vector with all four elements equal to *P.  */
   1184 /* APPLE LOCAL begin radar 5618945 */
   1185 __STATIC_INLINE __m128 __attribute__((__always_inline__))
   1186 /* APPLE LOCAL end radar 5618945 */
   1187 _mm_load1_ps (float const *__P)
   1188 {
   1189   return _mm_set1_ps (*__P);
   1190 }
   1191 
   1192 /* APPLE LOCAL begin radar 5618945 */
   1193 __STATIC_INLINE __m128 __attribute__((__always_inline__))
   1194 /* APPLE LOCAL end radar 5618945 */
   1195 _mm_load_ps1 (float const *__P)
   1196 {
   1197   return _mm_load1_ps (__P);
   1198 }
   1199 
   1200 /* Load four SPFP values from P.  The address must be 16-byte aligned.  */
   1201 /* APPLE LOCAL begin radar 5618945 */
   1202 __STATIC_INLINE __m128 __attribute__((__always_inline__))
   1203 /* APPLE LOCAL end radar 5618945 */
   1204 _mm_load_ps (float const *__P)
   1205 {
   1206   return (__m128) *(__v4sf *)__P;
   1207 }
   1208 
   1209 /* Load four SPFP values from P.  The address need not be 16-byte aligned.  */
   1210 /* APPLE LOCAL begin radar 5618945 */
   1211 __STATIC_INLINE __m128 __attribute__((__always_inline__))
   1212 /* APPLE LOCAL end radar 5618945 */
   1213 _mm_loadu_ps (float const *__P)
   1214 {
   1215   return (__m128) __builtin_ia32_loadups (__P);
   1216 }
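         /* Example (illustrative only): _mm_load_ps requires a 16-byte aligned
            address, _mm_loadu_ps does not:

              float __buf[8] __attribute__ ((__aligned__ (16))) = { 1, 2, 3, 4, 5, 6, 7, 8 };
              __m128 __v = _mm_load_ps (__buf);         aligned load of elements 0-3
              __m128 __w = _mm_loadu_ps (__buf + 1);    unaligned load of elements 1-4
         */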
   1217 
   1218 /* Load four SPFP values in reverse order.  The address must be aligned.  */
   1219 /* APPLE LOCAL begin radar 5618945 */
   1220 __STATIC_INLINE __m128 __attribute__((__always_inline__))
   1221 /* APPLE LOCAL end radar 5618945 */
   1222 _mm_loadr_ps (float const *__P)
   1223 {
   1224   __v4sf __tmp = *(__v4sf *)__P;
   1225   return (__m128) __builtin_ia32_shufps (__tmp, __tmp, _MM_SHUFFLE (0,1,2,3));
   1226 }
   1227 
   1228 /* Create the vector [Z Y X W].  */
   1229 /* APPLE LOCAL begin radar 5618945 */
   1230 __STATIC_INLINE __m128 __attribute__((__always_inline__))
   1231 /* APPLE LOCAL end radar 5618945 */
   1232 _mm_set_ps (const float __Z, const float __Y, const float __X, const float __W)
   1233 {
   1234   return __extension__ (__m128)(__v4sf){ __W, __X, __Y, __Z };
   1235 }
   1236 
   1237 /* Create the vector [W X Y Z].  */
   1238 /* APPLE LOCAL begin radar 5618945 */
   1239 __STATIC_INLINE __m128 __attribute__((__always_inline__))
   1240 /* APPLE LOCAL end radar 5618945 */
   1241 _mm_setr_ps (float __Z, float __Y, float __X, float __W)
   1242 {
   1243   return __extension__ (__m128)(__v4sf){ __Z, __Y, __X, __W };
   1244 }
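         /* Example (illustrative only): _mm_set_ps takes its arguments from the
            highest element down to element 0, _mm_setr_ps in memory order, so

              _mm_set_ps  (4.0f, 3.0f, 2.0f, 1.0f)
              _mm_setr_ps (1.0f, 2.0f, 3.0f, 4.0f)

            build the same vector, which _mm_storeu_ps (defined below) writes to
            memory as { 1, 2, 3, 4 }.  */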
   1245 
   1246 /* Stores the lower SPFP value.  */
   1247 /* APPLE LOCAL begin radar 5618945 */
   1248 __STATIC_INLINE void __attribute__((__always_inline__))
   1249 /* APPLE LOCAL end radar 5618945 */
   1250 _mm_store_ss (float *__P, __m128 __A)
   1251 {
   1252   *__P = __builtin_ia32_vec_ext_v4sf ((__v4sf)__A, 0);
   1253 }
   1254 
   1255 /* APPLE LOCAL begin radar 5618945 */
   1256 __STATIC_INLINE float __attribute__((__always_inline__))
   1257 /* APPLE LOCAL end radar 5618945 */
   1258 _mm_cvtss_f32 (__m128 __A)
   1259 {
   1260   return __builtin_ia32_vec_ext_v4sf ((__v4sf)__A, 0);
   1261 }
   1262 
   1263 /* Store four SPFP values.  The address must be 16-byte aligned.  */
   1264 /* APPLE LOCAL begin radar 5618945 */
   1265 __STATIC_INLINE void __attribute__((__always_inline__))
   1266 /* APPLE LOCAL end radar 5618945 */
   1267 _mm_store_ps (float *__P, __m128 __A)
   1268 {
   1269   *(__v4sf *)__P = (__v4sf)__A;
   1270 }
   1271 
   1272 /* Store four SPFP values.  The address need not be 16-byte aligned.  */
   1273 /* APPLE LOCAL begin radar 5618945 */
   1274 __STATIC_INLINE void __attribute__((__always_inline__))
   1275 /* APPLE LOCAL end radar 5618945 */
   1276 _mm_storeu_ps (float *__P, __m128 __A)
   1277 {
   1278   __builtin_ia32_storeups (__P, (__v4sf)__A);
   1279 }
   1280 
   1281 /* Store the lower SPFP value across four words.  */
   1282 /* APPLE LOCAL begin radar 5618945 */
   1283 __STATIC_INLINE void __attribute__((__always_inline__))
   1284 /* APPLE LOCAL end radar 5618945 */
   1285 _mm_store1_ps (float *__P, __m128 __A)
   1286 {
   1287   __v4sf __va = (__v4sf)__A;
   1288   __v4sf __tmp = __builtin_ia32_shufps (__va, __va, _MM_SHUFFLE (0,0,0,0));
   1289   _mm_storeu_ps (__P, __tmp);
   1290 }
   1291 
   1292 /* APPLE LOCAL begin radar 5618945 */
   1293 __STATIC_INLINE void __attribute__((__always_inline__))
   1294 /* APPLE LOCAL end radar 5618945 */
   1295 _mm_store_ps1 (float *__P, __m128 __A)
   1296 {
   1297   _mm_store1_ps (__P, __A);
   1298 }
   1299 
   1300 /* Store four SPFP values in reverse order.  The address must be aligned.  */
   1301 /* APPLE LOCAL begin radar 5618945 */
   1302 __STATIC_INLINE void __attribute__((__always_inline__))
   1303 /* APPLE LOCAL end radar 5618945 */
   1304 _mm_storer_ps (float *__P, __m128 __A)
   1305 {
   1306   __v4sf __va = (__v4sf)__A;
   1307   __v4sf __tmp = __builtin_ia32_shufps (__va, __va, _MM_SHUFFLE (0,1,2,3));
   1308   _mm_store_ps (__P, __tmp);
   1309 }
   1310 
   1311 /* Sets the low SPFP value of A from the low value of B.  */
   1312 /* APPLE LOCAL begin radar 5618945 */
   1313 __STATIC_INLINE __m128 __attribute__((__always_inline__))
   1314 /* APPLE LOCAL end radar 5618945 */
   1315 _mm_move_ss (__m128 __A, __m128 __B)
   1316 {
   1317   return (__m128) __builtin_ia32_movss ((__v4sf)__A, (__v4sf)__B);
   1318 }
   1319 
   1320 /* Extracts one of the four words of A.  The selector N must be immediate.  */
   1321 #if 0
   1322 /* APPLE LOCAL begin radar 5618945 */
   1323 __STATIC_INLINE int __attribute__((__always_inline__))
   1324 /* APPLE LOCAL end radar 5618945 */
   1325 _mm_extract_pi16 (__m64 const __A, int const __N)
   1326 {
   1327   return __builtin_ia32_vec_ext_v4hi ((__v4hi)__A, __N);
   1328 }
   1329 
   1330 /* APPLE LOCAL begin radar 5618945 */
   1331 __STATIC_INLINE int __attribute__((__always_inline__))
   1332 /* APPLE LOCAL end radar 5618945 */
   1333 _m_pextrw (__m64 const __A, int const __N)
   1334 {
   1335   return _mm_extract_pi16 (__A, __N);
   1336 }
   1337 #else
   1338 #define _mm_extract_pi16(A, N)	__builtin_ia32_vec_ext_v4hi ((__v4hi)(A), (N))
   1339 #define _m_pextrw(A, N)		_mm_extract_pi16((A), (N))
   1340 #endif
   1341 
   1342 /* Inserts word D into one of four words of A.  The selector N must be
   1343    immediate.  */
   1344 #if 0
   1345 /* APPLE LOCAL begin radar 5618945 */
   1346 __STATIC_INLINE __m64 __attribute__((__always_inline__))
   1347 /* APPLE LOCAL end radar 5618945 */
   1348 _mm_insert_pi16 (__m64 const __A, int const __D, int const __N)
   1349 {
   1350   return (__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)__A, __D, __N);
   1351 }
   1352 
   1353 /* APPLE LOCAL begin radar 5618945 */
   1354 __STATIC_INLINE __m64 __attribute__((__always_inline__))
   1355 /* APPLE LOCAL end radar 5618945 */
   1356 _m_pinsrw (__m64 const __A, int const __D, int const __N)
   1357 {
   1358   return _mm_insert_pi16 (__A, __D, __N);
   1359 }
   1360 #else
   1361 #define _mm_insert_pi16(A, D, N) \
   1362   ((__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)(A), (D), (N)))
   1363 #define _m_pinsrw(A, D, N)	 _mm_insert_pi16((A), (D), (N))
   1364 #endif
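         /* Example (illustrative only): the selector must be a compile-time constant
            in the range 0-3.  With __v = _mm_setr_pi16 (10, 20, 30, 40) from
            <mmintrin.h>:

              int   __w  = _mm_extract_pi16 (__v, 2);       yields 30
              __m64 __v2 = _mm_insert_pi16 (__v, 99, 0);    yields { 99, 20, 30, 40 }

            As with all __m64 operations, call _mm_empty () before using x87
            floating point again.  */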
   1365 
   1366 /* Compute the element-wise maximum of signed 16-bit values.  */
   1367 /* APPLE LOCAL begin radar 5618945 */
   1368 __STATIC_INLINE __m64 __attribute__((__always_inline__))
   1369 /* APPLE LOCAL end radar 5618945 */
   1370 _mm_max_pi16 (__m64 __A, __m64 __B)
   1371 {
   1372   return (__m64) __builtin_ia32_pmaxsw ((__v4hi)__A, (__v4hi)__B);
   1373 }
   1374 
   1375 /* APPLE LOCAL begin radar 5618945 */
   1376 __STATIC_INLINE __m64 __attribute__((__always_inline__))
   1377 /* APPLE LOCAL end radar 5618945 */
   1378 _m_pmaxsw (__m64 __A, __m64 __B)
   1379 {
   1380   return _mm_max_pi16 (__A, __B);
   1381 }
   1382 
   1383 /* Compute the element-wise maximum of unsigned 8-bit values.  */
   1384 /* APPLE LOCAL begin radar 5618945 */
   1385 __STATIC_INLINE __m64 __attribute__((__always_inline__))
   1386 /* APPLE LOCAL end radar 5618945 */
   1387 _mm_max_pu8 (__m64 __A, __m64 __B)
   1388 {
   1389   return (__m64) __builtin_ia32_pmaxub ((__v8qi)__A, (__v8qi)__B);
   1390 }
   1391 
   1392 /* APPLE LOCAL begin radar 5618945 */
   1393 __STATIC_INLINE __m64 __attribute__((__always_inline__))
   1394 /* APPLE LOCAL end radar 5618945 */
   1395 _m_pmaxub (__m64 __A, __m64 __B)
   1396 {
   1397   return _mm_max_pu8 (__A, __B);
   1398 }
   1399 
   1400 /* Compute the element-wise minimum of signed 16-bit values.  */
   1401 /* APPLE LOCAL begin radar 5618945 */
   1402 __STATIC_INLINE __m64 __attribute__((__always_inline__))
   1403 /* APPLE LOCAL end radar 5618945 */
   1404 _mm_min_pi16 (__m64 __A, __m64 __B)
   1405 {
   1406   return (__m64) __builtin_ia32_pminsw ((__v4hi)__A, (__v4hi)__B);
   1407 }
   1408 
   1409 /* APPLE LOCAL begin radar 5618945 */
   1410 __STATIC_INLINE __m64 __attribute__((__always_inline__))
   1411 /* APPLE LOCAL end radar 5618945 */
   1412 _m_pminsw (__m64 __A, __m64 __B)
   1413 {
   1414   return _mm_min_pi16 (__A, __B);
   1415 }
   1416 
   1417 /* Compute the element-wise minimum of unsigned 8-bit values.  */
   1418 /* APPLE LOCAL begin radar 5618945 */
   1419 __STATIC_INLINE __m64 __attribute__((__always_inline__))
   1420 /* APPLE LOCAL end radar 5618945 */
   1421 _mm_min_pu8 (__m64 __A, __m64 __B)
   1422 {
   1423   return (__m64) __builtin_ia32_pminub ((__v8qi)__A, (__v8qi)__B);
   1424 }
   1425 
   1426 /* APPLE LOCAL begin radar 5618945 */
   1427 __STATIC_INLINE __m64 __attribute__((__always_inline__))
   1428 /* APPLE LOCAL end radar 5618945 */
   1429 _m_pminub (__m64 __A, __m64 __B)
   1430 {
   1431   return _mm_min_pu8 (__A, __B);
   1432 }
   1433 
/* Create an 8-bit mask of the signs of 8-bit values.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE int __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_movemask_pi8 (__m64 __A)
{
  return __builtin_ia32_pmovmskb ((__v8qi)__A);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE int __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_m_pmovmskb (__m64 __A)
{
  return _mm_movemask_pi8 (__A);
}

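/* Usage sketch: bit i of the result is the most significant bit of
   byte i of A, so after a byte-wise comparison the mask can drive a
   scalar branch, e.g.

     if (_mm_movemask_pi8 (_mm_cmpeq_pi8 (a, b)) == 0xFF)
       return 1;

   tests whether all eight bytes of a and b match (a and b are
   placeholders; _mm_cmpeq_pi8 is from <mmintrin.h>).  */
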
/* Multiply four unsigned 16-bit values in A by four unsigned 16-bit values
   in B and produce the high 16 bits of the 32-bit results.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_mulhi_pu16 (__m64 __A, __m64 __B)
{
  return (__m64) __builtin_ia32_pmulhuw ((__v4hi)__A, (__v4hi)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_m_pmulhuw (__m64 __A, __m64 __B)
{
  return _mm_mulhi_pu16 (__A, __B);
}

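/* Example: each 16x16 -> 32-bit unsigned product keeps only its upper
   half, so for the word 0x8000 multiplied by 0x8000 the full product is
   0x40000000 and the stored word is 0x4000.  This is handy for
   fixed-point scaling without a widening temporary.  */
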
/* Return a combination of the four 16-bit values in A.  The selector
   must be an immediate.  */
#if 0
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_shuffle_pi16 (__m64 __A, int __N)
{
  return (__m64) __builtin_ia32_pshufw ((__v4hi)__A, __N);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_m_pshufw (__m64 __A, int __N)
{
  return _mm_shuffle_pi16 (__A, __N);
}
#else
#define _mm_shuffle_pi16(A, N) \
  ((__m64) __builtin_ia32_pshufw ((__v4hi)(A), (N)))
#define _m_pshufw(A, N)		_mm_shuffle_pi16 ((A), (N))
#endif

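/* Usage sketch: successive two-bit fields of the immediate select the
   source word for each result word (bits 1:0 pick word 0, bits 3:2
   pick word 1, and so on).  For example

     __m64 rev = _mm_shuffle_pi16 (a, 0x1B);

   reverses the four words of a, since 0x1B encodes the selectors
   3, 2, 1, 0 reading from word 0 upward (a is a placeholder value).  */
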
/* Conditionally store byte elements of A into P.  The high bit of each
   byte in the selector N determines whether the corresponding byte from
   A is stored.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE void __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P)
{
  __builtin_ia32_maskmovq ((__v8qi)__A, (__v8qi)__N, __P);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE void __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_m_maskmovq (__m64 __A, __m64 __N, char *__P)
{
  _mm_maskmove_si64 (__A, __N, __P);
}

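/* Usage sketch: only bytes of A whose mask byte has its high bit set
   are written, and the store is non-temporal (it bypasses the cache).
   For example

     _mm_maskmove_si64 (data, _mm_set_pi8 (0, 0, 0, 0, -1, -1, -1, -1), p);

   writes only the low four bytes of data to p (data and p are
   placeholders; _mm_set_pi8 is from <mmintrin.h>).  */
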
/* Compute the rounded averages of the unsigned 8-bit values in A and B.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_avg_pu8 (__m64 __A, __m64 __B)
{
  return (__m64) __builtin_ia32_pavgb ((__v8qi)__A, (__v8qi)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_m_pavgb (__m64 __A, __m64 __B)
{
  return _mm_avg_pu8 (__A, __B);
}

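/* Example: the average rounds up, i.e. each byte becomes
   (a + b + 1) >> 1, so averaging the bytes 1 and 2 gives 2, not 1.  */
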
/* Compute the rounded averages of the unsigned 16-bit values in A and B.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_avg_pu16 (__m64 __A, __m64 __B)
{
  return (__m64) __builtin_ia32_pavgw ((__v4hi)__A, (__v4hi)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_m_pavgw (__m64 __A, __m64 __B)
{
  return _mm_avg_pu16 (__A, __B);
}

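/* Example: same rounding as the byte form, applied per 16-bit element:
   each word becomes (a + b + 1) >> 1, computed in a wider intermediate
   so no overflow occurs.  */
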
/* Compute the sum of the absolute differences of the unsigned 8-bit
   values in A and B.  Return the value in the lower 16-bit word; the
   upper words are cleared.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_sad_pu8 (__m64 __A, __m64 __B)
{
  return (__m64) __builtin_ia32_psadbw ((__v8qi)__A, (__v8qi)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_m_psadbw (__m64 __A, __m64 __B)
{
  return _mm_sad_pu8 (__A, __B);
}

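/* Example (arbitrary data): for the byte vectors a = {1,2,3,4,5,6,7,8}
   and b = {8,7,6,5,4,3,2,1}, the low word of the result is
   |1-8| + |2-7| + ... + |8-1| = 32 and the remaining words are zero.
   A typical use is accumulating motion-estimation costs over rows of
   pixels.  */
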
/* Loads one cache line from address P to a location "closer" to the
   processor.  The selector I specifies the type of prefetch operation.  */
#if 0
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE void __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_prefetch (void *__P, enum _mm_hint __I)
{
  __builtin_prefetch (__P, 0, __I);
}
#else
#define _mm_prefetch(P, I) \
  __builtin_prefetch ((P), 0, (I))
#endif

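/* Usage sketch: the hint should be one of the enum _mm_hint values
   declared earlier in this header, e.g.

     _mm_prefetch (ptr, _MM_HINT_T0);

   where ptr is a placeholder for an address that will be read soon.
   The prefetch is only a hint and may be ignored by the hardware.  */
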
/* Stores the data in A to the address P without polluting the caches.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE void __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_stream_pi (__m64 *__P, __m64 __A)
{
  /* APPLE LOCAL 4656532 use V1DImode for _m64 */
  __builtin_ia32_movntq (__P, __A);
}

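/* Usage sketch (dst and v are placeholders):

     _mm_stream_pi (&dst[i], v);

   The store bypasses the cache, so a run of streaming stores should be
   followed by _mm_sfence (declared below) before the data is consumed
   by another agent.  */
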
/* Likewise.  The address must be 16-byte aligned.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE void __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_stream_ps (float *__P, __m128 __A)
{
  __builtin_ia32_movntps (__P, (__v4sf)__A);
}

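/* Usage sketch: the destination must be 16-byte aligned, e.g. a buffer
   declared with an alignment attribute:

     float out[4] __attribute__ ((aligned (16)));
     _mm_stream_ps (out, _mm_set1_ps (1.0f));

   _mm_set1_ps is defined earlier in this header.  */
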
/* Guarantees that every preceding store is globally visible before
   any subsequent store.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE void __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_sfence (void)
{
  __builtin_ia32_sfence ();
}

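/* Usage sketch: typically issued once after a loop of _mm_stream_ps or
   _mm_stream_pi stores, before signalling another thread or device
   that the buffer is ready, e.g.

     for (i = 0; i < n; i += 4)
       _mm_stream_ps (&out[i], rows[i / 4]);
     _mm_sfence ();

   out, rows, n and i are placeholders for this example.  */
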
/* The execution of the next instruction is delayed by an
   implementation-specific amount of time.  The instruction does not
   modify the architectural state.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE void __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_pause (void)
{
  __asm__ __volatile__ ("rep; nop" : : );
}
/* APPLE LOCAL end radar 4152603 */

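/* Usage sketch: _mm_pause is intended for spin-wait loops, where it
   reduces power and yields resources to the sibling hardware thread,
   e.g.

     while (!__sync_bool_compare_and_swap (&lock, 0, 1))
       _mm_pause ();

   lock is a placeholder; __sync_bool_compare_and_swap is the GCC
   atomic builtin.  */
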
/* Transpose the 4x4 matrix composed of row[0-3].  */
#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3)			\
do {									\
  __v4sf __r0 = (row0), __r1 = (row1), __r2 = (row2), __r3 = (row3);	\
  __v4sf __t0 = __builtin_ia32_unpcklps (__r0, __r1);			\
  __v4sf __t1 = __builtin_ia32_unpcklps (__r2, __r3);			\
  __v4sf __t2 = __builtin_ia32_unpckhps (__r0, __r1);			\
  __v4sf __t3 = __builtin_ia32_unpckhps (__r2, __r3);			\
  (row0) = __builtin_ia32_movlhps (__t0, __t1);				\
  (row1) = __builtin_ia32_movhlps (__t1, __t0);				\
  (row2) = __builtin_ia32_movlhps (__t2, __t3);				\
  (row3) = __builtin_ia32_movhlps (__t3, __t2);				\
} while (0)

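/* Usage sketch: the macro transposes four rows in place, e.g.

     __m128 r0 = _mm_setr_ps (0.f, 1.f, 2.f, 3.f);
     __m128 r1 = _mm_setr_ps (4.f, 5.f, 6.f, 7.f);
     __m128 r2 = _mm_setr_ps (8.f, 9.f, 10.f, 11.f);
     __m128 r3 = _mm_setr_ps (12.f, 13.f, 14.f, 15.f);
     _MM_TRANSPOSE4_PS (r0, r1, r2, r3);

   afterwards r0 holds the first column {0, 4, 8, 12}, and so on.
   _mm_setr_ps is defined earlier in this header.  */
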
/* APPLE LOCAL begin nodebug inline 4152603 */
#undef __always_inline__
/* APPLE LOCAL end nodebug inline 4152603 */

/* For backward source compatibility.  */
#include <emmintrin.h>

#endif /* __SSE__ */
#endif /* _XMMINTRIN_H_INCLUDED */