; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=AVX

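; Each rounding intrinsic should select a single SSE4.1/AVX ROUND* instruction.
; The immediate encodes the rounding control: bits[1:0] give the mode
; (00 = nearest, 01 = down, 10 = up, 11 = truncate), bit 2 selects the
; MXCSR.RC mode instead, and bit 3 suppresses the inexact (precision)
; exception. With only SSE4.1, the 256-bit cases are legalized as two
; 128-bit rounds; AVX handles them with a single ymm round.

; llvm.floor.* -> (v)roundp[sd] $9 (1001b: round toward -inf, inexact suppressed).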
define <2 x double> @floor_v2f64(<2 x double> %p) {
; SSE41-LABEL: floor_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $9, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: floor_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $9, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.floor.v2f64(<2 x double> %p)

define <4 x float> @floor_v4f32(<4 x float> %p) {
; SSE41-LABEL: floor_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $9, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: floor_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $9, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.floor.v4f32(<4 x float> %p)

define <4 x double> @floor_v4f64(<4 x double> %p) {
; SSE41-LABEL: floor_v4f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $9, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $9, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: floor_v4f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $9, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.floor.v4f64(<4 x double> %p)

define <8 x float> @floor_v8f32(<8 x float> %p) {
; SSE41-LABEL: floor_v8f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $9, %xmm0, %xmm0
; SSE41-NEXT:    roundps $9, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: floor_v8f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $9, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.floor.v8f32(<8 x float> %p)

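; llvm.ceil.* -> (v)roundp[sd] $10 (1010b: round toward +inf, inexact suppressed).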
define <2 x double> @ceil_v2f64(<2 x double> %p) {
; SSE41-LABEL: ceil_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $10, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: ceil_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $10, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p)

define <4 x float> @ceil_v4f32(<4 x float> %p) {
; SSE41-LABEL: ceil_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $10, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: ceil_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $10, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.ceil.v4f32(<4 x float> %p)

define <4 x double> @ceil_v4f64(<4 x double> %p) {
; SSE41-LABEL: ceil_v4f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $10, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $10, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: ceil_v4f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $10, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p)

define <8 x float> @ceil_v8f32(<8 x float> %p) {
; SSE41-LABEL: ceil_v8f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $10, %xmm0, %xmm0
; SSE41-NEXT:    roundps $10, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: ceil_v8f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $10, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.ceil.v8f32(<8 x float> %p)

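; llvm.trunc.* -> (v)roundp[sd] $11 (1011b: round toward zero, inexact suppressed).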
define <2 x double> @trunc_v2f64(<2 x double> %p) {
; SSE41-LABEL: trunc_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $11, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p)

define <4 x float> @trunc_v4f32(<4 x float> %p) {
; SSE41-LABEL: trunc_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $11, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.trunc.v4f32(<4 x float> %p)

define <4 x double> @trunc_v4f64(<4 x double> %p) {
; SSE41-LABEL: trunc_v4f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $11, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $11, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc_v4f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $11, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p)

define <8 x float> @trunc_v8f32(<8 x float> %p) {
; SSE41-LABEL: trunc_v8f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $11, %xmm0, %xmm0
; SSE41-NEXT:    roundps $11, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc_v8f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $11, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.trunc.v8f32(<8 x float> %p)

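; llvm.rint.* -> (v)roundp[sd] $4 (0100b: use the current MXCSR rounding mode;
; the inexact exception is not suppressed, matching rint semantics).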
define <2 x double> @rint_v2f64(<2 x double> %p) {
; SSE41-LABEL: rint_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $4, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: rint_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $4, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.rint.v2f64(<2 x double> %p)

define <4 x float> @rint_v4f32(<4 x float> %p) {
; SSE41-LABEL: rint_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $4, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: rint_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $4, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.rint.v4f32(<4 x float> %p)

define <4 x double> @rint_v4f64(<4 x double> %p) {
; SSE41-LABEL: rint_v4f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $4, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $4, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: rint_v4f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $4, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.rint.v4f64(<4 x double> %p)

define <8 x float> @rint_v8f32(<8 x float> %p) {
; SSE41-LABEL: rint_v8f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $4, %xmm0, %xmm0
; SSE41-NEXT:    roundps $4, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: rint_v8f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $4, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.rint.v8f32(<8 x float> %p)

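; llvm.nearbyint.* -> (v)roundp[sd] $12 (1100b: current MXCSR rounding mode,
; inexact suppressed).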
define <2 x double> @nearbyint_v2f64(<2 x double> %p) {
; SSE41-LABEL: nearbyint_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $12, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: nearbyint_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $12, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)

define <4 x float> @nearbyint_v4f32(<4 x float> %p) {
; SSE41-LABEL: nearbyint_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $12, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: nearbyint_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $12, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)

define <4 x double> @nearbyint_v4f64(<4 x double> %p) {
; SSE41-LABEL: nearbyint_v4f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $12, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $12, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: nearbyint_v4f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $12, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)

define <8 x float> @nearbyint_v8f32(<8 x float> %p) {
; SSE41-LABEL: nearbyint_v8f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $12, %xmm0, %xmm0
; SSE41-NEXT:    roundps $12, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: nearbyint_v8f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $12, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)

;
; Constant Folding
;
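; Calls with constant operands are folded at compile time: only a
; constant-pool load of the pre-rounded vector remains, and no round
; instruction is emitted.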

define <2 x double> @const_floor_v2f64() {
; SSE41-LABEL: const_floor_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [-2.000000e+00,2.000000e+00]
; SSE41-NEXT:    retq
;
; AVX-LABEL: const_floor_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-2.000000e+00,2.000000e+00]
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.floor.v2f64(<2 x double> <double -1.5, double 2.5>)
  ret <2 x double> %t
}

define <4 x float> @const_floor_v4f32() {
; SSE41-LABEL: const_floor_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [-4.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
; SSE41-NEXT:    retq
;
; AVX-LABEL: const_floor_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-4.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.floor.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>)
  ret <4 x float> %t
}

define <2 x double> @const_ceil_v2f64() {
; SSE41-LABEL: const_ceil_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [-1.000000e+00,3.000000e+00]
; SSE41-NEXT:    retq
;
; AVX-LABEL: const_ceil_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-1.000000e+00,3.000000e+00]
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> <double -1.5, double 2.5>)
  ret <2 x double> %t
}

define <4 x float> @const_ceil_v4f32() {
; SSE41-LABEL: const_ceil_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,3.000000e+00]
; SSE41-NEXT:    retq
;
; AVX-LABEL: const_ceil_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,3.000000e+00]
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>)
  ret <4 x float> %t
}

define <2 x double> @const_trunc_v2f64() {
; SSE41-LABEL: const_trunc_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [-1.000000e+00,2.000000e+00]
; SSE41-NEXT:    retq
;
; AVX-LABEL: const_trunc_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-1.000000e+00,2.000000e+00]
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> <double -1.5, double 2.5>)
  ret <2 x double> %t
}

define <4 x float> @const_trunc_v4f32() {
; SSE41-LABEL: const_trunc_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
; SSE41-NEXT:    retq
;
; AVX-LABEL: const_trunc_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>)
  ret <4 x float> %t
}