Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck %s
      3 
      ; Declarations of the vector rounding intrinsics used by the tests in this
      ; file: floor, ceil, trunc, rint and nearbyint, each declared for the six
      ; vector types v2f64, v4f32, v4f64, v8f32, v8f64 and v16f32.
      4 declare <2 x double> @llvm.floor.v2f64(<2 x double> %p)
      5 declare <4 x float> @llvm.floor.v4f32(<4 x float> %p)
      6 declare <4 x double> @llvm.floor.v4f64(<4 x double> %p)
      7 declare <8 x float> @llvm.floor.v8f32(<8 x float> %p)
      8 declare <8 x double> @llvm.floor.v8f64(<8 x double> %p)
      9 declare <16 x float> @llvm.floor.v16f32(<16 x float> %p)
     10 declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
     11 declare <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
     12 declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
     13 declare <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
     14 declare <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
     15 declare <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
     16 declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
     17 declare <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
     18 declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
     19 declare <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
     20 declare <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
     21 declare <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
     22 declare <2 x double> @llvm.rint.v2f64(<2 x double> %p)
     23 declare <4 x float> @llvm.rint.v4f32(<4 x float> %p)
     24 declare <4 x double> @llvm.rint.v4f64(<4 x double> %p)
     25 declare <8 x float> @llvm.rint.v8f32(<8 x float> %p)
     26 declare <8 x double> @llvm.rint.v8f64(<8 x double> %p)
     27 declare <16 x float> @llvm.rint.v16f32(<16 x float> %p)
     28 declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
     29 declare <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
     30 declare <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
     31 declare <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
     32 declare <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p)
     33 declare <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p)
     34 
     ; Unmasked llvm.floor on a <2 x double> register argument; the expected
     ; assembly is a single vroundpd with immediate $9 operating on xmm0.
     35 define <2 x double> @floor_v2f64(<2 x double> %p) {
     36 ; CHECK-LABEL: floor_v2f64:
     37 ; CHECK:       ## %bb.0:
     38 ; CHECK-NEXT:    vroundpd $9, %xmm0, %xmm0
     39 ; CHECK-NEXT:    retq
     40   %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
     41   ret <2 x double> %t
     42 }
     43 
     ; Unmasked llvm.floor on a <4 x float> register argument; the expected
     ; assembly is a single vroundps with immediate $9 operating on xmm0.
     44 define <4 x float> @floor_v4f32(<4 x float> %p) {
     45 ; CHECK-LABEL: floor_v4f32:
     46 ; CHECK:       ## %bb.0:
     47 ; CHECK-NEXT:    vroundps $9, %xmm0, %xmm0
     48 ; CHECK-NEXT:    retq
     49   %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
     50   ret <4 x float> %t
     51 }
     52 
     ; Unmasked llvm.floor on a <4 x double> register argument; the expected
     ; assembly is a single vroundpd with immediate $9 operating on ymm0.
     53 define <4 x double> @floor_v4f64(<4 x double> %p){
     54 ; CHECK-LABEL: floor_v4f64:
     55 ; CHECK:       ## %bb.0:
     56 ; CHECK-NEXT:    vroundpd $9, %ymm0, %ymm0
     57 ; CHECK-NEXT:    retq
     58   %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
     59   ret <4 x double> %t
     60 }
     61 
     ; Unmasked llvm.floor on a <8 x float> register argument; the expected
     ; assembly is a single vroundps with immediate $9 operating on ymm0.
     62 define <8 x float> @floor_v8f32(<8 x float> %p) {
     63 ; CHECK-LABEL: floor_v8f32:
     64 ; CHECK:       ## %bb.0:
     65 ; CHECK-NEXT:    vroundps $9, %ymm0, %ymm0
     66 ; CHECK-NEXT:    retq
     67   %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
     68   ret <8 x float> %t
     69 }
     70 
     ; Unmasked llvm.floor on a <8 x double> register argument; at this width the
     ; expected assembly is a single vrndscalepd with immediate $9 on zmm0.
     71 define <8 x double> @floor_v8f64(<8 x double> %p){
     72 ; CHECK-LABEL: floor_v8f64:
     73 ; CHECK:       ## %bb.0:
     74 ; CHECK-NEXT:    vrndscalepd $9, %zmm0, %zmm0
     75 ; CHECK-NEXT:    retq
     76   %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
     77   ret <8 x double> %t
     78 }
     79 
     ; Unmasked llvm.floor on a <16 x float> register argument; at this width the
     ; expected assembly is a single vrndscaleps with immediate $9 on zmm0.
     80 define <16 x float> @floor_v16f32(<16 x float> %p) {
     81 ; CHECK-LABEL: floor_v16f32:
     82 ; CHECK:       ## %bb.0:
     83 ; CHECK-NEXT:    vrndscaleps $9, %zmm0, %zmm0
     84 ; CHECK-NEXT:    retq
     85   %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
     86   ret <16 x float> %t
     87 }
     88 
     ; llvm.floor applied to a <2 x double> loaded from %ptr; the expected
     ; assembly uses (%rdi) directly as the vroundpd $9 source operand, with no
     ; separate load instruction.
     89 define <2 x double> @floor_v2f64_load(<2 x double>* %ptr) {
     90 ; CHECK-LABEL: floor_v2f64_load:
     91 ; CHECK:       ## %bb.0:
     92 ; CHECK-NEXT:    vroundpd $9, (%rdi), %xmm0
     93 ; CHECK-NEXT:    retq
     94   %p = load <2 x double>, <2 x double>* %ptr
     95   %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
     96   ret <2 x double> %t
     97 }
     98 
     ; llvm.floor applied to a <4 x float> loaded from %ptr; the expected
     ; assembly uses (%rdi) directly as the vroundps $9 source operand, with no
     ; separate load instruction.
     99 define <4 x float> @floor_v4f32_load(<4 x float>* %ptr) {
    100 ; CHECK-LABEL: floor_v4f32_load:
    101 ; CHECK:       ## %bb.0:
    102 ; CHECK-NEXT:    vroundps $9, (%rdi), %xmm0
    103 ; CHECK-NEXT:    retq
    104   %p = load <4 x float>, <4 x float>* %ptr
    105   %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
    106   ret <4 x float> %t
    107 }
    108 
    ; llvm.floor applied to a <4 x double> loaded from %ptr; the expected
    ; assembly uses (%rdi) directly as the vroundpd $9 source operand, with no
    ; separate load instruction.
    109 define <4 x double> @floor_v4f64_load(<4 x double>* %ptr){
    110 ; CHECK-LABEL: floor_v4f64_load:
    111 ; CHECK:       ## %bb.0:
    112 ; CHECK-NEXT:    vroundpd $9, (%rdi), %ymm0
    113 ; CHECK-NEXT:    retq
    114   %p = load <4 x double>, <4 x double>* %ptr
    115   %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
    116   ret <4 x double> %t
    117 }
    118 
    ; llvm.floor applied to a <8 x float> loaded from %ptr; the expected
    ; assembly uses (%rdi) directly as the vroundps $9 source operand, with no
    ; separate load instruction.
    119 define <8 x float> @floor_v8f32_load(<8 x float>* %ptr) {
    120 ; CHECK-LABEL: floor_v8f32_load:
    121 ; CHECK:       ## %bb.0:
    122 ; CHECK-NEXT:    vroundps $9, (%rdi), %ymm0
    123 ; CHECK-NEXT:    retq
    124   %p = load <8 x float>, <8 x float>* %ptr
    125   %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
    126   ret <8 x float> %t
    127 }
    128 
    ; llvm.floor applied to a <8 x double> loaded from %ptr; the expected
    ; assembly uses (%rdi) directly as the vrndscalepd $9 source operand, with
    ; no separate load instruction.
    129 define <8 x double> @floor_v8f64_load(<8 x double>* %ptr){
    130 ; CHECK-LABEL: floor_v8f64_load:
    131 ; CHECK:       ## %bb.0:
    132 ; CHECK-NEXT:    vrndscalepd $9, (%rdi), %zmm0
    133 ; CHECK-NEXT:    retq
    134   %p = load <8 x double>, <8 x double>* %ptr
    135   %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
    136   ret <8 x double> %t
    137 }
    138 
    ; llvm.floor applied to a <16 x float> loaded from %ptr; the expected
    ; assembly uses (%rdi) directly as the vrndscaleps $9 source operand, with
    ; no separate load instruction.
    139 define <16 x float> @floor_v16f32_load(<16 x float>* %ptr) {
    140 ; CHECK-LABEL: floor_v16f32_load:
    141 ; CHECK:       ## %bb.0:
    142 ; CHECK-NEXT:    vrndscaleps $9, (%rdi), %zmm0
    143 ; CHECK-NEXT:    retq
    144   %p = load <16 x float>, <16 x float>* %ptr
    145   %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
    146   ret <16 x float> %t
    147 }
    148 
    ; Per-lane select between llvm.floor(%p) and %passthru, taking the floor
    ; result in lanes where %cmp == 0. Expected assembly: vptestnmq builds %k1,
    ; vrndscalepd $9 masked by %k1 writes xmm1, and xmm1 is then moved to xmm0.
    149 define <2 x double> @floor_v2f64_mask(<2 x double> %p, <2 x double> %passthru, <2 x i64> %cmp) {
    150 ; CHECK-LABEL: floor_v2f64_mask:
    151 ; CHECK:       ## %bb.0:
    152 ; CHECK-NEXT:    vptestnmq %xmm2, %xmm2, %k1
    153 ; CHECK-NEXT:    vrndscalepd $9, %xmm0, %xmm1 {%k1}
    154 ; CHECK-NEXT:    vmovapd %xmm1, %xmm0
    155 ; CHECK-NEXT:    retq
    156   %c = icmp eq <2 x i64> %cmp, zeroinitializer
    157   %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
    158   %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
    159   ret <2 x double> %s
    160 }
    161 
    ; Per-lane select between llvm.floor(%p) and %passthru, taking the floor
    ; result in lanes where %cmp == 0. Expected assembly: vptestnmd builds %k1,
    ; vrndscaleps $9 masked by %k1 writes xmm1, and xmm1 is then moved to xmm0.
    162 define <4 x float> @floor_v4f32_mask(<4 x float> %p, <4 x float> %passthru, <4 x i32> %cmp) {
    163 ; CHECK-LABEL: floor_v4f32_mask:
    164 ; CHECK:       ## %bb.0:
    165 ; CHECK-NEXT:    vptestnmd %xmm2, %xmm2, %k1
    166 ; CHECK-NEXT:    vrndscaleps $9, %xmm0, %xmm1 {%k1}
    167 ; CHECK-NEXT:    vmovaps %xmm1, %xmm0
    168 ; CHECK-NEXT:    retq
    169   %c = icmp eq <4 x i32> %cmp, zeroinitializer
    170   %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
    171   %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
    172   ret <4 x float> %s
    173 }
    174 
    ; Per-lane select between llvm.floor(%p) and %passthru, taking the floor
    ; result in lanes where %cmp == 0. Expected assembly: vptestnmq builds %k1,
    ; vrndscalepd $9 masked by %k1 writes ymm1, and ymm1 is then moved to ymm0.
    175 define <4 x double> @floor_v4f64_mask(<4 x double> %p, <4 x double> %passthru, <4 x i64> %cmp) {
    176 ; CHECK-LABEL: floor_v4f64_mask:
    177 ; CHECK:       ## %bb.0:
    178 ; CHECK-NEXT:    vptestnmq %ymm2, %ymm2, %k1
    179 ; CHECK-NEXT:    vrndscalepd $9, %ymm0, %ymm1 {%k1}
    180 ; CHECK-NEXT:    vmovapd %ymm1, %ymm0
    181 ; CHECK-NEXT:    retq
    182   %c = icmp eq <4 x i64> %cmp, zeroinitializer
    183   %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
    184   %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
    185   ret <4 x double> %s
    186 }
    187 
    ; Per-lane select between llvm.floor(%p) and %passthru, taking the floor
    ; result in lanes where %cmp == 0. Expected assembly: vptestnmd builds %k1,
    ; vrndscaleps $9 masked by %k1 writes ymm1, and ymm1 is then moved to ymm0.
    188 define <8 x float> @floor_v8f32_mask(<8 x float> %p, <8 x float> %passthru, <8 x i32> %cmp) {
    189 ; CHECK-LABEL: floor_v8f32_mask:
    190 ; CHECK:       ## %bb.0:
    191 ; CHECK-NEXT:    vptestnmd %ymm2, %ymm2, %k1
    192 ; CHECK-NEXT:    vrndscaleps $9, %ymm0, %ymm1 {%k1}
    193 ; CHECK-NEXT:    vmovaps %ymm1, %ymm0
    194 ; CHECK-NEXT:    retq
    195   %c = icmp eq <8 x i32> %cmp, zeroinitializer
    196   %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
    197   %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
    198   ret <8 x float> %s
    199 }
    200 
    ; Per-lane select between llvm.floor(%p) and %passthru, taking the floor
    ; result in lanes where %cmp == 0. Expected assembly: vptestnmq builds %k1,
    ; vrndscalepd $9 masked by %k1 writes zmm1, and zmm1 is then moved to zmm0.
    201 define <8 x double> @floor_v8f64_mask(<8 x double> %p, <8 x double> %passthru, <8 x i64> %cmp) {
    202 ; CHECK-LABEL: floor_v8f64_mask:
    203 ; CHECK:       ## %bb.0:
    204 ; CHECK-NEXT:    vptestnmq %zmm2, %zmm2, %k1
    205 ; CHECK-NEXT:    vrndscalepd $9, %zmm0, %zmm1 {%k1}
    206 ; CHECK-NEXT:    vmovapd %zmm1, %zmm0
    207 ; CHECK-NEXT:    retq
    208   %c = icmp eq <8 x i64> %cmp, zeroinitializer
    209   %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
    210   %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
    211   ret <8 x double> %s
    212 }
    213 
    ; Per-lane select between llvm.floor(%p) and %passthru, taking the floor
    ; result in lanes where %cmp == 0. Expected assembly: vptestnmd builds %k1,
    ; vrndscaleps $9 masked by %k1 writes zmm1, and zmm1 is then moved to zmm0.
    214 define <16 x float> @floor_v16f32_mask(<16 x float> %p, <16 x float> %passthru, <16 x i32> %cmp) {
    215 ; CHECK-LABEL: floor_v16f32_mask:
    216 ; CHECK:       ## %bb.0:
    217 ; CHECK-NEXT:    vptestnmd %zmm2, %zmm2, %k1
    218 ; CHECK-NEXT:    vrndscaleps $9, %zmm0, %zmm1 {%k1}
    219 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
    220 ; CHECK-NEXT:    retq
    221   %c = icmp eq <16 x i32> %cmp, zeroinitializer
    222   %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
    223   %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
    224   ret <16 x float> %s
    225 }
    226 
    ; Per-lane select between llvm.floor(%p) and zeroinitializer, taking the
    ; floor result in lanes where %cmp == 0. Expected assembly: vptestnmq builds
    ; %k1 and vrndscalepd $9 applies it with the {z} modifier, writing xmm0.
    227 define <2 x double> @floor_v2f64_maskz(<2 x double> %p, <2 x i64> %cmp) {
    228 ; CHECK-LABEL: floor_v2f64_maskz:
    229 ; CHECK:       ## %bb.0:
    230 ; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
    231 ; CHECK-NEXT:    vrndscalepd $9, %xmm0, %xmm0 {%k1} {z}
    232 ; CHECK-NEXT:    retq
    233   %c = icmp eq <2 x i64> %cmp, zeroinitializer
    234   %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
    235   %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
    236   ret <2 x double> %s
    237 }
    238 
    ; Per-lane select between llvm.floor(%p) and zeroinitializer, taking the
    ; floor result in lanes where %cmp == 0. Expected assembly: vptestnmd builds
    ; %k1 and vrndscaleps $9 applies it with the {z} modifier, writing xmm0.
    239 define <4 x float> @floor_v4f32_maskz(<4 x float> %p, <4 x i32> %cmp) {
    240 ; CHECK-LABEL: floor_v4f32_maskz:
    241 ; CHECK:       ## %bb.0:
    242 ; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
    243 ; CHECK-NEXT:    vrndscaleps $9, %xmm0, %xmm0 {%k1} {z}
    244 ; CHECK-NEXT:    retq
    245   %c = icmp eq <4 x i32> %cmp, zeroinitializer
    246   %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
    247   %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
    248   ret <4 x float> %s
    249 }
    250 
    ; Per-lane select between llvm.floor(%p) and zeroinitializer, taking the
    ; floor result in lanes where %cmp == 0. Expected assembly: vptestnmq builds
    ; %k1 and vrndscalepd $9 applies it with the {z} modifier, writing ymm0.
    251 define <4 x double> @floor_v4f64_maskz(<4 x double> %p, <4 x i64> %cmp) {
    252 ; CHECK-LABEL: floor_v4f64_maskz:
    253 ; CHECK:       ## %bb.0:
    254 ; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
    255 ; CHECK-NEXT:    vrndscalepd $9, %ymm0, %ymm0 {%k1} {z}
    256 ; CHECK-NEXT:    retq
    257   %c = icmp eq <4 x i64> %cmp, zeroinitializer
    258   %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
    259   %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
    260   ret <4 x double> %s
    261 }
    262 
    ; Per-lane select between llvm.floor(%p) and zeroinitializer, taking the
    ; floor result in lanes where %cmp == 0. Expected assembly: vptestnmd builds
    ; %k1 and vrndscaleps $9 applies it with the {z} modifier, writing ymm0.
    263 define <8 x float> @floor_v8f32_maskz(<8 x float> %p, <8 x i32> %cmp) {
    264 ; CHECK-LABEL: floor_v8f32_maskz:
    265 ; CHECK:       ## %bb.0:
    266 ; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
    267 ; CHECK-NEXT:    vrndscaleps $9, %ymm0, %ymm0 {%k1} {z}
    268 ; CHECK-NEXT:    retq
    269   %c = icmp eq <8 x i32> %cmp, zeroinitializer
    270   %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
    271   %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
    272   ret <8 x float> %s
    273 }
    274 
    ; Per-lane select between llvm.floor(%p) and zeroinitializer, taking the
    ; floor result in lanes where %cmp == 0. Expected assembly: vptestnmq builds
    ; %k1 and vrndscalepd $9 applies it with the {z} modifier, writing zmm0.
    275 define <8 x double> @floor_v8f64_maskz(<8 x double> %p, <8 x i64> %cmp) {
    276 ; CHECK-LABEL: floor_v8f64_maskz:
    277 ; CHECK:       ## %bb.0:
    278 ; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
    279 ; CHECK-NEXT:    vrndscalepd $9, %zmm0, %zmm0 {%k1} {z}
    280 ; CHECK-NEXT:    retq
    281   %c = icmp eq <8 x i64> %cmp, zeroinitializer
    282   %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
    283   %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
    284   ret <8 x double> %s
    285 }
    286 
    ; Per-lane select between llvm.floor(%p) and zeroinitializer, taking the
    ; floor result in lanes where %cmp == 0. Expected assembly: vptestnmd builds
    ; %k1 and vrndscaleps $9 applies it with the {z} modifier, writing zmm0.
    287 define <16 x float> @floor_v16f32_maskz(<16 x float> %p, <16 x i32> %cmp) {
    288 ; CHECK-LABEL: floor_v16f32_maskz:
    289 ; CHECK:       ## %bb.0:
    290 ; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
    291 ; CHECK-NEXT:    vrndscaleps $9, %zmm0, %zmm0 {%k1} {z}
    292 ; CHECK-NEXT:    retq
    293   %c = icmp eq <16 x i32> %cmp, zeroinitializer
    294   %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
    295   %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
    296   ret <16 x float> %s
    297 }
    298 
    ; llvm.floor of a <2 x double> loaded from %ptr, selected per lane against
    ; %passthru (floor result where %cmp == 0). Expected assembly: vptestnmq
    ; builds %k1 and a %k1-masked vrndscalepd $9 reads (%rdi) and writes xmm0.
    299 define <2 x double> @floor_v2f64_mask_load(<2 x double>* %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
    300 ; CHECK-LABEL: floor_v2f64_mask_load:
    301 ; CHECK:       ## %bb.0:
    302 ; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
    303 ; CHECK-NEXT:    vrndscalepd $9, (%rdi), %xmm0 {%k1}
    304 ; CHECK-NEXT:    retq
    305   %c = icmp eq <2 x i64> %cmp, zeroinitializer
    306   %p = load <2 x double>, <2 x double>* %ptr
    307   %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
    308   %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
    309   ret <2 x double> %s
    310 }
    311 
    ; llvm.floor of a <4 x float> loaded from %ptr, selected per lane against
    ; %passthru (floor result where %cmp == 0). Expected assembly: vptestnmd
    ; builds %k1 and a %k1-masked vrndscaleps $9 reads (%rdi) and writes xmm0.
    312 define <4 x float> @floor_v4f32_mask_load(<4 x float>* %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
    313 ; CHECK-LABEL: floor_v4f32_mask_load:
    314 ; CHECK:       ## %bb.0:
    315 ; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
    316 ; CHECK-NEXT:    vrndscaleps $9, (%rdi), %xmm0 {%k1}
    317 ; CHECK-NEXT:    retq
    318   %c = icmp eq <4 x i32> %cmp, zeroinitializer
    319   %p = load <4 x float>, <4 x float>* %ptr
    320   %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
    321   %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
    322   ret <4 x float> %s
    323 }
    324 
    ; llvm.floor of a <4 x double> loaded from %ptr, selected per lane against
    ; %passthru (floor result where %cmp == 0). Expected assembly: vptestnmq
    ; builds %k1 and a %k1-masked vrndscalepd $9 reads (%rdi) and writes ymm0.
    325 define <4 x double> @floor_v4f64_mask_load(<4 x double>* %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
    326 ; CHECK-LABEL: floor_v4f64_mask_load:
    327 ; CHECK:       ## %bb.0:
    328 ; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
    329 ; CHECK-NEXT:    vrndscalepd $9, (%rdi), %ymm0 {%k1}
    330 ; CHECK-NEXT:    retq
    331   %c = icmp eq <4 x i64> %cmp, zeroinitializer
    332   %p = load <4 x double>, <4 x double>* %ptr
    333   %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
    334   %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
    335   ret <4 x double> %s
    336 }
    337 
    ; llvm.floor of a <8 x float> loaded from %ptr, selected per lane against
    ; %passthru (floor result where %cmp == 0). Expected assembly: vptestnmd
    ; builds %k1 and a %k1-masked vrndscaleps $9 reads (%rdi) and writes ymm0.
    338 define <8 x float> @floor_v8f32_mask_load(<8 x float>* %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
    339 ; CHECK-LABEL: floor_v8f32_mask_load:
    340 ; CHECK:       ## %bb.0:
    341 ; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
    342 ; CHECK-NEXT:    vrndscaleps $9, (%rdi), %ymm0 {%k1}
    343 ; CHECK-NEXT:    retq
    344   %c = icmp eq <8 x i32> %cmp, zeroinitializer
    345   %p = load <8 x float>, <8 x float>* %ptr
    346   %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
    347   %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
    348   ret <8 x float> %s
    349 }
    350 
    ; llvm.floor of a <8 x double> loaded from %ptr, selected per lane against
    ; %passthru (floor result where %cmp == 0). Expected assembly: vptestnmq
    ; builds %k1 and a %k1-masked vrndscalepd $9 reads (%rdi) and writes zmm0.
    351 define <8 x double> @floor_v8f64_mask_load(<8 x double>* %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
    352 ; CHECK-LABEL: floor_v8f64_mask_load:
    353 ; CHECK:       ## %bb.0:
    354 ; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
    355 ; CHECK-NEXT:    vrndscalepd $9, (%rdi), %zmm0 {%k1}
    356 ; CHECK-NEXT:    retq
    357   %c = icmp eq <8 x i64> %cmp, zeroinitializer
    358   %p = load <8 x double>, <8 x double>* %ptr
    359   %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
    360   %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
    361   ret <8 x double> %s
    362 }
    363 
    ; llvm.floor of a <16 x float> loaded from %ptr, selected per lane against
    ; %passthru (floor result where %cmp == 0). Expected assembly: vptestnmd
    ; builds %k1 and a %k1-masked vrndscaleps $9 reads (%rdi) and writes zmm0.
    364 define <16 x float> @floor_v16f32_mask_load(<16 x float>* %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
    365 ; CHECK-LABEL: floor_v16f32_mask_load:
    366 ; CHECK:       ## %bb.0:
    367 ; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
    368 ; CHECK-NEXT:    vrndscaleps $9, (%rdi), %zmm0 {%k1}
    369 ; CHECK-NEXT:    retq
    370   %c = icmp eq <16 x i32> %cmp, zeroinitializer
    371   %p = load <16 x float>, <16 x float>* %ptr
    372   %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
    373   %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
    374   ret <16 x float> %s
    375 }
    376 
    ; llvm.floor of a <2 x double> loaded from %ptr, selected per lane against
    ; zeroinitializer (floor result where %cmp == 0). Expected assembly:
    ; vptestnmq builds %k1 and vrndscalepd $9 reads (%rdi) with {%k1} {z}.
    377 define <2 x double> @floor_v2f64_maskz_load(<2 x double>* %ptr, <2 x i64> %cmp) {
    378 ; CHECK-LABEL: floor_v2f64_maskz_load:
    379 ; CHECK:       ## %bb.0:
    380 ; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
    381 ; CHECK-NEXT:    vrndscalepd $9, (%rdi), %xmm0 {%k1} {z}
    382 ; CHECK-NEXT:    retq
    383   %c = icmp eq <2 x i64> %cmp, zeroinitializer
    384   %p = load <2 x double>, <2 x double>* %ptr
    385   %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
    386   %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
    387   ret <2 x double> %s
    388 }
    389 
    ; llvm.floor of a <4 x float> loaded from %ptr, selected per lane against
    ; zeroinitializer (floor result where %cmp == 0). Expected assembly:
    ; vptestnmd builds %k1 and vrndscaleps $9 reads (%rdi) with {%k1} {z}.
    390 define <4 x float> @floor_v4f32_maskz_load(<4 x float>* %ptr, <4 x i32> %cmp) {
    391 ; CHECK-LABEL: floor_v4f32_maskz_load:
    392 ; CHECK:       ## %bb.0:
    393 ; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
    394 ; CHECK-NEXT:    vrndscaleps $9, (%rdi), %xmm0 {%k1} {z}
    395 ; CHECK-NEXT:    retq
    396   %c = icmp eq <4 x i32> %cmp, zeroinitializer
    397   %p = load <4 x float>, <4 x float>* %ptr
    398   %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
    399   %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
    400   ret <4 x float> %s
    401 }
    402 
    ; llvm.floor of a <4 x double> loaded from %ptr, selected per lane against
    ; zeroinitializer (floor result where %cmp == 0). Expected assembly:
    ; vptestnmq builds %k1 and vrndscalepd $9 reads (%rdi) with {%k1} {z}.
    403 define <4 x double> @floor_v4f64_maskz_load(<4 x double>* %ptr, <4 x i64> %cmp) {
    404 ; CHECK-LABEL: floor_v4f64_maskz_load:
    405 ; CHECK:       ## %bb.0:
    406 ; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
    407 ; CHECK-NEXT:    vrndscalepd $9, (%rdi), %ymm0 {%k1} {z}
    408 ; CHECK-NEXT:    retq
    409   %c = icmp eq <4 x i64> %cmp, zeroinitializer
    410   %p = load <4 x double>, <4 x double>* %ptr
    411   %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
    412   %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
    413   ret <4 x double> %s
    414 }
    415 
    ; llvm.floor of a <8 x float> loaded from %ptr, selected per lane against
    ; zeroinitializer (floor result where %cmp == 0). Expected assembly:
    ; vptestnmd builds %k1 and vrndscaleps $9 reads (%rdi) with {%k1} {z}.
    416 define <8 x float> @floor_v8f32_maskz_load(<8 x float>* %ptr, <8 x i32> %cmp) {
    417 ; CHECK-LABEL: floor_v8f32_maskz_load:
    418 ; CHECK:       ## %bb.0:
    419 ; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
    420 ; CHECK-NEXT:    vrndscaleps $9, (%rdi), %ymm0 {%k1} {z}
    421 ; CHECK-NEXT:    retq
    422   %c = icmp eq <8 x i32> %cmp, zeroinitializer
    423   %p = load <8 x float>, <8 x float>* %ptr
    424   %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
    425   %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
    426   ret <8 x float> %s
    427 }
    428 
    ; llvm.floor of a <8 x double> loaded from %ptr, selected per lane against
    ; zeroinitializer (floor result where %cmp == 0). Expected assembly:
    ; vptestnmq builds %k1 and vrndscalepd $9 reads (%rdi) with {%k1} {z}.
    429 define <8 x double> @floor_v8f64_maskz_load(<8 x double>* %ptr, <8 x i64> %cmp) {
    430 ; CHECK-LABEL: floor_v8f64_maskz_load:
    431 ; CHECK:       ## %bb.0:
    432 ; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
    433 ; CHECK-NEXT:    vrndscalepd $9, (%rdi), %zmm0 {%k1} {z}
    434 ; CHECK-NEXT:    retq
    435   %c = icmp eq <8 x i64> %cmp, zeroinitializer
    436   %p = load <8 x double>, <8 x double>* %ptr
    437   %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
    438   %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
    439   ret <8 x double> %s
    440 }
    441 
    ; llvm.floor of a <16 x float> loaded from %ptr, selected per lane against
    ; zeroinitializer (floor result where %cmp == 0). Expected assembly:
    ; vptestnmd builds %k1 and vrndscaleps $9 reads (%rdi) with {%k1} {z}.
    442 define <16 x float> @floor_v16f32_maskz_load(<16 x float>* %ptr, <16 x i32> %cmp) {
    443 ; CHECK-LABEL: floor_v16f32_maskz_load:
    444 ; CHECK:       ## %bb.0:
    445 ; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
    446 ; CHECK-NEXT:    vrndscaleps $9, (%rdi), %zmm0 {%k1} {z}
    447 ; CHECK-NEXT:    retq
    448   %c = icmp eq <16 x i32> %cmp, zeroinitializer
    449   %p = load <16 x float>, <16 x float>* %ptr
    450   %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
    451   %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
    452   ret <16 x float> %s
    453 }
    454 
    ; llvm.floor of a <2 x double> splat built from one double loaded from %ptr
    ; (insertelement into lane 0, then shufflevector with an all-zero mask).
    ; Expected assembly: one vrndscalepd $9 with a (%rdi){1to2} memory operand.
    455 define <2 x double> @floor_v2f64_broadcast(double* %ptr) {
    456 ; CHECK-LABEL: floor_v2f64_broadcast:
    457 ; CHECK:       ## %bb.0:
    458 ; CHECK-NEXT:    vrndscalepd $9, (%rdi){1to2}, %xmm0
    459 ; CHECK-NEXT:    retq
    460   %ps = load double, double* %ptr
    461   %pins = insertelement <2 x double> undef, double %ps, i32 0
    462   %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
    463   %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
    464   ret <2 x double> %t
    465 }
    466 
    ; llvm.floor of a <4 x float> splat built from one float loaded from %ptr
    ; (insertelement into lane 0, then shufflevector with an all-zero mask).
    ; Expected assembly: one vrndscaleps $9 with a (%rdi){1to4} memory operand.
    467 define <4 x float> @floor_v4f32_broadcast(float* %ptr) {
    468 ; CHECK-LABEL: floor_v4f32_broadcast:
    469 ; CHECK:       ## %bb.0:
    470 ; CHECK-NEXT:    vrndscaleps $9, (%rdi){1to4}, %xmm0
    471 ; CHECK-NEXT:    retq
    472   %ps = load float, float* %ptr
    473   %pins = insertelement <4 x float> undef, float %ps, i32 0
    474   %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
    475   %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
    476   ret <4 x float> %t
    477 }
    478 
    ; llvm.floor of a <4 x double> splat built from one double loaded from %ptr
    ; (insertelement into lane 0, then shufflevector with an all-zero mask).
    ; Expected assembly: one vrndscalepd $9 with a (%rdi){1to4} memory operand.
    479 define <4 x double> @floor_v4f64_broadcast(double* %ptr){
    480 ; CHECK-LABEL: floor_v4f64_broadcast:
    481 ; CHECK:       ## %bb.0:
    482 ; CHECK-NEXT:    vrndscalepd $9, (%rdi){1to4}, %ymm0
    483 ; CHECK-NEXT:    retq
    484   %ps = load double, double* %ptr
    485   %pins = insertelement <4 x double> undef, double %ps, i32 0
    486   %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
    487   %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
    488   ret <4 x double> %t
    489 }
    490 
    ; llvm.floor of a <8 x float> splat built from one float loaded from %ptr
    ; (insertelement into lane 0, then shufflevector with an all-zero mask).
    ; Expected assembly: one vrndscaleps $9 with a (%rdi){1to8} memory operand.
    491 define <8 x float> @floor_v8f32_broadcast(float* %ptr) {
    492 ; CHECK-LABEL: floor_v8f32_broadcast:
    493 ; CHECK:       ## %bb.0:
    494 ; CHECK-NEXT:    vrndscaleps $9, (%rdi){1to8}, %ymm0
    495 ; CHECK-NEXT:    retq
    496   %ps = load float, float* %ptr
    497   %pins = insertelement <8 x float> undef, float %ps, i32 0
    498   %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
    499   %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
    500   ret <8 x float> %t
    501 }
    502 
    ; llvm.floor of a <8 x double> splat built from one double loaded from %ptr
    ; (insertelement into lane 0, then shufflevector with an all-zero mask).
    ; Expected assembly: one vrndscalepd $9 with a (%rdi){1to8} memory operand.
    503 define <8 x double> @floor_v8f64_broadcast(double* %ptr){
    504 ; CHECK-LABEL: floor_v8f64_broadcast:
    505 ; CHECK:       ## %bb.0:
    506 ; CHECK-NEXT:    vrndscalepd $9, (%rdi){1to8}, %zmm0
    507 ; CHECK-NEXT:    retq
    508   %ps = load double, double* %ptr
    509   %pins = insertelement <8 x double> undef, double %ps, i32 0
    510   %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
    511   %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
    512   ret <8 x double> %t
    513 }
    514 
    ; llvm.floor of a <16 x float> splat built from one float loaded from %ptr
    ; (insertelement into lane 0, then shufflevector with an all-zero mask).
    ; Expected assembly: one vrndscaleps $9 with a (%rdi){1to16} memory operand.
    515 define <16 x float> @floor_v16f32_broadcast(float* %ptr) {
    516 ; CHECK-LABEL: floor_v16f32_broadcast:
    517 ; CHECK:       ## %bb.0:
    518 ; CHECK-NEXT:    vrndscaleps $9, (%rdi){1to16}, %zmm0
    519 ; CHECK-NEXT:    retq
    520   %ps = load float, float* %ptr
    521   %pins = insertelement <16 x float> undef, float %ps, i32 0
    522   %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
    523   %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
    524   ret <16 x float> %t
    525 }
    526 
    ; llvm.floor of a <2 x double> splat of one double loaded from %ptr, selected
    ; per lane against %passthru (floor result where %cmp == 0). Expected
    ; assembly: vptestnmq builds %k1, then a %k1-masked vrndscalepd $9 with a
    ; (%rdi){1to2} operand writes xmm0.
    527 define <2 x double> @floor_v2f64_mask_broadcast(double* %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
    528 ; CHECK-LABEL: floor_v2f64_mask_broadcast:
    529 ; CHECK:       ## %bb.0:
    530 ; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
    531 ; CHECK-NEXT:    vrndscalepd $9, (%rdi){1to2}, %xmm0 {%k1}
    532 ; CHECK-NEXT:    retq
    533   %c = icmp eq <2 x i64> %cmp, zeroinitializer
    534   %ps = load double, double* %ptr
    535   %pins = insertelement <2 x double> undef, double %ps, i32 0
    536   %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
    537   %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
    538   %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
    539   ret <2 x double> %s
    540 }
    541 
    ; llvm.floor of a <4 x float> splat of one float loaded from %ptr, selected
    ; per lane against %passthru (floor result where %cmp == 0). Expected
    ; assembly: vptestnmd builds %k1, then a %k1-masked vrndscaleps $9 with a
    ; (%rdi){1to4} operand writes xmm0.
    542 define <4 x float> @floor_v4f32_mask_broadcast(float* %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
    543 ; CHECK-LABEL: floor_v4f32_mask_broadcast:
    544 ; CHECK:       ## %bb.0:
    545 ; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
    546 ; CHECK-NEXT:    vrndscaleps $9, (%rdi){1to4}, %xmm0 {%k1}
    547 ; CHECK-NEXT:    retq
    548   %c = icmp eq <4 x i32> %cmp, zeroinitializer
    549   %ps = load float, float* %ptr
    550   %pins = insertelement <4 x float> undef, float %ps, i32 0
    551   %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
    552   %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
    553   %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
    554   ret <4 x float> %s
    555 }
    556 
    ; llvm.floor of a <4 x double> splat of one double loaded from %ptr, selected
    ; per lane against %passthru (floor result where %cmp == 0). Expected
    ; assembly: vptestnmq builds %k1, then a %k1-masked vrndscalepd $9 with a
    ; (%rdi){1to4} operand writes ymm0.
    557 define <4 x double> @floor_v4f64_mask_broadcast(double* %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
    558 ; CHECK-LABEL: floor_v4f64_mask_broadcast:
    559 ; CHECK:       ## %bb.0:
    560 ; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
    561 ; CHECK-NEXT:    vrndscalepd $9, (%rdi){1to4}, %ymm0 {%k1}
    562 ; CHECK-NEXT:    retq
    563   %c = icmp eq <4 x i64> %cmp, zeroinitializer
    564   %ps = load double, double* %ptr
    565   %pins = insertelement <4 x double> undef, double %ps, i32 0
    566   %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
    567   %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
    568   %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
    569   ret <4 x double> %s
    570 }
    571 
    ; llvm.floor of a <8 x float> splat of one float loaded from %ptr, selected
    ; per lane against %passthru (floor result where %cmp == 0). Expected
    ; assembly: vptestnmd builds %k1, then a %k1-masked vrndscaleps $9 with a
    ; (%rdi){1to8} operand writes ymm0.
    572 define <8 x float> @floor_v8f32_mask_broadcast(float* %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
    573 ; CHECK-LABEL: floor_v8f32_mask_broadcast:
    574 ; CHECK:       ## %bb.0:
    575 ; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
    576 ; CHECK-NEXT:    vrndscaleps $9, (%rdi){1to8}, %ymm0 {%k1}
    577 ; CHECK-NEXT:    retq
    578   %c = icmp eq <8 x i32> %cmp, zeroinitializer
    579   %ps = load float, float* %ptr
    580   %pins = insertelement <8 x float> undef, float %ps, i32 0
    581   %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
    582   %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
    583   %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
    584   ret <8 x float> %s
    585 }
    586 
    ; llvm.floor of a <8 x double> splat of one double loaded from %ptr, selected
    ; per lane against %passthru (floor result where %cmp == 0). Expected
    ; assembly: vptestnmq builds %k1, then a %k1-masked vrndscalepd $9 with a
    ; (%rdi){1to8} operand writes zmm0.
    587 define <8 x double> @floor_v8f64_mask_broadcast(double* %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
    588 ; CHECK-LABEL: floor_v8f64_mask_broadcast:
    589 ; CHECK:       ## %bb.0:
    590 ; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
    591 ; CHECK-NEXT:    vrndscalepd $9, (%rdi){1to8}, %zmm0 {%k1}
    592 ; CHECK-NEXT:    retq
    593   %c = icmp eq <8 x i64> %cmp, zeroinitializer
    594   %ps = load double, double* %ptr
    595   %pins = insertelement <8 x double> undef, double %ps, i32 0
    596   %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
    597   %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
    598   %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
    599   ret <8 x double> %s
    600 }
    601 
    ; llvm.floor of a <16 x float> splat of one float loaded from %ptr, selected
    ; per lane against %passthru (floor result where %cmp == 0). Expected
    ; assembly: vptestnmd builds %k1, then a %k1-masked vrndscaleps $9 with a
    ; (%rdi){1to16} operand writes zmm0.
    602 define <16 x float> @floor_v16f32_mask_broadcast(float* %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
    603 ; CHECK-LABEL: floor_v16f32_mask_broadcast:
    604 ; CHECK:       ## %bb.0:
    605 ; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
    606 ; CHECK-NEXT:    vrndscaleps $9, (%rdi){1to16}, %zmm0 {%k1}
    607 ; CHECK-NEXT:    retq
    608   %c = icmp eq <16 x i32> %cmp, zeroinitializer
    609   %ps = load float, float* %ptr
    610   %pins = insertelement <16 x float> undef, float %ps, i32 0
    611   %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
    612   %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
    613   %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
    614   ret <16 x float> %s
    615 }
    616 
    ; llvm.floor of a <2 x double> splat of one double loaded from %ptr, selected
    ; per lane against zeroinitializer (floor result where %cmp == 0). Expected
    ; assembly: vptestnmq builds %k1, then vrndscalepd $9 with a (%rdi){1to2}
    ; operand and {%k1} {z} writes xmm0.
    617 define <2 x double> @floor_v2f64_maskz_broadcast(double* %ptr, <2 x i64> %cmp) {
    618 ; CHECK-LABEL: floor_v2f64_maskz_broadcast:
    619 ; CHECK:       ## %bb.0:
    620 ; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
    621 ; CHECK-NEXT:    vrndscalepd $9, (%rdi){1to2}, %xmm0 {%k1} {z}
    622 ; CHECK-NEXT:    retq
    623   %c = icmp eq <2 x i64> %cmp, zeroinitializer
    624   %ps = load double, double* %ptr
    625   %pins = insertelement <2 x double> undef, double %ps, i32 0
    626   %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
    627   %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
    628   %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
    629   ret <2 x double> %s
    630 }
    631 
    ; llvm.floor of a <4 x float> splat of one float loaded from %ptr, selected
    ; per lane against zeroinitializer (floor result where %cmp == 0). Expected
    ; assembly: vptestnmd builds %k1, then vrndscaleps $9 with a (%rdi){1to4}
    ; operand and {%k1} {z} writes xmm0.
    632 define <4 x float> @floor_v4f32_maskz_broadcast(float* %ptr, <4 x i32> %cmp) {
    633 ; CHECK-LABEL: floor_v4f32_maskz_broadcast:
    634 ; CHECK:       ## %bb.0:
    635 ; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
    636 ; CHECK-NEXT:    vrndscaleps $9, (%rdi){1to4}, %xmm0 {%k1} {z}
    637 ; CHECK-NEXT:    retq
    638   %c = icmp eq <4 x i32> %cmp, zeroinitializer
    639   %ps = load float, float* %ptr
    640   %pins = insertelement <4 x float> undef, float %ps, i32 0
    641   %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
    642   %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
    643   %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
    644   ret <4 x float> %s
    645 }
    646 
    ; llvm.floor of a <4 x double> splat of one double loaded from %ptr, selected
    ; per lane against zeroinitializer (floor result where %cmp == 0). Expected
    ; assembly: vptestnmq builds %k1, then vrndscalepd $9 with a (%rdi){1to4}
    ; operand and {%k1} {z} writes ymm0.
    647 define <4 x double> @floor_v4f64_maskz_broadcast(double* %ptr, <4 x i64> %cmp) {
    648 ; CHECK-LABEL: floor_v4f64_maskz_broadcast:
    649 ; CHECK:       ## %bb.0:
    650 ; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
    651 ; CHECK-NEXT:    vrndscalepd $9, (%rdi){1to4}, %ymm0 {%k1} {z}
    652 ; CHECK-NEXT:    retq
    653   %c = icmp eq <4 x i64> %cmp, zeroinitializer
    654   %ps = load double, double* %ptr
    655   %pins = insertelement <4 x double> undef, double %ps, i32 0
    656   %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
    657   %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
    658   %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
    659   ret <4 x double> %s
    660 }
    661 
    ; llvm.floor of a <8 x float> splat of one float loaded from %ptr, selected
    ; per lane against zeroinitializer (floor result where %cmp == 0). Expected
    ; assembly: vptestnmd builds %k1, then vrndscaleps $9 with a (%rdi){1to8}
    ; operand and {%k1} {z} writes ymm0.
    662 define <8 x float> @floor_v8f32_maskz_broadcast(float* %ptr, <8 x i32> %cmp) {
    663 ; CHECK-LABEL: floor_v8f32_maskz_broadcast:
    664 ; CHECK:       ## %bb.0:
    665 ; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
    666 ; CHECK-NEXT:    vrndscaleps $9, (%rdi){1to8}, %ymm0 {%k1} {z}
    667 ; CHECK-NEXT:    retq
    668   %c = icmp eq <8 x i32> %cmp, zeroinitializer
    669   %ps = load float, float* %ptr
    670   %pins = insertelement <8 x float> undef, float %ps, i32 0
    671   %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
    672   %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
    673   %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
    674   ret <8 x float> %s
    675 }
    676 
    ; Zero-masked floor of a splat of the double loaded from %ptr: lanes where %cmp == 0 take the result, other lanes are zero.
    ; The CHECK lines expect vptestnmq to build %k1 and the scalar load folded as (%rdi){1to8} into a {%k1} {z} vrndscalepd $9 on zmm.
    677 define <8 x double> @floor_v8f64_maskz_broadcast(double* %ptr, <8 x i64> %cmp) {
    678 ; CHECK-LABEL: floor_v8f64_maskz_broadcast:
    679 ; CHECK:       ## %bb.0:
    680 ; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
    681 ; CHECK-NEXT:    vrndscalepd $9, (%rdi){1to8}, %zmm0 {%k1} {z}
    682 ; CHECK-NEXT:    retq
    683   %c = icmp eq <8 x i64> %cmp, zeroinitializer
    684   %ps = load double, double* %ptr
    685   %pins = insertelement <8 x double> undef, double %ps, i32 0
    686   %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
    687   %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
    688   %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
    689   ret <8 x double> %s
    690 }
    691 
    ; Zero-masked floor of a splat of the float loaded from %ptr: lanes where %cmp == 0 take the result, other lanes are zero.
    ; The CHECK lines expect vptestnmd to build %k1 and the scalar load folded as (%rdi){1to16} into a {%k1} {z} vrndscaleps $9 on zmm.
    692 define <16 x float> @floor_v16f32_maskz_broadcast(float* %ptr, <16 x i32> %cmp) {
    693 ; CHECK-LABEL: floor_v16f32_maskz_broadcast:
    694 ; CHECK:       ## %bb.0:
    695 ; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
    696 ; CHECK-NEXT:    vrndscaleps $9, (%rdi){1to16}, %zmm0 {%k1} {z}
    697 ; CHECK-NEXT:    retq
    698   %c = icmp eq <16 x i32> %cmp, zeroinitializer
    699   %ps = load float, float* %ptr
    700   %pins = insertelement <16 x float> undef, float %ps, i32 0
    701   %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
    702   %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
    703   %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
    704   ret <16 x float> %s
    705 }
    706 
    ; Unmasked ceil of a <2 x double> register operand.
    ; The CHECK lines expect a single vroundpd with immediate $10 on xmm0.
    707 define <2 x double> @ceil_v2f64(<2 x double> %p) {
    708 ; CHECK-LABEL: ceil_v2f64:
    709 ; CHECK:       ## %bb.0:
    710 ; CHECK-NEXT:    vroundpd $10, %xmm0, %xmm0
    711 ; CHECK-NEXT:    retq
    712   %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
    713   ret <2 x double> %t
    714 }
    715 
    ; Unmasked ceil of a <4 x float> register operand.
    ; The CHECK lines expect a single vroundps with immediate $10 on xmm0.
    716 define <4 x float> @ceil_v4f32(<4 x float> %p) {
    717 ; CHECK-LABEL: ceil_v4f32:
    718 ; CHECK:       ## %bb.0:
    719 ; CHECK-NEXT:    vroundps $10, %xmm0, %xmm0
    720 ; CHECK-NEXT:    retq
    721   %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
    722   ret <4 x float> %t
    723 }
    724 
    ; Unmasked ceil of a <4 x double> register operand.
    ; The CHECK lines expect a single vroundpd with immediate $10 on ymm0.
    725 define <4 x double> @ceil_v4f64(<4 x double> %p){
    726 ; CHECK-LABEL: ceil_v4f64:
    727 ; CHECK:       ## %bb.0:
    728 ; CHECK-NEXT:    vroundpd $10, %ymm0, %ymm0
    729 ; CHECK-NEXT:    retq
    730   %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
    731   ret <4 x double> %t
    732 }
    733 
    ; Unmasked ceil of an <8 x float> register operand.
    ; The CHECK lines expect a single vroundps with immediate $10 on ymm0.
    734 define <8 x float> @ceil_v8f32(<8 x float> %p) {
    735 ; CHECK-LABEL: ceil_v8f32:
    736 ; CHECK:       ## %bb.0:
    737 ; CHECK-NEXT:    vroundps $10, %ymm0, %ymm0
    738 ; CHECK-NEXT:    retq
    739   %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
    740   ret <8 x float> %t
    741 }
    742 
    ; Unmasked ceil of an <8 x double> register operand.
    ; Unlike the 128/256-bit cases above, the CHECK lines expect vrndscalepd $10 on zmm0.
    743 define <8 x double> @ceil_v8f64(<8 x double> %p){
    744 ; CHECK-LABEL: ceil_v8f64:
    745 ; CHECK:       ## %bb.0:
    746 ; CHECK-NEXT:    vrndscalepd $10, %zmm0, %zmm0
    747 ; CHECK-NEXT:    retq
    748   %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
    749   ret <8 x double> %t
    750 }
    751 
    ; Unmasked ceil of a <16 x float> register operand.
    ; Unlike the 128/256-bit cases above, the CHECK lines expect vrndscaleps $10 on zmm0.
    752 define <16 x float> @ceil_v16f32(<16 x float> %p) {
    753 ; CHECK-LABEL: ceil_v16f32:
    754 ; CHECK:       ## %bb.0:
    755 ; CHECK-NEXT:    vrndscaleps $10, %zmm0, %zmm0
    756 ; CHECK-NEXT:    retq
    757   %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
    758   ret <16 x float> %t
    759 }
    760 
    ; Unmasked ceil of a <2 x double> loaded from %ptr.
    ; The CHECK lines expect the load folded into vroundpd $10 as a (%rdi) memory operand.
    761 define <2 x double> @ceil_v2f64_load(<2 x double>* %ptr) {
    762 ; CHECK-LABEL: ceil_v2f64_load:
    763 ; CHECK:       ## %bb.0:
    764 ; CHECK-NEXT:    vroundpd $10, (%rdi), %xmm0
    765 ; CHECK-NEXT:    retq
    766   %p = load <2 x double>, <2 x double>* %ptr
    767   %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
    768   ret <2 x double> %t
    769 }
    770 
    ; Unmasked ceil of a <4 x float> loaded from %ptr.
    ; The CHECK lines expect the load folded into vroundps $10 as a (%rdi) memory operand.
    770 define <4 x float> @ceil_v4f32_load(<4 x float>* %ptr) {
    771 ; CHECK-LABEL: ceil_v4f32_load:
    772 ; CHECK:       ## %bb.0:
    773 ; CHECK-NEXT:    vroundps $10, (%rdi), %xmm0
    774 ; CHECK-NEXT:    retq
    775   %p = load <4 x float>, <4 x float>* %ptr
    776   %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
    777   ret <4 x float> %t
    778 }
    780 
    ; Unmasked ceil of a <4 x double> loaded from %ptr.
    ; The CHECK lines expect the load folded into vroundpd $10 as a (%rdi) memory operand, result in ymm0.
    781 define <4 x double> @ceil_v4f64_load(<4 x double>* %ptr){
    782 ; CHECK-LABEL: ceil_v4f64_load:
    783 ; CHECK:       ## %bb.0:
    784 ; CHECK-NEXT:    vroundpd $10, (%rdi), %ymm0
    785 ; CHECK-NEXT:    retq
    786   %p = load <4 x double>, <4 x double>* %ptr
    787   %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
    788   ret <4 x double> %t
    789 }
    790 
    ; Unmasked ceil of an <8 x float> loaded from %ptr.
    ; The CHECK lines expect the load folded into vroundps $10 as a (%rdi) memory operand, result in ymm0.
    791 define <8 x float> @ceil_v8f32_load(<8 x float>* %ptr) {
    792 ; CHECK-LABEL: ceil_v8f32_load:
    793 ; CHECK:       ## %bb.0:
    794 ; CHECK-NEXT:    vroundps $10, (%rdi), %ymm0
    795 ; CHECK-NEXT:    retq
    796   %p = load <8 x float>, <8 x float>* %ptr
    797   %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
    798   ret <8 x float> %t
    799 }
    800 
    ; Unmasked ceil of an <8 x double> loaded from %ptr.
    ; The CHECK lines expect the load folded into vrndscalepd $10 as a (%rdi) memory operand, result in zmm0.
    801 define <8 x double> @ceil_v8f64_load(<8 x double>* %ptr){
    802 ; CHECK-LABEL: ceil_v8f64_load:
    803 ; CHECK:       ## %bb.0:
    804 ; CHECK-NEXT:    vrndscalepd $10, (%rdi), %zmm0
    805 ; CHECK-NEXT:    retq
    806   %p = load <8 x double>, <8 x double>* %ptr
    807   %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
    808   ret <8 x double> %t
    809 }
    810 
    ; Unmasked ceil of a <16 x float> loaded from %ptr.
    ; The CHECK lines expect the load folded into vrndscaleps $10 as a (%rdi) memory operand, result in zmm0.
    811 define <16 x float> @ceil_v16f32_load(<16 x float>* %ptr) {
    812 ; CHECK-LABEL: ceil_v16f32_load:
    813 ; CHECK:       ## %bb.0:
    814 ; CHECK-NEXT:    vrndscaleps $10, (%rdi), %zmm0
    815 ; CHECK-NEXT:    retq
    816   %p = load <16 x float>, <16 x float>* %ptr
    817   %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
    818   ret <16 x float> %t
    819 }
    820 
    ; Merge-masked ceil: lanes where %cmp == 0 take ceil(%p), all other lanes keep %passthru.
    ; The CHECK lines expect vptestnmq to build %k1, a {%k1}-masked vrndscalepd $10 into xmm1, then a vmovapd of xmm1 into xmm0.
    821 define <2 x double> @ceil_v2f64_mask(<2 x double> %p, <2 x double> %passthru, <2 x i64> %cmp) {
    822 ; CHECK-LABEL: ceil_v2f64_mask:
    823 ; CHECK:       ## %bb.0:
    824 ; CHECK-NEXT:    vptestnmq %xmm2, %xmm2, %k1
    825 ; CHECK-NEXT:    vrndscalepd $10, %xmm0, %xmm1 {%k1}
    826 ; CHECK-NEXT:    vmovapd %xmm1, %xmm0
    827 ; CHECK-NEXT:    retq
    828   %c = icmp eq <2 x i64> %cmp, zeroinitializer
    829   %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
    830   %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
    831   ret <2 x double> %s
    832 }
    833 
    ; Merge-masked ceil: lanes where %cmp == 0 take ceil(%p), all other lanes keep %passthru.
    ; The CHECK lines expect vptestnmd to build %k1, a {%k1}-masked vrndscaleps $10 into xmm1, then a vmovaps of xmm1 into xmm0.
    834 define <4 x float> @ceil_v4f32_mask(<4 x float> %p, <4 x float> %passthru, <4 x i32> %cmp) {
    835 ; CHECK-LABEL: ceil_v4f32_mask:
    836 ; CHECK:       ## %bb.0:
    837 ; CHECK-NEXT:    vptestnmd %xmm2, %xmm2, %k1
    838 ; CHECK-NEXT:    vrndscaleps $10, %xmm0, %xmm1 {%k1}
    839 ; CHECK-NEXT:    vmovaps %xmm1, %xmm0
    840 ; CHECK-NEXT:    retq
    841   %c = icmp eq <4 x i32> %cmp, zeroinitializer
    842   %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
    843   %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
    844   ret <4 x float> %s
    845 }
    846 
    ; Merge-masked ceil: lanes where %cmp == 0 take ceil(%p), all other lanes keep %passthru.
    ; The CHECK lines expect vptestnmq to build %k1, a {%k1}-masked vrndscalepd $10 into ymm1, then a vmovapd of ymm1 into ymm0.
    847 define <4 x double> @ceil_v4f64_mask(<4 x double> %p, <4 x double> %passthru, <4 x i64> %cmp) {
    848 ; CHECK-LABEL: ceil_v4f64_mask:
    849 ; CHECK:       ## %bb.0:
    850 ; CHECK-NEXT:    vptestnmq %ymm2, %ymm2, %k1
    851 ; CHECK-NEXT:    vrndscalepd $10, %ymm0, %ymm1 {%k1}
    852 ; CHECK-NEXT:    vmovapd %ymm1, %ymm0
    853 ; CHECK-NEXT:    retq
    854   %c = icmp eq <4 x i64> %cmp, zeroinitializer
    855   %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
    856   %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
    857   ret <4 x double> %s
    858 }
    859 
    ; Merge-masked ceil: lanes where %cmp == 0 take ceil(%p), all other lanes keep %passthru.
    ; The CHECK lines expect vptestnmd to build %k1, a {%k1}-masked vrndscaleps $10 into ymm1, then a vmovaps of ymm1 into ymm0.
    860 define <8 x float> @ceil_v8f32_mask(<8 x float> %p, <8 x float> %passthru, <8 x i32> %cmp) {
    861 ; CHECK-LABEL: ceil_v8f32_mask:
    862 ; CHECK:       ## %bb.0:
    863 ; CHECK-NEXT:    vptestnmd %ymm2, %ymm2, %k1
    864 ; CHECK-NEXT:    vrndscaleps $10, %ymm0, %ymm1 {%k1}
    865 ; CHECK-NEXT:    vmovaps %ymm1, %ymm0
    866 ; CHECK-NEXT:    retq
    867   %c = icmp eq <8 x i32> %cmp, zeroinitializer
    868   %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
    869   %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
    870   ret <8 x float> %s
    871 }
    872 
    ; Merge-masked ceil: lanes where %cmp == 0 take ceil(%p), all other lanes keep %passthru.
    ; The CHECK lines expect vptestnmq to build %k1, a {%k1}-masked vrndscalepd $10 into zmm1, then a vmovapd of zmm1 into zmm0.
    873 define <8 x double> @ceil_v8f64_mask(<8 x double> %p, <8 x double> %passthru, <8 x i64> %cmp) {
    874 ; CHECK-LABEL: ceil_v8f64_mask:
    875 ; CHECK:       ## %bb.0:
    876 ; CHECK-NEXT:    vptestnmq %zmm2, %zmm2, %k1
    877 ; CHECK-NEXT:    vrndscalepd $10, %zmm0, %zmm1 {%k1}
    878 ; CHECK-NEXT:    vmovapd %zmm1, %zmm0
    879 ; CHECK-NEXT:    retq
    880   %c = icmp eq <8 x i64> %cmp, zeroinitializer
    881   %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
    882   %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
    883   ret <8 x double> %s
    884 }
    885 
    ; Merge-masked ceil: lanes where %cmp == 0 take ceil(%p), all other lanes keep %passthru.
    ; The CHECK lines expect vptestnmd to build %k1, a {%k1}-masked vrndscaleps $10 into zmm1, then a vmovaps of zmm1 into zmm0.
    886 define <16 x float> @ceil_v16f32_mask(<16 x float> %p, <16 x float> %passthru, <16 x i32> %cmp) {
    887 ; CHECK-LABEL: ceil_v16f32_mask:
    888 ; CHECK:       ## %bb.0:
    889 ; CHECK-NEXT:    vptestnmd %zmm2, %zmm2, %k1
    890 ; CHECK-NEXT:    vrndscaleps $10, %zmm0, %zmm1 {%k1}
    891 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
    892 ; CHECK-NEXT:    retq
    893   %c = icmp eq <16 x i32> %cmp, zeroinitializer
    894   %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
    895   %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
    896   ret <16 x float> %s
    897 }
    898 
    ; Zero-masked ceil: lanes where %cmp == 0 take ceil(%p), all other lanes are zero.
    ; The CHECK lines expect vptestnmq to build %k1 and a {%k1} {z} vrndscalepd $10 on xmm0.
    899 define <2 x double> @ceil_v2f64_maskz(<2 x double> %p, <2 x i64> %cmp) {
    900 ; CHECK-LABEL: ceil_v2f64_maskz:
    901 ; CHECK:       ## %bb.0:
    902 ; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
    903 ; CHECK-NEXT:    vrndscalepd $10, %xmm0, %xmm0 {%k1} {z}
    904 ; CHECK-NEXT:    retq
    905   %c = icmp eq <2 x i64> %cmp, zeroinitializer
    906   %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
    907   %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
    908   ret <2 x double> %s
    909 }
    910 
    ; Zero-masked ceil: lanes where %cmp == 0 take ceil(%p), all other lanes are zero.
    ; The CHECK lines expect vptestnmd to build %k1 and a {%k1} {z} vrndscaleps $10 on xmm0.
    911 define <4 x float> @ceil_v4f32_maskz(<4 x float> %p, <4 x i32> %cmp) {
    912 ; CHECK-LABEL: ceil_v4f32_maskz:
    913 ; CHECK:       ## %bb.0:
    914 ; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
    915 ; CHECK-NEXT:    vrndscaleps $10, %xmm0, %xmm0 {%k1} {z}
    916 ; CHECK-NEXT:    retq
    917   %c = icmp eq <4 x i32> %cmp, zeroinitializer
    918   %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
    919   %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
    920   ret <4 x float> %s
    921 }
    922 
    ; Zero-masked ceil: lanes where %cmp == 0 take ceil(%p), all other lanes are zero.
    ; The CHECK lines expect vptestnmq to build %k1 and a {%k1} {z} vrndscalepd $10 on ymm0.
    923 define <4 x double> @ceil_v4f64_maskz(<4 x double> %p, <4 x i64> %cmp) {
    924 ; CHECK-LABEL: ceil_v4f64_maskz:
    925 ; CHECK:       ## %bb.0:
    926 ; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
    927 ; CHECK-NEXT:    vrndscalepd $10, %ymm0, %ymm0 {%k1} {z}
    928 ; CHECK-NEXT:    retq
    929   %c = icmp eq <4 x i64> %cmp, zeroinitializer
    930   %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
    931   %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
    932   ret <4 x double> %s
    933 }
    934 
    ; Zero-masked ceil: lanes where %cmp == 0 take ceil(%p), all other lanes are zero.
    ; The CHECK lines expect vptestnmd to build %k1 and a {%k1} {z} vrndscaleps $10 on ymm0.
    935 define <8 x float> @ceil_v8f32_maskz(<8 x float> %p, <8 x i32> %cmp) {
    936 ; CHECK-LABEL: ceil_v8f32_maskz:
    937 ; CHECK:       ## %bb.0:
    938 ; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
    939 ; CHECK-NEXT:    vrndscaleps $10, %ymm0, %ymm0 {%k1} {z}
    940 ; CHECK-NEXT:    retq
    941   %c = icmp eq <8 x i32> %cmp, zeroinitializer
    942   %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
    943   %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
    944   ret <8 x float> %s
    945 }
    946 
    ; Zero-masked ceil: lanes where %cmp == 0 take ceil(%p), all other lanes are zero.
    ; The CHECK lines expect vptestnmq to build %k1 and a {%k1} {z} vrndscalepd $10 on zmm0.
    947 define <8 x double> @ceil_v8f64_maskz(<8 x double> %p, <8 x i64> %cmp) {
    948 ; CHECK-LABEL: ceil_v8f64_maskz:
    949 ; CHECK:       ## %bb.0:
    950 ; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
    951 ; CHECK-NEXT:    vrndscalepd $10, %zmm0, %zmm0 {%k1} {z}
    952 ; CHECK-NEXT:    retq
    953   %c = icmp eq <8 x i64> %cmp, zeroinitializer
    954   %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
    955   %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
    956   ret <8 x double> %s
    957 }
    958 
    ; Zero-masked ceil: lanes where %cmp == 0 take ceil(%p), all other lanes are zero.
    ; The CHECK lines expect vptestnmd to build %k1 and a {%k1} {z} vrndscaleps $10 on zmm0.
    959 define <16 x float> @ceil_v16f32_maskz(<16 x float> %p, <16 x i32> %cmp) {
    960 ; CHECK-LABEL: ceil_v16f32_maskz:
    961 ; CHECK:       ## %bb.0:
    962 ; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
    963 ; CHECK-NEXT:    vrndscaleps $10, %zmm0, %zmm0 {%k1} {z}
    964 ; CHECK-NEXT:    retq
    965   %c = icmp eq <16 x i32> %cmp, zeroinitializer
    966   %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
    967   %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
    968   ret <16 x float> %s
    969 }
    970 
    ; Merge-masked ceil of a vector loaded from %ptr: lanes where %cmp == 0 take the result, other lanes keep %passthru.
    ; The CHECK lines expect vptestnmq to build %k1 and the load folded as (%rdi) into a {%k1}-masked vrndscalepd $10 writing xmm0.
    971 define <2 x double> @ceil_v2f64_mask_load(<2 x double>* %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
    972 ; CHECK-LABEL: ceil_v2f64_mask_load:
    973 ; CHECK:       ## %bb.0:
    974 ; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
    975 ; CHECK-NEXT:    vrndscalepd $10, (%rdi), %xmm0 {%k1}
    976 ; CHECK-NEXT:    retq
    977   %c = icmp eq <2 x i64> %cmp, zeroinitializer
    978   %p = load <2 x double>, <2 x double>* %ptr
    979   %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
    980   %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
    981   ret <2 x double> %s
    982 }
    983 
    ; Merge-masked ceil of a vector loaded from %ptr: lanes where %cmp == 0 take the result, other lanes keep %passthru.
    ; The CHECK lines expect vptestnmd to build %k1 and the load folded as (%rdi) into a {%k1}-masked vrndscaleps $10 writing xmm0.
    984 define <4 x float> @ceil_v4f32_mask_load(<4 x float>* %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
    985 ; CHECK-LABEL: ceil_v4f32_mask_load:
    986 ; CHECK:       ## %bb.0:
    987 ; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
    988 ; CHECK-NEXT:    vrndscaleps $10, (%rdi), %xmm0 {%k1}
    989 ; CHECK-NEXT:    retq
    990   %c = icmp eq <4 x i32> %cmp, zeroinitializer
    991   %p = load <4 x float>, <4 x float>* %ptr
    992   %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
    993   %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
    994   ret <4 x float> %s
    995 }
    996 
    ; Merge-masked ceil of a vector loaded from %ptr: lanes where %cmp == 0 take the result, other lanes keep %passthru.
    ; The CHECK lines expect vptestnmq to build %k1 and the load folded as (%rdi) into a {%k1}-masked vrndscalepd $10 writing ymm0.
    997 define <4 x double> @ceil_v4f64_mask_load(<4 x double>* %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
    998 ; CHECK-LABEL: ceil_v4f64_mask_load:
    999 ; CHECK:       ## %bb.0:
   1000 ; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
   1001 ; CHECK-NEXT:    vrndscalepd $10, (%rdi), %ymm0 {%k1}
   1002 ; CHECK-NEXT:    retq
   1003   %c = icmp eq <4 x i64> %cmp, zeroinitializer
   1004   %p = load <4 x double>, <4 x double>* %ptr
   1005   %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
   1006   %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
   1007   ret <4 x double> %s
   1008 }
   1009 
   ; Merge-masked ceil of a vector loaded from %ptr: lanes where %cmp == 0 take the result, other lanes keep %passthru.
   ; The CHECK lines expect vptestnmd to build %k1 and the load folded as (%rdi) into a {%k1}-masked vrndscaleps $10 writing ymm0.
   1010 define <8 x float> @ceil_v8f32_mask_load(<8 x float>* %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
   1011 ; CHECK-LABEL: ceil_v8f32_mask_load:
   1012 ; CHECK:       ## %bb.0:
   1013 ; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
   1014 ; CHECK-NEXT:    vrndscaleps $10, (%rdi), %ymm0 {%k1}
   1015 ; CHECK-NEXT:    retq
   1016   %c = icmp eq <8 x i32> %cmp, zeroinitializer
   1017   %p = load <8 x float>, <8 x float>* %ptr
   1018   %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
   1019   %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
   1020   ret <8 x float> %s
   1021 }
   1022 
   ; Merge-masked ceil of a vector loaded from %ptr: lanes where %cmp == 0 take the result, other lanes keep %passthru.
   ; The CHECK lines expect vptestnmq to build %k1 and the load folded as (%rdi) into a {%k1}-masked vrndscalepd $10 writing zmm0.
   1023 define <8 x double> @ceil_v8f64_mask_load(<8 x double>* %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
   1024 ; CHECK-LABEL: ceil_v8f64_mask_load:
   1025 ; CHECK:       ## %bb.0:
   1026 ; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
   1027 ; CHECK-NEXT:    vrndscalepd $10, (%rdi), %zmm0 {%k1}
   1028 ; CHECK-NEXT:    retq
   1029   %c = icmp eq <8 x i64> %cmp, zeroinitializer
   1030   %p = load <8 x double>, <8 x double>* %ptr
   1031   %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
   1032   %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
   1033   ret <8 x double> %s
   1034 }
   1035 
   ; Merge-masked ceil of a vector loaded from %ptr: lanes where %cmp == 0 take the result, other lanes keep %passthru.
   ; The CHECK lines expect vptestnmd to build %k1 and the load folded as (%rdi) into a {%k1}-masked vrndscaleps $10 writing zmm0.
   1036 define <16 x float> @ceil_v16f32_mask_load(<16 x float>* %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
   1037 ; CHECK-LABEL: ceil_v16f32_mask_load:
   1038 ; CHECK:       ## %bb.0:
   1039 ; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
   1040 ; CHECK-NEXT:    vrndscaleps $10, (%rdi), %zmm0 {%k1}
   1041 ; CHECK-NEXT:    retq
   1042   %c = icmp eq <16 x i32> %cmp, zeroinitializer
   1043   %p = load <16 x float>, <16 x float>* %ptr
   1044   %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
   1045   %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
   1046   ret <16 x float> %s
   1047 }
   1048 
   ; Zero-masked ceil of a vector loaded from %ptr: lanes where %cmp == 0 take the result, other lanes are zero.
   ; The CHECK lines expect vptestnmq to build %k1 and the load folded as (%rdi) into a {%k1} {z} vrndscalepd $10 on xmm0.
   1049 define <2 x double> @ceil_v2f64_maskz_load(<2 x double>* %ptr, <2 x i64> %cmp) {
   1050 ; CHECK-LABEL: ceil_v2f64_maskz_load:
   1051 ; CHECK:       ## %bb.0:
   1052 ; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
   1053 ; CHECK-NEXT:    vrndscalepd $10, (%rdi), %xmm0 {%k1} {z}
   1054 ; CHECK-NEXT:    retq
   1055   %c = icmp eq <2 x i64> %cmp, zeroinitializer
   1056   %p = load <2 x double>, <2 x double>* %ptr
   1057   %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
   1058   %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
   1059   ret <2 x double> %s
   1060 }
   1061 
   ; Zero-masked ceil of a vector loaded from %ptr: lanes where %cmp == 0 take the result, other lanes are zero.
   ; The CHECK lines expect vptestnmd to build %k1 and the load folded as (%rdi) into a {%k1} {z} vrndscaleps $10 on xmm0.
   1062 define <4 x float> @ceil_v4f32_maskz_load(<4 x float>* %ptr, <4 x i32> %cmp) {
   1063 ; CHECK-LABEL: ceil_v4f32_maskz_load:
   1064 ; CHECK:       ## %bb.0:
   1065 ; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
   1066 ; CHECK-NEXT:    vrndscaleps $10, (%rdi), %xmm0 {%k1} {z}
   1067 ; CHECK-NEXT:    retq
   1068   %c = icmp eq <4 x i32> %cmp, zeroinitializer
   1069   %p = load <4 x float>, <4 x float>* %ptr
   1070   %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
   1071   %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
   1072   ret <4 x float> %s
   1073 }
   1074 
   ; Zero-masked ceil of a vector loaded from %ptr: lanes where %cmp == 0 take the result, other lanes are zero.
   ; The CHECK lines expect vptestnmq to build %k1 and the load folded as (%rdi) into a {%k1} {z} vrndscalepd $10 on ymm0.
   1075 define <4 x double> @ceil_v4f64_maskz_load(<4 x double>* %ptr, <4 x i64> %cmp) {
   1076 ; CHECK-LABEL: ceil_v4f64_maskz_load:
   1077 ; CHECK:       ## %bb.0:
   1078 ; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
   1079 ; CHECK-NEXT:    vrndscalepd $10, (%rdi), %ymm0 {%k1} {z}
   1080 ; CHECK-NEXT:    retq
   1081   %c = icmp eq <4 x i64> %cmp, zeroinitializer
   1082   %p = load <4 x double>, <4 x double>* %ptr
   1083   %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
   1084   %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
   1085   ret <4 x double> %s
   1086 }
   1087 
   ; Zero-masked ceil of a vector loaded from %ptr: lanes where %cmp == 0 take the result, other lanes are zero.
   ; The CHECK lines expect vptestnmd to build %k1 and the load folded as (%rdi) into a {%k1} {z} vrndscaleps $10 on ymm0.
   1088 define <8 x float> @ceil_v8f32_maskz_load(<8 x float>* %ptr, <8 x i32> %cmp) {
   1089 ; CHECK-LABEL: ceil_v8f32_maskz_load:
   1090 ; CHECK:       ## %bb.0:
   1091 ; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
   1092 ; CHECK-NEXT:    vrndscaleps $10, (%rdi), %ymm0 {%k1} {z}
   1093 ; CHECK-NEXT:    retq
   1094   %c = icmp eq <8 x i32> %cmp, zeroinitializer
   1095   %p = load <8 x float>, <8 x float>* %ptr
   1096   %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
   1097   %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
   1098   ret <8 x float> %s
   1099 }
   1100 
   ; Zero-masked ceil of a vector loaded from %ptr: lanes where %cmp == 0 take the result, other lanes are zero.
   ; The CHECK lines expect vptestnmq to build %k1 and the load folded as (%rdi) into a {%k1} {z} vrndscalepd $10 on zmm0.
   1101 define <8 x double> @ceil_v8f64_maskz_load(<8 x double>* %ptr, <8 x i64> %cmp) {
   1102 ; CHECK-LABEL: ceil_v8f64_maskz_load:
   1103 ; CHECK:       ## %bb.0:
   1104 ; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
   1105 ; CHECK-NEXT:    vrndscalepd $10, (%rdi), %zmm0 {%k1} {z}
   1106 ; CHECK-NEXT:    retq
   1107   %c = icmp eq <8 x i64> %cmp, zeroinitializer
   1108   %p = load <8 x double>, <8 x double>* %ptr
   1109   %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
   1110   %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
   1111   ret <8 x double> %s
   1112 }
   1113 
   ; Zero-masked ceil of a vector loaded from %ptr: lanes where %cmp == 0 take the result, other lanes are zero.
   ; The CHECK lines expect vptestnmd to build %k1 and the load folded as (%rdi) into a {%k1} {z} vrndscaleps $10 on zmm0.
   1114 define <16 x float> @ceil_v16f32_maskz_load(<16 x float>* %ptr, <16 x i32> %cmp) {
   1115 ; CHECK-LABEL: ceil_v16f32_maskz_load:
   1116 ; CHECK:       ## %bb.0:
   1117 ; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
   1118 ; CHECK-NEXT:    vrndscaleps $10, (%rdi), %zmm0 {%k1} {z}
   1119 ; CHECK-NEXT:    retq
   1120   %c = icmp eq <16 x i32> %cmp, zeroinitializer
   1121   %p = load <16 x float>, <16 x float>* %ptr
   1122   %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
   1123   %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
   1124   ret <16 x float> %s
   1125 }
   1126 
   ; Unmasked ceil of a splat of the double loaded from %ptr (insertelement into lane 0, then an all-zero shuffle mask).
   ; The CHECK lines expect the scalar load folded as a (%rdi){1to2} broadcast operand of vrndscalepd $10.
   1127 define <2 x double> @ceil_v2f64_broadcast(double* %ptr) {
   1128 ; CHECK-LABEL: ceil_v2f64_broadcast:
   1129 ; CHECK:       ## %bb.0:
   1130 ; CHECK-NEXT:    vrndscalepd $10, (%rdi){1to2}, %xmm0
   1131 ; CHECK-NEXT:    retq
   1132   %ps = load double, double* %ptr
   1133   %pins = insertelement <2 x double> undef, double %ps, i32 0
   1134   %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
   1135   %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
   1136   ret <2 x double> %t
   1137 }
   1138 
   ; Unmasked ceil of a splat of the float loaded from %ptr (insertelement into lane 0, then an all-zero shuffle mask).
   ; The CHECK lines expect the scalar load folded as a (%rdi){1to4} broadcast operand of vrndscaleps $10.
   1139 define <4 x float> @ceil_v4f32_broadcast(float* %ptr) {
   1140 ; CHECK-LABEL: ceil_v4f32_broadcast:
   1141 ; CHECK:       ## %bb.0:
   1142 ; CHECK-NEXT:    vrndscaleps $10, (%rdi){1to4}, %xmm0
   1143 ; CHECK-NEXT:    retq
   1144   %ps = load float, float* %ptr
   1145   %pins = insertelement <4 x float> undef, float %ps, i32 0
   1146   %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
   1147   %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
   1148   ret <4 x float> %t
   1149 }
   1150 
   ; Unmasked ceil of a splat of the double loaded from %ptr (insertelement into lane 0, then an all-zero shuffle mask).
   ; The CHECK lines expect the scalar load folded as a (%rdi){1to4} broadcast operand of vrndscalepd $10, result in ymm0.
   1151 define <4 x double> @ceil_v4f64_broadcast(double* %ptr){
   1152 ; CHECK-LABEL: ceil_v4f64_broadcast:
   1153 ; CHECK:       ## %bb.0:
   1154 ; CHECK-NEXT:    vrndscalepd $10, (%rdi){1to4}, %ymm0
   1155 ; CHECK-NEXT:    retq
   1156   %ps = load double, double* %ptr
   1157   %pins = insertelement <4 x double> undef, double %ps, i32 0
   1158   %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
   1159   %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
   1160   ret <4 x double> %t
   1161 }
   1162 
   ; Unmasked ceil of a splat of the float loaded from %ptr (insertelement into lane 0, then an all-zero shuffle mask).
   ; The CHECK lines expect the scalar load folded as a (%rdi){1to8} broadcast operand of vrndscaleps $10, result in ymm0.
   1163 define <8 x float> @ceil_v8f32_broadcast(float* %ptr) {
   1164 ; CHECK-LABEL: ceil_v8f32_broadcast:
   1165 ; CHECK:       ## %bb.0:
   1166 ; CHECK-NEXT:    vrndscaleps $10, (%rdi){1to8}, %ymm0
   1167 ; CHECK-NEXT:    retq
   1168   %ps = load float, float* %ptr
   1169   %pins = insertelement <8 x float> undef, float %ps, i32 0
   1170   %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
   1171   %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
   1172   ret <8 x float> %t
   1173 }
   1174 
   ; Unmasked ceil of a splat of the double loaded from %ptr (insertelement into lane 0, then an all-zero shuffle mask).
   ; The CHECK lines expect the scalar load folded as a (%rdi){1to8} broadcast operand of vrndscalepd $10, result in zmm0.
   1175 define <8 x double> @ceil_v8f64_broadcast(double* %ptr){
   1176 ; CHECK-LABEL: ceil_v8f64_broadcast:
   1177 ; CHECK:       ## %bb.0:
   1178 ; CHECK-NEXT:    vrndscalepd $10, (%rdi){1to8}, %zmm0
   1179 ; CHECK-NEXT:    retq
   1180   %ps = load double, double* %ptr
   1181   %pins = insertelement <8 x double> undef, double %ps, i32 0
   1182   %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
   1183   %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
   1184   ret <8 x double> %t
   1185 }
   1186 
   ; Unmasked ceil of a splat of the float loaded from %ptr (insertelement into lane 0, then an all-zero shuffle mask).
   ; The CHECK lines expect the scalar load folded as a (%rdi){1to16} broadcast operand of vrndscaleps $10, result in zmm0.
   1187 define <16 x float> @ceil_v16f32_broadcast(float* %ptr) {
   1188 ; CHECK-LABEL: ceil_v16f32_broadcast:
   1189 ; CHECK:       ## %bb.0:
   1190 ; CHECK-NEXT:    vrndscaleps $10, (%rdi){1to16}, %zmm0
   1191 ; CHECK-NEXT:    retq
   1192   %ps = load float, float* %ptr
   1193   %pins = insertelement <16 x float> undef, float %ps, i32 0
   1194   %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
   1195   %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
   1196   ret <16 x float> %t
   1197 }
   1198 
   ; Merge-masked ceil of a splat of the double loaded from %ptr: lanes where %cmp == 0 take the result, other lanes keep %passthru.
   ; The CHECK lines expect vptestnmq to build %k1 and the scalar load folded as (%rdi){1to2} into a {%k1}-masked vrndscalepd $10.
   1199 define <2 x double> @ceil_v2f64_mask_broadcast(double* %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
   1200 ; CHECK-LABEL: ceil_v2f64_mask_broadcast:
   1201 ; CHECK:       ## %bb.0:
   1202 ; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
   1203 ; CHECK-NEXT:    vrndscalepd $10, (%rdi){1to2}, %xmm0 {%k1}
   1204 ; CHECK-NEXT:    retq
   1205   %c = icmp eq <2 x i64> %cmp, zeroinitializer
   1206   %ps = load double, double* %ptr
   1207   %pins = insertelement <2 x double> undef, double %ps, i32 0
   1208   %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
   1209   %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
   1210   %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
   1211   ret <2 x double> %s
   1212 }
   1213 
   ; Merge-masked ceil of a splat of the float loaded from %ptr: lanes where %cmp == 0 take the result, other lanes keep %passthru.
   ; The CHECK lines expect vptestnmd to build %k1 and the scalar load folded as (%rdi){1to4} into a {%k1}-masked vrndscaleps $10.
   1214 define <4 x float> @ceil_v4f32_mask_broadcast(float* %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
   1215 ; CHECK-LABEL: ceil_v4f32_mask_broadcast:
   1216 ; CHECK:       ## %bb.0:
   1217 ; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
   1218 ; CHECK-NEXT:    vrndscaleps $10, (%rdi){1to4}, %xmm0 {%k1}
   1219 ; CHECK-NEXT:    retq
   1220   %c = icmp eq <4 x i32> %cmp, zeroinitializer
   1221   %ps = load float, float* %ptr
   1222   %pins = insertelement <4 x float> undef, float %ps, i32 0
   1223   %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
   1224   %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
   1225   %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
   1226   ret <4 x float> %s
   1227 }
   1228 
   ; Merge-masked ceil of a splat of the double loaded from %ptr: lanes where %cmp == 0 take the result, other lanes keep %passthru.
   ; The CHECK lines expect vptestnmq to build %k1 and the scalar load folded as (%rdi){1to4} into a {%k1}-masked vrndscalepd $10 on ymm0.
   1229 define <4 x double> @ceil_v4f64_mask_broadcast(double* %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
   1230 ; CHECK-LABEL: ceil_v4f64_mask_broadcast:
   1231 ; CHECK:       ## %bb.0:
   1232 ; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
   1233 ; CHECK-NEXT:    vrndscalepd $10, (%rdi){1to4}, %ymm0 {%k1}
   1234 ; CHECK-NEXT:    retq
   1235   %c = icmp eq <4 x i64> %cmp, zeroinitializer
   1236   %ps = load double, double* %ptr
   1237   %pins = insertelement <4 x double> undef, double %ps, i32 0
   1238   %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
   1239   %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
   1240   %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
   1241   ret <4 x double> %s
   1242 }
   1243 
   ; Merge-masked ceil of a splat of the float loaded from %ptr: lanes where %cmp == 0 take the result, other lanes keep %passthru.
   ; The CHECK lines expect vptestnmd to build %k1 and the scalar load folded as (%rdi){1to8} into a {%k1}-masked vrndscaleps $10 on ymm0.
   1244 define <8 x float> @ceil_v8f32_mask_broadcast(float* %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
   1245 ; CHECK-LABEL: ceil_v8f32_mask_broadcast:
   1246 ; CHECK:       ## %bb.0:
   1247 ; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
   1248 ; CHECK-NEXT:    vrndscaleps $10, (%rdi){1to8}, %ymm0 {%k1}
   1249 ; CHECK-NEXT:    retq
   1250   %c = icmp eq <8 x i32> %cmp, zeroinitializer
   1251   %ps = load float, float* %ptr
   1252   %pins = insertelement <8 x float> undef, float %ps, i32 0
   1253   %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
   1254   %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
   1255   %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
   1256   ret <8 x float> %s
   1257 }
   1258 
   ; Merge-masked ceil of a splat of the double loaded from %ptr: lanes where %cmp == 0 take the result, other lanes keep %passthru.
   ; The CHECK lines expect vptestnmq to build %k1 and the scalar load folded as (%rdi){1to8} into a {%k1}-masked vrndscalepd $10 on zmm0.
   1259 define <8 x double> @ceil_v8f64_mask_broadcast(double* %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
   1260 ; CHECK-LABEL: ceil_v8f64_mask_broadcast:
   1261 ; CHECK:       ## %bb.0:
   1262 ; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
   1263 ; CHECK-NEXT:    vrndscalepd $10, (%rdi){1to8}, %zmm0 {%k1}
   1264 ; CHECK-NEXT:    retq
   1265   %c = icmp eq <8 x i64> %cmp, zeroinitializer
   1266   %ps = load double, double* %ptr
   1267   %pins = insertelement <8 x double> undef, double %ps, i32 0
   1268   %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
   1269   %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
   1270   %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
   1271   ret <8 x double> %s
   1272 }
   1273 
   ; Merge-masked ceil of a splat of the float loaded from %ptr: lanes where %cmp == 0 take the result, other lanes keep %passthru.
   ; The CHECK lines expect vptestnmd to build %k1 and the scalar load folded as (%rdi){1to16} into a {%k1}-masked vrndscaleps $10 on zmm0.
   1274 define <16 x float> @ceil_v16f32_mask_broadcast(float* %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
   1275 ; CHECK-LABEL: ceil_v16f32_mask_broadcast:
   1276 ; CHECK:       ## %bb.0:
   1277 ; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
   1278 ; CHECK-NEXT:    vrndscaleps $10, (%rdi){1to16}, %zmm0 {%k1}
   1279 ; CHECK-NEXT:    retq
   1280   %c = icmp eq <16 x i32> %cmp, zeroinitializer
   1281   %ps = load float, float* %ptr
   1282   %pins = insertelement <16 x float> undef, float %ps, i32 0
   1283   %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
   1284   %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
   1285   %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
   1286   ret <16 x float> %s
   1287 }
   1288 
   ; Zero-masked ceil of a splat of the double loaded from %ptr: lanes where %cmp == 0 take the result, other lanes are zero.
   ; The CHECK lines expect vptestnmq to build %k1 and the scalar load folded as (%rdi){1to2} into a {%k1} {z} vrndscalepd $10.
   1289 define <2 x double> @ceil_v2f64_maskz_broadcast(double* %ptr, <2 x i64> %cmp) {
   1290 ; CHECK-LABEL: ceil_v2f64_maskz_broadcast:
   1291 ; CHECK:       ## %bb.0:
   1292 ; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
   1293 ; CHECK-NEXT:    vrndscalepd $10, (%rdi){1to2}, %xmm0 {%k1} {z}
   1294 ; CHECK-NEXT:    retq
   1295   %c = icmp eq <2 x i64> %cmp, zeroinitializer
   1296   %ps = load double, double* %ptr
   1297   %pins = insertelement <2 x double> undef, double %ps, i32 0
   1298   %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
   1299   %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
   1300   %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
   1301   ret <2 x double> %s
   1302 }
   1303 
   ; Zero-masked ceil of a splat of the float loaded from %ptr: lanes where %cmp == 0 take the result, other lanes are zero.
   ; The CHECK lines expect vptestnmd to build %k1 and the scalar load folded as (%rdi){1to4} into a {%k1} {z} vrndscaleps $10.
   1304 define <4 x float> @ceil_v4f32_maskz_broadcast(float* %ptr, <4 x i32> %cmp) {
   1305 ; CHECK-LABEL: ceil_v4f32_maskz_broadcast:
   1306 ; CHECK:       ## %bb.0:
   1307 ; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
   1308 ; CHECK-NEXT:    vrndscaleps $10, (%rdi){1to4}, %xmm0 {%k1} {z}
   1309 ; CHECK-NEXT:    retq
   1310   %c = icmp eq <4 x i32> %cmp, zeroinitializer
   1311   %ps = load float, float* %ptr
   1312   %pins = insertelement <4 x float> undef, float %ps, i32 0
   1313   %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
   1314   %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
   1315   %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
   1316   ret <4 x float> %s
   1317 }
   1318 
   ; Zero-masked ceil of a splat of the double loaded from %ptr: lanes where %cmp == 0 take the result, other lanes are zero.
   ; The CHECK lines expect vptestnmq to build %k1 and the scalar load folded as (%rdi){1to4} into a {%k1} {z} vrndscalepd $10 on ymm0.
   1319 define <4 x double> @ceil_v4f64_maskz_broadcast(double* %ptr, <4 x i64> %cmp) {
   1320 ; CHECK-LABEL: ceil_v4f64_maskz_broadcast:
   1321 ; CHECK:       ## %bb.0:
   1322 ; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
   1323 ; CHECK-NEXT:    vrndscalepd $10, (%rdi){1to4}, %ymm0 {%k1} {z}
   1324 ; CHECK-NEXT:    retq
   1325   %c = icmp eq <4 x i64> %cmp, zeroinitializer
   1326   %ps = load double, double* %ptr
   1327   %pins = insertelement <4 x double> undef, double %ps, i32 0
   1328   %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
   1329   %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
   1330   %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
   1331   ret <4 x double> %s
   1332 }
   1333 
   ; Zero-masked ceil of a splat of the float loaded from %ptr: lanes where %cmp == 0 take the result, other lanes are zero.
   ; The CHECK lines expect vptestnmd to build %k1 and the scalar load folded as (%rdi){1to8} into a {%k1} {z} vrndscaleps $10 on ymm0.
   1334 define <8 x float> @ceil_v8f32_maskz_broadcast(float* %ptr, <8 x i32> %cmp) {
   1335 ; CHECK-LABEL: ceil_v8f32_maskz_broadcast:
   1336 ; CHECK:       ## %bb.0:
   1337 ; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
   1338 ; CHECK-NEXT:    vrndscaleps $10, (%rdi){1to8}, %ymm0 {%k1} {z}
   1339 ; CHECK-NEXT:    retq
   1340   %c = icmp eq <8 x i32> %cmp, zeroinitializer
   1341   %ps = load float, float* %ptr
   1342   %pins = insertelement <8 x float> undef, float %ps, i32 0
   1343   %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
   1344   %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
   1345   %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
   1346   ret <8 x float> %s
   1347 }
   1348 
   ; Zero-masked ceil of a splat of the double loaded from %ptr: lanes where %cmp == 0 take the result, other lanes are zero.
   ; The CHECK lines expect vptestnmq to build %k1 and the scalar load folded as (%rdi){1to8} into a {%k1} {z} vrndscalepd $10 on zmm0.
   1349 define <8 x double> @ceil_v8f64_maskz_broadcast(double* %ptr, <8 x i64> %cmp) {
   1350 ; CHECK-LABEL: ceil_v8f64_maskz_broadcast:
   1351 ; CHECK:       ## %bb.0:
   1352 ; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
   1353 ; CHECK-NEXT:    vrndscalepd $10, (%rdi){1to8}, %zmm0 {%k1} {z}
   1354 ; CHECK-NEXT:    retq
   1355   %c = icmp eq <8 x i64> %cmp, zeroinitializer
   1356   %ps = load double, double* %ptr
   1357   %pins = insertelement <8 x double> undef, double %ps, i32 0
   1358   %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
   1359   %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
   1360   %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
   1361   ret <8 x double> %s
   1362 }
   1363 
   1364 define <16 x float> @ceil_v16f32_maskz_broadcast(float* %ptr, <16 x i32> %cmp) {
   1365 ; CHECK-LABEL: ceil_v16f32_maskz_broadcast:
   1366 ; CHECK:       ## %bb.0:
   1367 ; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
   1368 ; CHECK-NEXT:    vrndscaleps $10, (%rdi){1to16}, %zmm0 {%k1} {z}
   1369 ; CHECK-NEXT:    retq
        ; Zero-masked ceil of a splatted scalar: the float at %ptr is replicated to all 16
        ; lanes; lanes where %cmp is zero take the ceil result and the rest are zero. The
        ; assertions above expect the splat folded as a {1to16} operand with {%k1} {z}.
   1370   %c = icmp eq <16 x i32> %cmp, zeroinitializer
   1371   %ps = load float, float* %ptr
   1372   %pins = insertelement <16 x float> undef, float %ps, i32 0
   1373   %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
   1374   %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
   1375   %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
   1376   ret <16 x float> %s
   1377 }
   1378 
   1379 define <2 x double> @trunc_v2f64(<2 x double> %p) {
   1380 ; CHECK-LABEL: trunc_v2f64:
   1381 ; CHECK:       ## %bb.0:
   1382 ; CHECK-NEXT:    vroundpd $11, %xmm0, %xmm0
   1383 ; CHECK-NEXT:    retq
        ; Unmasked llvm.trunc on a <2 x double> register argument; the assertions above
        ; expect a single in-place vroundpd $11 on %xmm0.
        ; NOTE(review): $11 presumably encodes round-toward-zero with precision
        ; exceptions suppressed -- confirm against the ROUNDPD imm8 table.
   1384   %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
   1385   ret <2 x double> %t
   1386 }
   1387 
   1388 define <4 x float> @trunc_v4f32(<4 x float> %p) {
   1389 ; CHECK-LABEL: trunc_v4f32:
   1390 ; CHECK:       ## %bb.0:
   1391 ; CHECK-NEXT:    vroundps $11, %xmm0, %xmm0
   1392 ; CHECK-NEXT:    retq
        ; Unmasked llvm.trunc on a <4 x float> register argument; the assertions above
        ; expect a single in-place vroundps $11 on %xmm0.
   1393   %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
   1394   ret <4 x float> %t
   1395 }
   1396 
   1397 define <4 x double> @trunc_v4f64(<4 x double> %p){
   1398 ; CHECK-LABEL: trunc_v4f64:
   1399 ; CHECK:       ## %bb.0:
   1400 ; CHECK-NEXT:    vroundpd $11, %ymm0, %ymm0
   1401 ; CHECK-NEXT:    retq
        ; Unmasked llvm.trunc on a <4 x double> register argument; the assertions above
        ; expect a single in-place vroundpd $11 on %ymm0.
   1402   %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
   1403   ret <4 x double> %t
   1404 }
   1405 
   1406 define <8 x float> @trunc_v8f32(<8 x float> %p) {
   1407 ; CHECK-LABEL: trunc_v8f32:
   1408 ; CHECK:       ## %bb.0:
   1409 ; CHECK-NEXT:    vroundps $11, %ymm0, %ymm0
   1410 ; CHECK-NEXT:    retq
        ; Unmasked llvm.trunc on an <8 x float> register argument; the assertions above
        ; expect a single in-place vroundps $11 on %ymm0.
   1411   %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
   1412   ret <8 x float> %t
   1413 }
   1414 
   1415 define <8 x double> @trunc_v8f64(<8 x double> %p){
   1416 ; CHECK-LABEL: trunc_v8f64:
   1417 ; CHECK:       ## %bb.0:
   1418 ; CHECK-NEXT:    vrndscalepd $11, %zmm0, %zmm0
   1419 ; CHECK-NEXT:    retq
        ; Unmasked llvm.trunc on an <8 x double> register argument; at this 512-bit width
        ; the assertions above expect vrndscalepd $11 on %zmm0 rather than vroundpd.
   1420   %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
   1421   ret <8 x double> %t
   1422 }
   1423 
   1424 define <16 x float> @trunc_v16f32(<16 x float> %p) {
   1425 ; CHECK-LABEL: trunc_v16f32:
   1426 ; CHECK:       ## %bb.0:
   1427 ; CHECK-NEXT:    vrndscaleps $11, %zmm0, %zmm0
   1428 ; CHECK-NEXT:    retq
        ; Unmasked llvm.trunc on a <16 x float> register argument; at this 512-bit width
        ; the assertions above expect vrndscaleps $11 on %zmm0 rather than vroundps.
   1429   %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
   1430   ret <16 x float> %t
   1431 }
   1432 
   1433 define <2 x double> @trunc_v2f64_load(<2 x double>* %ptr) {
   1434 ; CHECK-LABEL: trunc_v2f64_load:
   1435 ; CHECK:       ## %bb.0:
   1436 ; CHECK-NEXT:    vroundpd $11, (%rdi), %xmm0
   1437 ; CHECK-NEXT:    retq
        ; llvm.trunc of a full <2 x double> vector loaded from %ptr; the assertions above
        ; expect the load folded into vroundpd as its (%rdi) memory operand.
   1438   %p = load <2 x double>, <2 x double>* %ptr
   1439   %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
   1440   ret <2 x double> %t
   1441 }
   1442 
   1443 define <4 x float> @trunc_v4f32_load(<4 x float>* %ptr) {
   1444 ; CHECK-LABEL: trunc_v4f32_load:
   1445 ; CHECK:       ## %bb.0:
   1446 ; CHECK-NEXT:    vroundps $11, (%rdi), %xmm0
   1447 ; CHECK-NEXT:    retq
        ; llvm.trunc of a full <4 x float> vector loaded from %ptr; the assertions above
        ; expect the load folded into vroundps as its (%rdi) memory operand.
   1448   %p = load <4 x float>, <4 x float>* %ptr
   1449   %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
   1450   ret <4 x float> %t
   1451 }
   1452 
   1453 define <4 x double> @trunc_v4f64_load(<4 x double>* %ptr){
   1454 ; CHECK-LABEL: trunc_v4f64_load:
   1455 ; CHECK:       ## %bb.0:
   1456 ; CHECK-NEXT:    vroundpd $11, (%rdi), %ymm0
   1457 ; CHECK-NEXT:    retq
        ; llvm.trunc of a full <4 x double> vector loaded from %ptr; the assertions above
        ; expect the load folded into vroundpd as its (%rdi) memory operand.
   1458   %p = load <4 x double>, <4 x double>* %ptr
   1459   %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
   1460   ret <4 x double> %t
   1461 }
   1462 
   1463 define <8 x float> @trunc_v8f32_load(<8 x float>* %ptr) {
   1464 ; CHECK-LABEL: trunc_v8f32_load:
   1465 ; CHECK:       ## %bb.0:
   1466 ; CHECK-NEXT:    vroundps $11, (%rdi), %ymm0
   1467 ; CHECK-NEXT:    retq
        ; llvm.trunc of a full <8 x float> vector loaded from %ptr; the assertions above
        ; expect the load folded into vroundps as its (%rdi) memory operand.
   1468   %p = load <8 x float>, <8 x float>* %ptr
   1469   %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
   1470   ret <8 x float> %t
   1471 }
   1472 
   1473 define <8 x double> @trunc_v8f64_load(<8 x double>* %ptr){
   1474 ; CHECK-LABEL: trunc_v8f64_load:
   1475 ; CHECK:       ## %bb.0:
   1476 ; CHECK-NEXT:    vrndscalepd $11, (%rdi), %zmm0
   1477 ; CHECK-NEXT:    retq
        ; llvm.trunc of a full <8 x double> vector loaded from %ptr; the assertions above
        ; expect the load folded into vrndscalepd as its (%rdi) memory operand.
   1478   %p = load <8 x double>, <8 x double>* %ptr
   1479   %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
   1480   ret <8 x double> %t
   1481 }
   1482 
   1483 define <16 x float> @trunc_v16f32_load(<16 x float>* %ptr) {
   1484 ; CHECK-LABEL: trunc_v16f32_load:
   1485 ; CHECK:       ## %bb.0:
   1486 ; CHECK-NEXT:    vrndscaleps $11, (%rdi), %zmm0
   1487 ; CHECK-NEXT:    retq
        ; llvm.trunc of a full <16 x float> vector loaded from %ptr; the assertions above
        ; expect the load folded into vrndscaleps as its (%rdi) memory operand.
   1488   %p = load <16 x float>, <16 x float>* %ptr
   1489   %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
   1490   ret <16 x float> %t
   1491 }
   1492 
   1493 define <2 x double> @trunc_v2f64_mask(<2 x double> %p, <2 x double> %passthru, <2 x i64> %cmp) {
   1494 ; CHECK-LABEL: trunc_v2f64_mask:
   1495 ; CHECK:       ## %bb.0:
   1496 ; CHECK-NEXT:    vptestnmq %xmm2, %xmm2, %k1
   1497 ; CHECK-NEXT:    vrndscalepd $11, %xmm0, %xmm1 {%k1}
   1498 ; CHECK-NEXT:    vmovapd %xmm1, %xmm0
   1499 ; CHECK-NEXT:    retq
        ; Merge-masked trunc: lanes where %cmp is zero take trunc(%p), the rest keep
        ; %passthru. The assertions above expect the mask in %k1 via vptestnmq, a masked
        ; vrndscalepd written over %xmm1, and a vmovapd into the return register.
   1500   %c = icmp eq <2 x i64> %cmp, zeroinitializer
   1501   %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
   1502   %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
   1503   ret <2 x double> %s
   1504 }
   1505 
   1506 define <4 x float> @trunc_v4f32_mask(<4 x float> %p, <4 x float> %passthru, <4 x i32> %cmp) {
   1507 ; CHECK-LABEL: trunc_v4f32_mask:
   1508 ; CHECK:       ## %bb.0:
   1509 ; CHECK-NEXT:    vptestnmd %xmm2, %xmm2, %k1
   1510 ; CHECK-NEXT:    vrndscaleps $11, %xmm0, %xmm1 {%k1}
   1511 ; CHECK-NEXT:    vmovaps %xmm1, %xmm0
   1512 ; CHECK-NEXT:    retq
        ; Merge-masked trunc: lanes where %cmp is zero take trunc(%p), the rest keep
        ; %passthru. The assertions above expect the mask in %k1 via vptestnmd, a masked
        ; vrndscaleps written over %xmm1, and a vmovaps into the return register.
   1513   %c = icmp eq <4 x i32> %cmp, zeroinitializer
   1514   %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
   1515   %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
   1516   ret <4 x float> %s
   1517 }
   1518 
   1519 define <4 x double> @trunc_v4f64_mask(<4 x double> %p, <4 x double> %passthru, <4 x i64> %cmp) {
   1520 ; CHECK-LABEL: trunc_v4f64_mask:
   1521 ; CHECK:       ## %bb.0:
   1522 ; CHECK-NEXT:    vptestnmq %ymm2, %ymm2, %k1
   1523 ; CHECK-NEXT:    vrndscalepd $11, %ymm0, %ymm1 {%k1}
   1524 ; CHECK-NEXT:    vmovapd %ymm1, %ymm0
   1525 ; CHECK-NEXT:    retq
        ; Merge-masked trunc: lanes where %cmp is zero take trunc(%p), the rest keep
        ; %passthru. The assertions above expect the mask in %k1 via vptestnmq, a masked
        ; vrndscalepd written over %ymm1, and a vmovapd into the return register.
   1526   %c = icmp eq <4 x i64> %cmp, zeroinitializer
   1527   %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
   1528   %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
   1529   ret <4 x double> %s
   1530 }
   1531 
   1532 define <8 x float> @trunc_v8f32_mask(<8 x float> %p, <8 x float> %passthru, <8 x i32> %cmp) {
   1533 ; CHECK-LABEL: trunc_v8f32_mask:
   1534 ; CHECK:       ## %bb.0:
   1535 ; CHECK-NEXT:    vptestnmd %ymm2, %ymm2, %k1
   1536 ; CHECK-NEXT:    vrndscaleps $11, %ymm0, %ymm1 {%k1}
   1537 ; CHECK-NEXT:    vmovaps %ymm1, %ymm0
   1538 ; CHECK-NEXT:    retq
        ; Merge-masked trunc: lanes where %cmp is zero take trunc(%p), the rest keep
        ; %passthru. The assertions above expect the mask in %k1 via vptestnmd, a masked
        ; vrndscaleps written over %ymm1, and a vmovaps into the return register.
   1539   %c = icmp eq <8 x i32> %cmp, zeroinitializer
   1540   %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
   1541   %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
   1542   ret <8 x float> %s
   1543 }
   1544 
   1545 define <8 x double> @trunc_v8f64_mask(<8 x double> %p, <8 x double> %passthru, <8 x i64> %cmp) {
   1546 ; CHECK-LABEL: trunc_v8f64_mask:
   1547 ; CHECK:       ## %bb.0:
   1548 ; CHECK-NEXT:    vptestnmq %zmm2, %zmm2, %k1
   1549 ; CHECK-NEXT:    vrndscalepd $11, %zmm0, %zmm1 {%k1}
   1550 ; CHECK-NEXT:    vmovapd %zmm1, %zmm0
   1551 ; CHECK-NEXT:    retq
        ; Merge-masked trunc: lanes where %cmp is zero take trunc(%p), the rest keep
        ; %passthru. The assertions above expect the mask in %k1 via vptestnmq, a masked
        ; vrndscalepd written over %zmm1, and a vmovapd into the return register.
   1552   %c = icmp eq <8 x i64> %cmp, zeroinitializer
   1553   %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
   1554   %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
   1555   ret <8 x double> %s
   1556 }
   1557 
   1558 define <16 x float> @trunc_v16f32_mask(<16 x float> %p, <16 x float> %passthru, <16 x i32> %cmp) {
   1559 ; CHECK-LABEL: trunc_v16f32_mask:
   1560 ; CHECK:       ## %bb.0:
   1561 ; CHECK-NEXT:    vptestnmd %zmm2, %zmm2, %k1
   1562 ; CHECK-NEXT:    vrndscaleps $11, %zmm0, %zmm1 {%k1}
   1563 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
   1564 ; CHECK-NEXT:    retq
        ; Merge-masked trunc: lanes where %cmp is zero take trunc(%p), the rest keep
        ; %passthru. The assertions above expect the mask in %k1 via vptestnmd, a masked
        ; vrndscaleps written over %zmm1, and a vmovaps into the return register.
   1565   %c = icmp eq <16 x i32> %cmp, zeroinitializer
   1566   %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
   1567   %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
   1568   ret <16 x float> %s
   1569 }
   1570 
   1571 define <2 x double> @trunc_v2f64_maskz(<2 x double> %p, <2 x i64> %cmp) {
   1572 ; CHECK-LABEL: trunc_v2f64_maskz:
   1573 ; CHECK:       ## %bb.0:
   1574 ; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
   1575 ; CHECK-NEXT:    vrndscalepd $11, %xmm0, %xmm0 {%k1} {z}
   1576 ; CHECK-NEXT:    retq
        ; Zero-masked trunc: lanes where %cmp is zero take trunc(%p), the rest are zero.
        ; The assertions above expect vptestnmq into %k1, then vrndscalepd with {%k1} {z}.
   1577   %c = icmp eq <2 x i64> %cmp, zeroinitializer
   1578   %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
   1579   %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
   1580   ret <2 x double> %s
   1581 }
   1582 
   1583 define <4 x float> @trunc_v4f32_maskz(<4 x float> %p, <4 x i32> %cmp) {
   1584 ; CHECK-LABEL: trunc_v4f32_maskz:
   1585 ; CHECK:       ## %bb.0:
   1586 ; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
   1587 ; CHECK-NEXT:    vrndscaleps $11, %xmm0, %xmm0 {%k1} {z}
   1588 ; CHECK-NEXT:    retq
        ; Zero-masked trunc: lanes where %cmp is zero take trunc(%p), the rest are zero.
        ; The assertions above expect vptestnmd into %k1, then vrndscaleps with {%k1} {z}.
   1589   %c = icmp eq <4 x i32> %cmp, zeroinitializer
   1590   %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
   1591   %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
   1592   ret <4 x float> %s
   1593 }
   1594 
   1595 define <4 x double> @trunc_v4f64_maskz(<4 x double> %p, <4 x i64> %cmp) {
   1596 ; CHECK-LABEL: trunc_v4f64_maskz:
   1597 ; CHECK:       ## %bb.0:
   1598 ; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
   1599 ; CHECK-NEXT:    vrndscalepd $11, %ymm0, %ymm0 {%k1} {z}
   1600 ; CHECK-NEXT:    retq
        ; Zero-masked trunc: lanes where %cmp is zero take trunc(%p), the rest are zero.
        ; The assertions above expect vptestnmq into %k1, then vrndscalepd with {%k1} {z}.
   1601   %c = icmp eq <4 x i64> %cmp, zeroinitializer
   1602   %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
   1603   %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
   1604   ret <4 x double> %s
   1605 }
   1606 
   1607 define <8 x float> @trunc_v8f32_maskz(<8 x float> %p, <8 x i32> %cmp) {
   1608 ; CHECK-LABEL: trunc_v8f32_maskz:
   1609 ; CHECK:       ## %bb.0:
   1610 ; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
   1611 ; CHECK-NEXT:    vrndscaleps $11, %ymm0, %ymm0 {%k1} {z}
   1612 ; CHECK-NEXT:    retq
        ; Zero-masked trunc: lanes where %cmp is zero take trunc(%p), the rest are zero.
        ; The assertions above expect vptestnmd into %k1, then vrndscaleps with {%k1} {z}.
   1613   %c = icmp eq <8 x i32> %cmp, zeroinitializer
   1614   %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
   1615   %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
   1616   ret <8 x float> %s
   1617 }
   1618 
   1619 define <8 x double> @trunc_v8f64_maskz(<8 x double> %p, <8 x i64> %cmp) {
   1620 ; CHECK-LABEL: trunc_v8f64_maskz:
   1621 ; CHECK:       ## %bb.0:
   1622 ; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
   1623 ; CHECK-NEXT:    vrndscalepd $11, %zmm0, %zmm0 {%k1} {z}
   1624 ; CHECK-NEXT:    retq
        ; Zero-masked trunc: lanes where %cmp is zero take trunc(%p), the rest are zero.
        ; The assertions above expect vptestnmq into %k1, then vrndscalepd with {%k1} {z}.
   1625   %c = icmp eq <8 x i64> %cmp, zeroinitializer
   1626   %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
   1627   %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
   1628   ret <8 x double> %s
   1629 }
   1630 
   1631 define <16 x float> @trunc_v16f32_maskz(<16 x float> %p, <16 x i32> %cmp) {
   1632 ; CHECK-LABEL: trunc_v16f32_maskz:
   1633 ; CHECK:       ## %bb.0:
   1634 ; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
   1635 ; CHECK-NEXT:    vrndscaleps $11, %zmm0, %zmm0 {%k1} {z}
   1636 ; CHECK-NEXT:    retq
        ; Zero-masked trunc: lanes where %cmp is zero take trunc(%p), the rest are zero.
        ; The assertions above expect vptestnmd into %k1, then vrndscaleps with {%k1} {z}.
   1637   %c = icmp eq <16 x i32> %cmp, zeroinitializer
   1638   %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
   1639   %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
   1640   ret <16 x float> %s
   1641 }
   1642 
   1643 define <2 x double> @trunc_v2f64_mask_load(<2 x double>* %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
   1644 ; CHECK-LABEL: trunc_v2f64_mask_load:
   1645 ; CHECK:       ## %bb.0:
   1646 ; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
   1647 ; CHECK-NEXT:    vrndscalepd $11, (%rdi), %xmm0 {%k1}
   1648 ; CHECK-NEXT:    retq
        ; Merge-masked trunc of a vector loaded from %ptr: lanes where %cmp is zero take
        ; the rounded value, the rest keep %passthru. The assertions above expect the load
        ; folded as (%rdi) and the result merged directly into %xmm0 with no extra move.
   1649   %c = icmp eq <2 x i64> %cmp, zeroinitializer
   1650   %p = load <2 x double>, <2 x double>* %ptr
   1651   %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
   1652   %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
   1653   ret <2 x double> %s
   1654 }
   1655 
   1656 define <4 x float> @trunc_v4f32_mask_load(<4 x float>* %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
   1657 ; CHECK-LABEL: trunc_v4f32_mask_load:
   1658 ; CHECK:       ## %bb.0:
   1659 ; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
   1660 ; CHECK-NEXT:    vrndscaleps $11, (%rdi), %xmm0 {%k1}
   1661 ; CHECK-NEXT:    retq
        ; Merge-masked trunc of a vector loaded from %ptr: lanes where %cmp is zero take
        ; the rounded value, the rest keep %passthru. The assertions above expect the load
        ; folded as (%rdi) and the result merged directly into %xmm0 with no extra move.
   1662   %c = icmp eq <4 x i32> %cmp, zeroinitializer
   1663   %p = load <4 x float>, <4 x float>* %ptr
   1664   %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
   1665   %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
   1666   ret <4 x float> %s
   1667 }
   1668 
   1669 define <4 x double> @trunc_v4f64_mask_load(<4 x double>* %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
   1670 ; CHECK-LABEL: trunc_v4f64_mask_load:
   1671 ; CHECK:       ## %bb.0:
   1672 ; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
   1673 ; CHECK-NEXT:    vrndscalepd $11, (%rdi), %ymm0 {%k1}
   1674 ; CHECK-NEXT:    retq
        ; Merge-masked trunc of a vector loaded from %ptr: lanes where %cmp is zero take
        ; the rounded value, the rest keep %passthru. The assertions above expect the load
        ; folded as (%rdi) and the result merged directly into %ymm0 with no extra move.
   1675   %c = icmp eq <4 x i64> %cmp, zeroinitializer
   1676   %p = load <4 x double>, <4 x double>* %ptr
   1677   %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
   1678   %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
   1679   ret <4 x double> %s
   1680 }
   1681 
   1682 define <8 x float> @trunc_v8f32_mask_load(<8 x float>* %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
   1683 ; CHECK-LABEL: trunc_v8f32_mask_load:
   1684 ; CHECK:       ## %bb.0:
   1685 ; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
   1686 ; CHECK-NEXT:    vrndscaleps $11, (%rdi), %ymm0 {%k1}
   1687 ; CHECK-NEXT:    retq
        ; Merge-masked trunc of a vector loaded from %ptr: lanes where %cmp is zero take
        ; the rounded value, the rest keep %passthru. The assertions above expect the load
        ; folded as (%rdi) and the result merged directly into %ymm0 with no extra move.
   1688   %c = icmp eq <8 x i32> %cmp, zeroinitializer
   1689   %p = load <8 x float>, <8 x float>* %ptr
   1690   %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
   1691   %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
   1692   ret <8 x float> %s
   1693 }
   1694 
   1695 define <8 x double> @trunc_v8f64_mask_load(<8 x double>* %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
   1696 ; CHECK-LABEL: trunc_v8f64_mask_load:
   1697 ; CHECK:       ## %bb.0:
   1698 ; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
   1699 ; CHECK-NEXT:    vrndscalepd $11, (%rdi), %zmm0 {%k1}
   1700 ; CHECK-NEXT:    retq
        ; Merge-masked trunc of a vector loaded from %ptr: lanes where %cmp is zero take
        ; the rounded value, the rest keep %passthru. The assertions above expect the load
        ; folded as (%rdi) and the result merged directly into %zmm0 with no extra move.
   1701   %c = icmp eq <8 x i64> %cmp, zeroinitializer
   1702   %p = load <8 x double>, <8 x double>* %ptr
   1703   %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
   1704   %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
   1705   ret <8 x double> %s
   1706 }
   1707 
   1708 define <16 x float> @trunc_v16f32_mask_load(<16 x float>* %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
   1709 ; CHECK-LABEL: trunc_v16f32_mask_load:
   1710 ; CHECK:       ## %bb.0:
   1711 ; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
   1712 ; CHECK-NEXT:    vrndscaleps $11, (%rdi), %zmm0 {%k1}
   1713 ; CHECK-NEXT:    retq
        ; Merge-masked trunc of a vector loaded from %ptr: lanes where %cmp is zero take
        ; the rounded value, the rest keep %passthru. The assertions above expect the load
        ; folded as (%rdi) and the result merged directly into %zmm0 with no extra move.
   1714   %c = icmp eq <16 x i32> %cmp, zeroinitializer
   1715   %p = load <16 x float>, <16 x float>* %ptr
   1716   %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
   1717   %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
   1718   ret <16 x float> %s
   1719 }
   1720 
   1721 define <2 x double> @trunc_v2f64_maskz_load(<2 x double>* %ptr, <2 x i64> %cmp) {
   1722 ; CHECK-LABEL: trunc_v2f64_maskz_load:
   1723 ; CHECK:       ## %bb.0:
   1724 ; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
   1725 ; CHECK-NEXT:    vrndscalepd $11, (%rdi), %xmm0 {%k1} {z}
   1726 ; CHECK-NEXT:    retq
        ; Zero-masked trunc of a vector loaded from %ptr: lanes where %cmp is zero take the
        ; rounded value, the rest are zero. The assertions above expect the load folded as
        ; the (%rdi) operand of vrndscalepd with {%k1} {z}.
   1727   %c = icmp eq <2 x i64> %cmp, zeroinitializer
   1728   %p = load <2 x double>, <2 x double>* %ptr
   1729   %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
   1730   %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
   1731   ret <2 x double> %s
   1732 }
   1733 
   1734 define <4 x float> @trunc_v4f32_maskz_load(<4 x float>* %ptr, <4 x i32> %cmp) {
   1735 ; CHECK-LABEL: trunc_v4f32_maskz_load:
   1736 ; CHECK:       ## %bb.0:
   1737 ; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
   1738 ; CHECK-NEXT:    vrndscaleps $11, (%rdi), %xmm0 {%k1} {z}
   1739 ; CHECK-NEXT:    retq
        ; Zero-masked trunc of a vector loaded from %ptr: lanes where %cmp is zero take the
        ; rounded value, the rest are zero. The assertions above expect the load folded as
        ; the (%rdi) operand of vrndscaleps with {%k1} {z}.
   1740   %c = icmp eq <4 x i32> %cmp, zeroinitializer
   1741   %p = load <4 x float>, <4 x float>* %ptr
   1742   %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
   1743   %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
   1744   ret <4 x float> %s
   1745 }
   1746 
   1747 define <4 x double> @trunc_v4f64_maskz_load(<4 x double>* %ptr, <4 x i64> %cmp) {
   1748 ; CHECK-LABEL: trunc_v4f64_maskz_load:
   1749 ; CHECK:       ## %bb.0:
   1750 ; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
   1751 ; CHECK-NEXT:    vrndscalepd $11, (%rdi), %ymm0 {%k1} {z}
   1752 ; CHECK-NEXT:    retq
        ; Zero-masked trunc of a vector loaded from %ptr: lanes where %cmp is zero take the
        ; rounded value, the rest are zero. The assertions above expect the load folded as
        ; the (%rdi) operand of vrndscalepd with {%k1} {z}.
   1753   %c = icmp eq <4 x i64> %cmp, zeroinitializer
   1754   %p = load <4 x double>, <4 x double>* %ptr
   1755   %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
   1756   %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
   1757   ret <4 x double> %s
   1758 }
   1759 
   1760 define <8 x float> @trunc_v8f32_maskz_load(<8 x float>* %ptr, <8 x i32> %cmp) {
   1761 ; CHECK-LABEL: trunc_v8f32_maskz_load:
   1762 ; CHECK:       ## %bb.0:
   1763 ; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
   1764 ; CHECK-NEXT:    vrndscaleps $11, (%rdi), %ymm0 {%k1} {z}
   1765 ; CHECK-NEXT:    retq
        ; Zero-masked trunc of a vector loaded from %ptr: lanes where %cmp is zero take the
        ; rounded value, the rest are zero. The assertions above expect the load folded as
        ; the (%rdi) operand of vrndscaleps with {%k1} {z}.
   1766   %c = icmp eq <8 x i32> %cmp, zeroinitializer
   1767   %p = load <8 x float>, <8 x float>* %ptr
   1768   %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
   1769   %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
   1770   ret <8 x float> %s
   1771 }
   1772 
   1773 define <8 x double> @trunc_v8f64_maskz_load(<8 x double>* %ptr, <8 x i64> %cmp) {
   1774 ; CHECK-LABEL: trunc_v8f64_maskz_load:
   1775 ; CHECK:       ## %bb.0:
   1776 ; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
   1777 ; CHECK-NEXT:    vrndscalepd $11, (%rdi), %zmm0 {%k1} {z}
   1778 ; CHECK-NEXT:    retq
        ; Zero-masked trunc of a vector loaded from %ptr: lanes where %cmp is zero take the
        ; rounded value, the rest are zero. The assertions above expect the load folded as
        ; the (%rdi) operand of vrndscalepd with {%k1} {z}.
   1779   %c = icmp eq <8 x i64> %cmp, zeroinitializer
   1780   %p = load <8 x double>, <8 x double>* %ptr
   1781   %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
   1782   %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
   1783   ret <8 x double> %s
   1784 }
   1785 
   1786 define <16 x float> @trunc_v16f32_maskz_load(<16 x float>* %ptr, <16 x i32> %cmp) {
   1787 ; CHECK-LABEL: trunc_v16f32_maskz_load:
   1788 ; CHECK:       ## %bb.0:
   1789 ; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
   1790 ; CHECK-NEXT:    vrndscaleps $11, (%rdi), %zmm0 {%k1} {z}
   1791 ; CHECK-NEXT:    retq
        ; Zero-masked trunc of a vector loaded from %ptr: lanes where %cmp is zero take the
        ; rounded value, the rest are zero. The assertions above expect the load folded as
        ; the (%rdi) operand of vrndscaleps with {%k1} {z}.
   1792   %c = icmp eq <16 x i32> %cmp, zeroinitializer
   1793   %p = load <16 x float>, <16 x float>* %ptr
   1794   %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
   1795   %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
   1796   ret <16 x float> %s
   1797 }
   1798 
   1799 define <2 x double> @trunc_v2f64_broadcast(double* %ptr) {
   1800 ; CHECK-LABEL: trunc_v2f64_broadcast:
   1801 ; CHECK:       ## %bb.0:
   1802 ; CHECK-NEXT:    vrndscalepd $11, (%rdi){1to2}, %xmm0
   1803 ; CHECK-NEXT:    retq
        ; The double at %ptr is splatted to both lanes (insertelement into lane 0, then an
        ; all-zero shuffle mask) and truncated. The assertions above expect the splat to
        ; fold into vrndscalepd as a {1to2} memory operand.
   1804   %ps = load double, double* %ptr
   1805   %pins = insertelement <2 x double> undef, double %ps, i32 0
   1806   %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
   1807   %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
   1808   ret <2 x double> %t
   1809 }
   1810 
   1811 define <4 x float> @trunc_v4f32_broadcast(float* %ptr) {
   1812 ; CHECK-LABEL: trunc_v4f32_broadcast:
   1813 ; CHECK:       ## %bb.0:
   1814 ; CHECK-NEXT:    vrndscaleps $11, (%rdi){1to4}, %xmm0
   1815 ; CHECK-NEXT:    retq
        ; The float at %ptr is splatted to all 4 lanes (insertelement into lane 0, then an
        ; all-zero shuffle mask) and truncated. The assertions above expect the splat to
        ; fold into vrndscaleps as a {1to4} memory operand.
   1816   %ps = load float, float* %ptr
   1817   %pins = insertelement <4 x float> undef, float %ps, i32 0
   1818   %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
   1819   %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
   1820   ret <4 x float> %t
   1821 }
   1822 
   1823 define <4 x double> @trunc_v4f64_broadcast(double* %ptr){
   1824 ; CHECK-LABEL: trunc_v4f64_broadcast:
   1825 ; CHECK:       ## %bb.0:
   1826 ; CHECK-NEXT:    vrndscalepd $11, (%rdi){1to4}, %ymm0
   1827 ; CHECK-NEXT:    retq
        ; The double at %ptr is splatted to all 4 lanes (insertelement into lane 0, then an
        ; all-zero shuffle mask) and truncated. The assertions above expect the splat to
        ; fold into vrndscalepd as a {1to4} memory operand.
   1828   %ps = load double, double* %ptr
   1829   %pins = insertelement <4 x double> undef, double %ps, i32 0
   1830   %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
   1831   %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
   1832   ret <4 x double> %t
   1833 }
   1834 
   1835 define <8 x float> @trunc_v8f32_broadcast(float* %ptr) {
   1836 ; CHECK-LABEL: trunc_v8f32_broadcast:
   1837 ; CHECK:       ## %bb.0:
   1838 ; CHECK-NEXT:    vrndscaleps $11, (%rdi){1to8}, %ymm0
   1839 ; CHECK-NEXT:    retq
        ; The float at %ptr is splatted to all 8 lanes (insertelement into lane 0, then an
        ; all-zero shuffle mask) and truncated. The assertions above expect the splat to
        ; fold into vrndscaleps as a {1to8} memory operand.
   1840   %ps = load float, float* %ptr
   1841   %pins = insertelement <8 x float> undef, float %ps, i32 0
   1842   %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
   1843   %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
   1844   ret <8 x float> %t
   1845 }
   1846 
   1847 define <8 x double> @trunc_v8f64_broadcast(double* %ptr){
   1848 ; CHECK-LABEL: trunc_v8f64_broadcast:
   1849 ; CHECK:       ## %bb.0:
   1850 ; CHECK-NEXT:    vrndscalepd $11, (%rdi){1to8}, %zmm0
   1851 ; CHECK-NEXT:    retq
        ; The double at %ptr is splatted to all 8 lanes (insertelement into lane 0, then an
        ; all-zero shuffle mask) and truncated. The assertions above expect the splat to
        ; fold into vrndscalepd as a {1to8} memory operand.
   1852   %ps = load double, double* %ptr
   1853   %pins = insertelement <8 x double> undef, double %ps, i32 0
   1854   %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
   1855   %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
   1856   ret <8 x double> %t
   1857 }
   1858 
   1859 define <16 x float> @trunc_v16f32_broadcast(float* %ptr) {
   1860 ; CHECK-LABEL: trunc_v16f32_broadcast:
   1861 ; CHECK:       ## %bb.0:
   1862 ; CHECK-NEXT:    vrndscaleps $11, (%rdi){1to16}, %zmm0
   1863 ; CHECK-NEXT:    retq
        ; The float at %ptr is splatted to all 16 lanes (insertelement into lane 0, then an
        ; all-zero shuffle mask) and truncated. The assertions above expect the splat to
        ; fold into vrndscaleps as a {1to16} memory operand.
   1864   %ps = load float, float* %ptr
   1865   %pins = insertelement <16 x float> undef, float %ps, i32 0
   1866   %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
   1867   %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
   1868   ret <16 x float> %t
   1869 }
   1870 
   1871 define <2 x double> @trunc_v2f64_mask_broadcast(double* %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
   1872 ; CHECK-LABEL: trunc_v2f64_mask_broadcast:
   1873 ; CHECK:       ## %bb.0:
   1874 ; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
   1875 ; CHECK-NEXT:    vrndscalepd $11, (%rdi){1to2}, %xmm0 {%k1}
   1876 ; CHECK-NEXT:    retq
        ; Merge-masked trunc of a splatted scalar: the double at %ptr is replicated to both
        ; lanes; lanes where %cmp is zero take the rounded value, the rest keep %passthru.
        ; The assertions above expect a {1to2} operand merged into %xmm0 under %k1.
   1877   %c = icmp eq <2 x i64> %cmp, zeroinitializer
   1878   %ps = load double, double* %ptr
   1879   %pins = insertelement <2 x double> undef, double %ps, i32 0
   1880   %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
   1881   %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
   1882   %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
   1883   ret <2 x double> %s
   1884 }
   1885 
   1886 define <4 x float> @trunc_v4f32_mask_broadcast(float* %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
   1887 ; CHECK-LABEL: trunc_v4f32_mask_broadcast:
   1888 ; CHECK:       ## %bb.0:
   1889 ; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
   1890 ; CHECK-NEXT:    vrndscaleps $11, (%rdi){1to4}, %xmm0 {%k1}
   1891 ; CHECK-NEXT:    retq
        ; Merge-masked trunc of a splatted scalar: the float at %ptr is replicated to all 4
        ; lanes; lanes where %cmp is zero take the rounded value, the rest keep %passthru.
        ; The assertions above expect a {1to4} operand merged into %xmm0 under %k1.
   1892   %c = icmp eq <4 x i32> %cmp, zeroinitializer
   1893   %ps = load float, float* %ptr
   1894   %pins = insertelement <4 x float> undef, float %ps, i32 0
   1895   %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
   1896   %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
   1897   %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
   1898   ret <4 x float> %s
   1899 }
   1900 
   1901 define <4 x double> @trunc_v4f64_mask_broadcast(double* %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
   1902 ; CHECK-LABEL: trunc_v4f64_mask_broadcast:
   1903 ; CHECK:       ## %bb.0:
   1904 ; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
   1905 ; CHECK-NEXT:    vrndscalepd $11, (%rdi){1to4}, %ymm0 {%k1}
   1906 ; CHECK-NEXT:    retq
        ; Merge-masked trunc of a splatted scalar: the double at %ptr is replicated to all
        ; 4 lanes; lanes where %cmp is zero take the rounded value, the rest keep
        ; %passthru. The assertions above expect a {1to4} operand merged into %ymm0.
   1907   %c = icmp eq <4 x i64> %cmp, zeroinitializer
   1908   %ps = load double, double* %ptr
   1909   %pins = insertelement <4 x double> undef, double %ps, i32 0
   1910   %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
   1911   %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
   1912   %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
   1913   ret <4 x double> %s
   1914 }
   1915 
   1916 define <8 x float> @trunc_v8f32_mask_broadcast(float* %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
   1917 ; CHECK-LABEL: trunc_v8f32_mask_broadcast:
   1918 ; CHECK:       ## %bb.0:
   1919 ; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
   1920 ; CHECK-NEXT:    vrndscaleps $11, (%rdi){1to8}, %ymm0 {%k1}
   1921 ; CHECK-NEXT:    retq
        ; Merge-masked trunc of a splatted scalar: the float at %ptr is replicated to all 8
        ; lanes; lanes where %cmp is zero take the rounded value, the rest keep %passthru.
        ; The assertions above expect a {1to8} operand merged into %ymm0 under %k1.
   1922   %c = icmp eq <8 x i32> %cmp, zeroinitializer
   1923   %ps = load float, float* %ptr
   1924   %pins = insertelement <8 x float> undef, float %ps, i32 0
   1925   %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
   1926   %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
   1927   %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
   1928   ret <8 x float> %s
   1929 }
   1930 
   1931 define <8 x double> @trunc_v8f64_mask_broadcast(double* %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
   1932 ; CHECK-LABEL: trunc_v8f64_mask_broadcast:
   1933 ; CHECK:       ## %bb.0:
   1934 ; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
   1935 ; CHECK-NEXT:    vrndscalepd $11, (%rdi){1to8}, %zmm0 {%k1}
   1936 ; CHECK-NEXT:    retq
        ; Merge-masked trunc of a splatted scalar: the double at %ptr is replicated to all
        ; 8 lanes; lanes where %cmp is zero take the rounded value, the rest keep
        ; %passthru. The assertions above expect a {1to8} operand merged into %zmm0.
   1937   %c = icmp eq <8 x i64> %cmp, zeroinitializer
   1938   %ps = load double, double* %ptr
   1939   %pins = insertelement <8 x double> undef, double %ps, i32 0
   1940   %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
   1941   %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
   1942   %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
   1943   ret <8 x double> %s
   1944 }
   1945 
   1946 define <16 x float> @trunc_v16f32_mask_broadcast(float* %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
   1947 ; CHECK-LABEL: trunc_v16f32_mask_broadcast:
   1948 ; CHECK:       ## %bb.0:
   1949 ; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
   1950 ; CHECK-NEXT:    vrndscaleps $11, (%rdi){1to16}, %zmm0 {%k1}
   1951 ; CHECK-NEXT:    retq
        ; Merge-masked trunc of a splatted scalar: the float at %ptr is replicated to all
        ; 16 lanes; lanes where %cmp is zero take the rounded value, the rest keep
        ; %passthru. The assertions above expect a {1to16} operand merged into %zmm0.
   1952   %c = icmp eq <16 x i32> %cmp, zeroinitializer
   1953   %ps = load float, float* %ptr
   1954   %pins = insertelement <16 x float> undef, float %ps, i32 0
   1955   %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
   1956   %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
   1957   %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
   1958   ret <16 x float> %s
   1959 }
   1960 
   1961 define <2 x double> @trunc_v2f64_maskz_broadcast(double* %ptr, <2 x i64> %cmp) {
   1962 ; CHECK-LABEL: trunc_v2f64_maskz_broadcast:
   1963 ; CHECK:       ## %bb.0:
   1964 ; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
   1965 ; CHECK-NEXT:    vrndscalepd $11, (%rdi){1to2}, %xmm0 {%k1} {z}
   1966 ; CHECK-NEXT:    retq
        ; Zero-masked trunc of a splatted scalar: the double at %ptr is replicated to both
        ; lanes; lanes where %cmp is zero take the rounded value, the rest are zero. The
        ; assertions above expect a {1to2} operand with {%k1} {z}.
   1967   %c = icmp eq <2 x i64> %cmp, zeroinitializer
   1968   %ps = load double, double* %ptr
   1969   %pins = insertelement <2 x double> undef, double %ps, i32 0
   1970   %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
   1971   %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
   1972   %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
   1973   ret <2 x double> %s
   1974 }
   1975 
   1976 define <4 x float> @trunc_v4f32_maskz_broadcast(float* %ptr, <4 x i32> %cmp) {
   1977 ; CHECK-LABEL: trunc_v4f32_maskz_broadcast:
   1978 ; CHECK:       ## %bb.0:
   1979 ; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
   1980 ; CHECK-NEXT:    vrndscaleps $11, (%rdi){1to4}, %xmm0 {%k1} {z}
   1981 ; CHECK-NEXT:    retq
        ; Zero-masked trunc of a splatted scalar: the float at %ptr is replicated to all 4
        ; lanes; lanes where %cmp is zero take the rounded value, the rest are zero. The
        ; assertions above expect a {1to4} operand with {%k1} {z}.
   1982   %c = icmp eq <4 x i32> %cmp, zeroinitializer
   1983   %ps = load float, float* %ptr
   1984   %pins = insertelement <4 x float> undef, float %ps, i32 0
   1985   %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
   1986   %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
   1987   %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
   1988   ret <4 x float> %s
   1989 }
   1990 
   1991 define <4 x double> @trunc_v4f64_maskz_broadcast(double* %ptr, <4 x i64> %cmp) {
   1992 ; CHECK-LABEL: trunc_v4f64_maskz_broadcast:
   1993 ; CHECK:       ## %bb.0:
   1994 ; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
   1995 ; CHECK-NEXT:    vrndscalepd $11, (%rdi){1to4}, %ymm0 {%k1} {z}
   1996 ; CHECK-NEXT:    retq
        ; Zero-masked trunc of a splatted scalar: the double at %ptr is replicated to all 4
        ; lanes; lanes where %cmp is zero take the rounded value, the rest are zero. The
        ; assertions above expect a {1to4} operand with {%k1} {z}.
   1997   %c = icmp eq <4 x i64> %cmp, zeroinitializer
   1998   %ps = load double, double* %ptr
   1999   %pins = insertelement <4 x double> undef, double %ps, i32 0
   2000   %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
   2001   %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
   2002   %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
   2003   ret <4 x double> %s
   2004 }
   2005 
   2006 define <8 x float> @trunc_v8f32_maskz_broadcast(float* %ptr, <8 x i32> %cmp) {
   2007 ; CHECK-LABEL: trunc_v8f32_maskz_broadcast:
   2008 ; CHECK:       ## %bb.0:
   2009 ; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
   2010 ; CHECK-NEXT:    vrndscaleps $11, (%rdi){1to8}, %ymm0 {%k1} {z}
   2011 ; CHECK-NEXT:    retq
        ; Zero-masked trunc of a splatted scalar: the float at %ptr is replicated to all 8
        ; lanes; lanes where %cmp is zero take the rounded value, the rest are zero. The
        ; assertions above expect a {1to8} operand with {%k1} {z}.
   2012   %c = icmp eq <8 x i32> %cmp, zeroinitializer
   2013   %ps = load float, float* %ptr
   2014   %pins = insertelement <8 x float> undef, float %ps, i32 0
   2015   %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
   2016   %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
   2017   %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
   2018   ret <8 x float> %s
   2019 }
   2020 
   2021 define <8 x double> @trunc_v8f64_maskz_broadcast(double* %ptr, <8 x i64> %cmp) {
   2022 ; CHECK-LABEL: trunc_v8f64_maskz_broadcast:
   2023 ; CHECK:       ## %bb.0:
   2024 ; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
   2025 ; CHECK-NEXT:    vrndscalepd $11, (%rdi){1to8}, %zmm0 {%k1} {z}
   2026 ; CHECK-NEXT:    retq
        ; Zero-masked trunc of a splatted scalar: the double at %ptr is replicated to all 8
        ; lanes; lanes where %cmp is zero take the rounded value, the rest are zero. The
        ; assertions above expect a {1to8} operand with {%k1} {z}.
   2027   %c = icmp eq <8 x i64> %cmp, zeroinitializer
   2028   %ps = load double, double* %ptr
   2029   %pins = insertelement <8 x double> undef, double %ps, i32 0
   2030   %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
   2031   %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
   2032   %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
   2033   ret <8 x double> %s
   2034 }
   2035 
   2036 define <16 x float> @trunc_v16f32_maskz_broadcast(float* %ptr, <16 x i32> %cmp) {
        ; llvm.trunc of a scalar float splatted to 16 lanes; lanes where %cmp != 0 are zeroed.
        ; Expected: the splat is folded as (%rdi){1to16} into a {%k1} {z} vrndscaleps $11 on zmm.
   2037 ; CHECK-LABEL: trunc_v16f32_maskz_broadcast:
   2038 ; CHECK:       ## %bb.0:
   2039 ; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
   2040 ; CHECK-NEXT:    vrndscaleps $11, (%rdi){1to16}, %zmm0 {%k1} {z}
   2041 ; CHECK-NEXT:    retq
   2042   %c = icmp eq <16 x i32> %cmp, zeroinitializer
   2043   %ps = load float, float* %ptr
   2044   %pins = insertelement <16 x float> undef, float %ps, i32 0
   2045   %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
   2046   %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
   2047   %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
   2048   ret <16 x float> %s
   2049 }
   2050 
   2051 define <2 x double> @rint_v2f64(<2 x double> %p) {
        ; Unmasked llvm.rint on a <2 x double> register argument.
        ; Expected: a single vroundpd $4 on xmm0 (not vrndscalepd).
   2052 ; CHECK-LABEL: rint_v2f64:
   2053 ; CHECK:       ## %bb.0:
   2054 ; CHECK-NEXT:    vroundpd $4, %xmm0, %xmm0
   2055 ; CHECK-NEXT:    retq
   2056   %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
   2057   ret <2 x double> %t
   2058 }
   2059 
   2060 define <4 x float> @rint_v4f32(<4 x float> %p) {
        ; Unmasked llvm.rint on a <4 x float> register argument.
        ; Expected: a single vroundps $4 on xmm0 (not vrndscaleps).
   2061 ; CHECK-LABEL: rint_v4f32:
   2062 ; CHECK:       ## %bb.0:
   2063 ; CHECK-NEXT:    vroundps $4, %xmm0, %xmm0
   2064 ; CHECK-NEXT:    retq
   2065   %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
   2066   ret <4 x float> %t
   2067 }
   2068 
   2069 define <4 x double> @rint_v4f64(<4 x double> %p){
        ; Unmasked llvm.rint on a <4 x double> register argument.
        ; Expected: a single vroundpd $4 on ymm0 (not vrndscalepd).
   2070 ; CHECK-LABEL: rint_v4f64:
   2071 ; CHECK:       ## %bb.0:
   2072 ; CHECK-NEXT:    vroundpd $4, %ymm0, %ymm0
   2073 ; CHECK-NEXT:    retq
   2074   %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
   2075   ret <4 x double> %t
   2076 }
   2077 
   2078 define <8 x float> @rint_v8f32(<8 x float> %p) {
        ; Unmasked llvm.rint on an <8 x float> register argument.
        ; Expected: a single vroundps $4 on ymm0 (not vrndscaleps).
   2079 ; CHECK-LABEL: rint_v8f32:
   2080 ; CHECK:       ## %bb.0:
   2081 ; CHECK-NEXT:    vroundps $4, %ymm0, %ymm0
   2082 ; CHECK-NEXT:    retq
   2083   %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
   2084   ret <8 x float> %t
   2085 }
   2086 
   2087 define <8 x double> @rint_v8f64(<8 x double> %p){
        ; Unmasked llvm.rint on an <8 x double> register argument.
        ; Expected: a single vrndscalepd $4 on zmm0.
   2088 ; CHECK-LABEL: rint_v8f64:
   2089 ; CHECK:       ## %bb.0:
   2090 ; CHECK-NEXT:    vrndscalepd $4, %zmm0, %zmm0
   2091 ; CHECK-NEXT:    retq
   2092   %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
   2093   ret <8 x double> %t
   2094 }
   2095 
   2096 define <16 x float> @rint_v16f32(<16 x float> %p) {
        ; Unmasked llvm.rint on a <16 x float> register argument.
        ; Expected: a single vrndscaleps $4 on zmm0.
   2097 ; CHECK-LABEL: rint_v16f32:
   2098 ; CHECK:       ## %bb.0:
   2099 ; CHECK-NEXT:    vrndscaleps $4, %zmm0, %zmm0
   2100 ; CHECK-NEXT:    retq
   2101   %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
   2102   ret <16 x float> %t
   2103 }
   2104 
   2105 define <2 x double> @rint_v2f64_load(<2 x double>* %ptr) {
        ; Unmasked llvm.rint whose operand is a full <2 x double> load from %ptr.
        ; Expected: the load is folded as the (%rdi) operand of vroundpd $4.
   2106 ; CHECK-LABEL: rint_v2f64_load:
   2107 ; CHECK:       ## %bb.0:
   2108 ; CHECK-NEXT:    vroundpd $4, (%rdi), %xmm0
   2109 ; CHECK-NEXT:    retq
   2110   %p = load <2 x double>, <2 x double>* %ptr
   2111   %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
   2112   ret <2 x double> %t
   2113 }
   2114 
   2115 define <4 x float> @rint_v4f32_load(<4 x float>* %ptr) {
        ; Unmasked llvm.rint whose operand is a full <4 x float> load from %ptr.
        ; Expected: the load is folded as the (%rdi) operand of vroundps $4.
   2116 ; CHECK-LABEL: rint_v4f32_load:
   2117 ; CHECK:       ## %bb.0:
   2118 ; CHECK-NEXT:    vroundps $4, (%rdi), %xmm0
   2119 ; CHECK-NEXT:    retq
   2120   %p = load <4 x float>, <4 x float>* %ptr
   2121   %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
   2122   ret <4 x float> %t
   2123 }
   2124 
   2125 define <4 x double> @rint_v4f64_load(<4 x double>* %ptr){
        ; Unmasked llvm.rint whose operand is a full <4 x double> load from %ptr.
        ; Expected: the load is folded as the (%rdi) operand of vroundpd $4 on ymm.
   2126 ; CHECK-LABEL: rint_v4f64_load:
   2127 ; CHECK:       ## %bb.0:
   2128 ; CHECK-NEXT:    vroundpd $4, (%rdi), %ymm0
   2129 ; CHECK-NEXT:    retq
   2130   %p = load <4 x double>, <4 x double>* %ptr
   2131   %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
   2132   ret <4 x double> %t
   2133 }
   2134 
   2135 define <8 x float> @rint_v8f32_load(<8 x float>* %ptr) {
        ; Unmasked llvm.rint whose operand is a full <8 x float> load from %ptr.
        ; Expected: the load is folded as the (%rdi) operand of vroundps $4 on ymm.
   2136 ; CHECK-LABEL: rint_v8f32_load:
   2137 ; CHECK:       ## %bb.0:
   2138 ; CHECK-NEXT:    vroundps $4, (%rdi), %ymm0
   2139 ; CHECK-NEXT:    retq
   2140   %p = load <8 x float>, <8 x float>* %ptr
   2141   %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
   2142   ret <8 x float> %t
   2143 }
   2144 
   2145 define <8 x double> @rint_v8f64_load(<8 x double>* %ptr){
        ; Unmasked llvm.rint whose operand is a full <8 x double> load from %ptr.
        ; Expected: the load is folded as the (%rdi) operand of vrndscalepd $4 on zmm.
   2146 ; CHECK-LABEL: rint_v8f64_load:
   2147 ; CHECK:       ## %bb.0:
   2148 ; CHECK-NEXT:    vrndscalepd $4, (%rdi), %zmm0
   2149 ; CHECK-NEXT:    retq
   2150   %p = load <8 x double>, <8 x double>* %ptr
   2151   %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
   2152   ret <8 x double> %t
   2153 }
   2154 
   2155 define <16 x float> @rint_v16f32_load(<16 x float>* %ptr) {
        ; Unmasked llvm.rint whose operand is a full <16 x float> load from %ptr.
        ; Expected: the load is folded as the (%rdi) operand of vrndscaleps $4 on zmm.
   2156 ; CHECK-LABEL: rint_v16f32_load:
   2157 ; CHECK:       ## %bb.0:
   2158 ; CHECK-NEXT:    vrndscaleps $4, (%rdi), %zmm0
   2159 ; CHECK-NEXT:    retq
   2160   %p = load <16 x float>, <16 x float>* %ptr
   2161   %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
   2162   ret <16 x float> %t
   2163 }
   2164 
   2165 define <2 x double> @rint_v2f64_mask(<2 x double> %p, <2 x double> %passthru, <2 x i64> %cmp) {
        ; Lanes where %cmp == 0 take llvm.rint(%p); the other lanes keep %passthru.
        ; Expected: vptestnmq builds %k1, vrndscalepd $4 writes xmm1 under {%k1}, then xmm1 is copied to xmm0.
   2166 ; CHECK-LABEL: rint_v2f64_mask:
   2167 ; CHECK:       ## %bb.0:
   2168 ; CHECK-NEXT:    vptestnmq %xmm2, %xmm2, %k1
   2169 ; CHECK-NEXT:    vrndscalepd $4, %xmm0, %xmm1 {%k1}
   2170 ; CHECK-NEXT:    vmovapd %xmm1, %xmm0
   2171 ; CHECK-NEXT:    retq
   2172   %c = icmp eq <2 x i64> %cmp, zeroinitializer
   2173   %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
   2174   %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
   2175   ret <2 x double> %s
   2176 }
   2177 
   2178 define <4 x float> @rint_v4f32_mask(<4 x float> %p, <4 x float> %passthru, <4 x i32> %cmp) {
        ; Lanes where %cmp == 0 take llvm.rint(%p); the other lanes keep %passthru.
        ; Expected: vptestnmd builds %k1, vrndscaleps $4 writes xmm1 under {%k1}, then xmm1 is copied to xmm0.
   2179 ; CHECK-LABEL: rint_v4f32_mask:
   2180 ; CHECK:       ## %bb.0:
   2181 ; CHECK-NEXT:    vptestnmd %xmm2, %xmm2, %k1
   2182 ; CHECK-NEXT:    vrndscaleps $4, %xmm0, %xmm1 {%k1}
   2183 ; CHECK-NEXT:    vmovaps %xmm1, %xmm0
   2184 ; CHECK-NEXT:    retq
   2185   %c = icmp eq <4 x i32> %cmp, zeroinitializer
   2186   %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
   2187   %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
   2188   ret <4 x float> %s
   2189 }
   2190 
   2191 define <4 x double> @rint_v4f64_mask(<4 x double> %p, <4 x double> %passthru, <4 x i64> %cmp) {
        ; Lanes where %cmp == 0 take llvm.rint(%p); the other lanes keep %passthru.
        ; Expected: vptestnmq builds %k1, vrndscalepd $4 writes ymm1 under {%k1}, then ymm1 is copied to ymm0.
   2192 ; CHECK-LABEL: rint_v4f64_mask:
   2193 ; CHECK:       ## %bb.0:
   2194 ; CHECK-NEXT:    vptestnmq %ymm2, %ymm2, %k1
   2195 ; CHECK-NEXT:    vrndscalepd $4, %ymm0, %ymm1 {%k1}
   2196 ; CHECK-NEXT:    vmovapd %ymm1, %ymm0
   2197 ; CHECK-NEXT:    retq
   2198   %c = icmp eq <4 x i64> %cmp, zeroinitializer
   2199   %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
   2200   %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
   2201   ret <4 x double> %s
   2202 }
   2203 
   2204 define <8 x float> @rint_v8f32_mask(<8 x float> %p, <8 x float> %passthru, <8 x i32> %cmp) {
        ; Lanes where %cmp == 0 take llvm.rint(%p); the other lanes keep %passthru.
        ; Expected: vptestnmd builds %k1, vrndscaleps $4 writes ymm1 under {%k1}, then ymm1 is copied to ymm0.
   2205 ; CHECK-LABEL: rint_v8f32_mask:
   2206 ; CHECK:       ## %bb.0:
   2207 ; CHECK-NEXT:    vptestnmd %ymm2, %ymm2, %k1
   2208 ; CHECK-NEXT:    vrndscaleps $4, %ymm0, %ymm1 {%k1}
   2209 ; CHECK-NEXT:    vmovaps %ymm1, %ymm0
   2210 ; CHECK-NEXT:    retq
   2211   %c = icmp eq <8 x i32> %cmp, zeroinitializer
   2212   %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
   2213   %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
   2214   ret <8 x float> %s
   2215 }
   2216 
   2217 define <8 x double> @rint_v8f64_mask(<8 x double> %p, <8 x double> %passthru, <8 x i64> %cmp) {
        ; Lanes where %cmp == 0 take llvm.rint(%p); the other lanes keep %passthru.
        ; Expected: vptestnmq builds %k1, vrndscalepd $4 writes zmm1 under {%k1}, then zmm1 is copied to zmm0.
   2218 ; CHECK-LABEL: rint_v8f64_mask:
   2219 ; CHECK:       ## %bb.0:
   2220 ; CHECK-NEXT:    vptestnmq %zmm2, %zmm2, %k1
   2221 ; CHECK-NEXT:    vrndscalepd $4, %zmm0, %zmm1 {%k1}
   2222 ; CHECK-NEXT:    vmovapd %zmm1, %zmm0
   2223 ; CHECK-NEXT:    retq
   2224   %c = icmp eq <8 x i64> %cmp, zeroinitializer
   2225   %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
   2226   %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
   2227   ret <8 x double> %s
   2228 }
   2229 
   2230 define <16 x float> @rint_v16f32_mask(<16 x float> %p, <16 x float> %passthru, <16 x i32> %cmp) {
        ; Lanes where %cmp == 0 take llvm.rint(%p); the other lanes keep %passthru.
        ; Expected: vptestnmd builds %k1, vrndscaleps $4 writes zmm1 under {%k1}, then zmm1 is copied to zmm0.
   2231 ; CHECK-LABEL: rint_v16f32_mask:
   2232 ; CHECK:       ## %bb.0:
   2233 ; CHECK-NEXT:    vptestnmd %zmm2, %zmm2, %k1
   2234 ; CHECK-NEXT:    vrndscaleps $4, %zmm0, %zmm1 {%k1}
   2235 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
   2236 ; CHECK-NEXT:    retq
   2237   %c = icmp eq <16 x i32> %cmp, zeroinitializer
   2238   %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
   2239   %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
   2240   ret <16 x float> %s
   2241 }
   2242 
   2243 define <2 x double> @rint_v2f64_maskz(<2 x double> %p, <2 x i64> %cmp) {
        ; Lanes where %cmp == 0 take llvm.rint(%p); the other lanes are zeroed.
        ; Expected: vptestnmq builds %k1 and vrndscalepd $4 runs in place with {%k1} {z}.
   2244 ; CHECK-LABEL: rint_v2f64_maskz:
   2245 ; CHECK:       ## %bb.0:
   2246 ; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
   2247 ; CHECK-NEXT:    vrndscalepd $4, %xmm0, %xmm0 {%k1} {z}
   2248 ; CHECK-NEXT:    retq
   2249   %c = icmp eq <2 x i64> %cmp, zeroinitializer
   2250   %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
   2251   %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
   2252   ret <2 x double> %s
   2253 }
   2254 
   2255 define <4 x float> @rint_v4f32_maskz(<4 x float> %p, <4 x i32> %cmp) {
        ; Lanes where %cmp == 0 take llvm.rint(%p); the other lanes are zeroed.
        ; Expected: vptestnmd builds %k1 and vrndscaleps $4 runs in place with {%k1} {z}.
   2256 ; CHECK-LABEL: rint_v4f32_maskz:
   2257 ; CHECK:       ## %bb.0:
   2258 ; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
   2259 ; CHECK-NEXT:    vrndscaleps $4, %xmm0, %xmm0 {%k1} {z}
   2260 ; CHECK-NEXT:    retq
   2261   %c = icmp eq <4 x i32> %cmp, zeroinitializer
   2262   %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
   2263   %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
   2264   ret <4 x float> %s
   2265 }
   2266 
   2267 define <4 x double> @rint_v4f64_maskz(<4 x double> %p, <4 x i64> %cmp) {
        ; Lanes where %cmp == 0 take llvm.rint(%p); the other lanes are zeroed.
        ; Expected: vptestnmq builds %k1 and vrndscalepd $4 runs in place on ymm0 with {%k1} {z}.
   2268 ; CHECK-LABEL: rint_v4f64_maskz:
   2269 ; CHECK:       ## %bb.0:
   2270 ; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
   2271 ; CHECK-NEXT:    vrndscalepd $4, %ymm0, %ymm0 {%k1} {z}
   2272 ; CHECK-NEXT:    retq
   2273   %c = icmp eq <4 x i64> %cmp, zeroinitializer
   2274   %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
   2275   %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
   2276   ret <4 x double> %s
   2277 }
   2278 
   2279 define <8 x float> @rint_v8f32_maskz(<8 x float> %p, <8 x i32> %cmp) {
        ; Lanes where %cmp == 0 take llvm.rint(%p); the other lanes are zeroed.
        ; Expected: vptestnmd builds %k1 and vrndscaleps $4 runs in place on ymm0 with {%k1} {z}.
   2280 ; CHECK-LABEL: rint_v8f32_maskz:
   2281 ; CHECK:       ## %bb.0:
   2282 ; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
   2283 ; CHECK-NEXT:    vrndscaleps $4, %ymm0, %ymm0 {%k1} {z}
   2284 ; CHECK-NEXT:    retq
   2285   %c = icmp eq <8 x i32> %cmp, zeroinitializer
   2286   %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
   2287   %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
   2288   ret <8 x float> %s
   2289 }
   2290 
   2291 define <8 x double> @rint_v8f64_maskz(<8 x double> %p, <8 x i64> %cmp) {
        ; Lanes where %cmp == 0 take llvm.rint(%p); the other lanes are zeroed.
        ; Expected: vptestnmq builds %k1 and vrndscalepd $4 runs in place on zmm0 with {%k1} {z}.
   2292 ; CHECK-LABEL: rint_v8f64_maskz:
   2293 ; CHECK:       ## %bb.0:
   2294 ; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
   2295 ; CHECK-NEXT:    vrndscalepd $4, %zmm0, %zmm0 {%k1} {z}
   2296 ; CHECK-NEXT:    retq
   2297   %c = icmp eq <8 x i64> %cmp, zeroinitializer
   2298   %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
   2299   %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
   2300   ret <8 x double> %s
   2301 }
   2302 
   2303 define <16 x float> @rint_v16f32_maskz(<16 x float> %p, <16 x i32> %cmp) {
        ; Lanes where %cmp == 0 take llvm.rint(%p); the other lanes are zeroed.
        ; Expected: vptestnmd builds %k1 and vrndscaleps $4 runs in place on zmm0 with {%k1} {z}.
   2304 ; CHECK-LABEL: rint_v16f32_maskz:
   2305 ; CHECK:       ## %bb.0:
   2306 ; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
   2307 ; CHECK-NEXT:    vrndscaleps $4, %zmm0, %zmm0 {%k1} {z}
   2308 ; CHECK-NEXT:    retq
   2309   %c = icmp eq <16 x i32> %cmp, zeroinitializer
   2310   %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
   2311   %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
   2312   ret <16 x float> %s
   2313 }
   2314 
   2315 define <2 x double> @rint_v2f64_mask_load(<2 x double>* %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
        ; llvm.rint of a full <2 x double> load; lanes where %cmp != 0 keep %passthru.
        ; Expected: the load is folded as (%rdi) into a {%k1}-masked vrndscalepd $4 writing xmm0, with no extra move.
   2316 ; CHECK-LABEL: rint_v2f64_mask_load:
   2317 ; CHECK:       ## %bb.0:
   2318 ; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
   2319 ; CHECK-NEXT:    vrndscalepd $4, (%rdi), %xmm0 {%k1}
   2320 ; CHECK-NEXT:    retq
   2321   %c = icmp eq <2 x i64> %cmp, zeroinitializer
   2322   %p = load <2 x double>, <2 x double>* %ptr
   2323   %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
   2324   %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
   2325   ret <2 x double> %s
   2326 }
   2327 
   2328 define <4 x float> @rint_v4f32_mask_load(<4 x float>* %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
        ; llvm.rint of a full <4 x float> load; lanes where %cmp != 0 keep %passthru.
        ; Expected: the load is folded as (%rdi) into a {%k1}-masked vrndscaleps $4 writing xmm0, with no extra move.
   2329 ; CHECK-LABEL: rint_v4f32_mask_load:
   2330 ; CHECK:       ## %bb.0:
   2331 ; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
   2332 ; CHECK-NEXT:    vrndscaleps $4, (%rdi), %xmm0 {%k1}
   2333 ; CHECK-NEXT:    retq
   2334   %c = icmp eq <4 x i32> %cmp, zeroinitializer
   2335   %p = load <4 x float>, <4 x float>* %ptr
   2336   %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
   2337   %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
   2338   ret <4 x float> %s
   2339 }
   2340 
   2341 define <4 x double> @rint_v4f64_mask_load(<4 x double>* %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
        ; llvm.rint of a full <4 x double> load; lanes where %cmp != 0 keep %passthru.
        ; Expected: the load is folded as (%rdi) into a {%k1}-masked vrndscalepd $4 writing ymm0, with no extra move.
   2342 ; CHECK-LABEL: rint_v4f64_mask_load:
   2343 ; CHECK:       ## %bb.0:
   2344 ; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
   2345 ; CHECK-NEXT:    vrndscalepd $4, (%rdi), %ymm0 {%k1}
   2346 ; CHECK-NEXT:    retq
   2347   %c = icmp eq <4 x i64> %cmp, zeroinitializer
   2348   %p = load <4 x double>, <4 x double>* %ptr
   2349   %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
   2350   %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
   2351   ret <4 x double> %s
   2352 }
   2353 
   2354 define <8 x float> @rint_v8f32_mask_load(<8 x float>* %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
        ; llvm.rint of a full <8 x float> load; lanes where %cmp != 0 keep %passthru.
        ; Expected: the load is folded as (%rdi) into a {%k1}-masked vrndscaleps $4 writing ymm0, with no extra move.
   2355 ; CHECK-LABEL: rint_v8f32_mask_load:
   2356 ; CHECK:       ## %bb.0:
   2357 ; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
   2358 ; CHECK-NEXT:    vrndscaleps $4, (%rdi), %ymm0 {%k1}
   2359 ; CHECK-NEXT:    retq
   2360   %c = icmp eq <8 x i32> %cmp, zeroinitializer
   2361   %p = load <8 x float>, <8 x float>* %ptr
   2362   %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
   2363   %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
   2364   ret <8 x float> %s
   2365 }
   2366 
   2367 define <8 x double> @rint_v8f64_mask_load(<8 x double>* %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
        ; llvm.rint of a full <8 x double> load; lanes where %cmp != 0 keep %passthru.
        ; Expected: the load is folded as (%rdi) into a {%k1}-masked vrndscalepd $4 writing zmm0, with no extra move.
   2368 ; CHECK-LABEL: rint_v8f64_mask_load:
   2369 ; CHECK:       ## %bb.0:
   2370 ; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
   2371 ; CHECK-NEXT:    vrndscalepd $4, (%rdi), %zmm0 {%k1}
   2372 ; CHECK-NEXT:    retq
   2373   %c = icmp eq <8 x i64> %cmp, zeroinitializer
   2374   %p = load <8 x double>, <8 x double>* %ptr
   2375   %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
   2376   %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
   2377   ret <8 x double> %s
   2378 }
   2379 
   2380 define <16 x float> @rint_v16f32_mask_load(<16 x float>* %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
        ; llvm.rint of a full <16 x float> load; lanes where %cmp != 0 keep %passthru.
        ; Expected: the load is folded as (%rdi) into a {%k1}-masked vrndscaleps $4 writing zmm0, with no extra move.
   2381 ; CHECK-LABEL: rint_v16f32_mask_load:
   2382 ; CHECK:       ## %bb.0:
   2383 ; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
   2384 ; CHECK-NEXT:    vrndscaleps $4, (%rdi), %zmm0 {%k1}
   2385 ; CHECK-NEXT:    retq
   2386   %c = icmp eq <16 x i32> %cmp, zeroinitializer
   2387   %p = load <16 x float>, <16 x float>* %ptr
   2388   %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
   2389   %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
   2390   ret <16 x float> %s
   2391 }
   2392 
   2393 define <2 x double> @rint_v2f64_maskz_load(<2 x double>* %ptr, <2 x i64> %cmp) {
        ; llvm.rint of a full <2 x double> load; lanes where %cmp != 0 are zeroed.
        ; Expected: the load is folded as (%rdi) into a {%k1} {z} vrndscalepd $4.
   2394 ; CHECK-LABEL: rint_v2f64_maskz_load:
   2395 ; CHECK:       ## %bb.0:
   2396 ; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
   2397 ; CHECK-NEXT:    vrndscalepd $4, (%rdi), %xmm0 {%k1} {z}
   2398 ; CHECK-NEXT:    retq
   2399   %c = icmp eq <2 x i64> %cmp, zeroinitializer
   2400   %p = load <2 x double>, <2 x double>* %ptr
   2401   %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
   2402   %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
   2403   ret <2 x double> %s
   2404 }
   2405 
   2406 define <4 x float> @rint_v4f32_maskz_load(<4 x float>* %ptr, <4 x i32> %cmp) {
        ; llvm.rint of a full <4 x float> load; lanes where %cmp != 0 are zeroed.
        ; Expected: the load is folded as (%rdi) into a {%k1} {z} vrndscaleps $4.
   2407 ; CHECK-LABEL: rint_v4f32_maskz_load:
   2408 ; CHECK:       ## %bb.0:
   2409 ; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
   2410 ; CHECK-NEXT:    vrndscaleps $4, (%rdi), %xmm0 {%k1} {z}
   2411 ; CHECK-NEXT:    retq
   2412   %c = icmp eq <4 x i32> %cmp, zeroinitializer
   2413   %p = load <4 x float>, <4 x float>* %ptr
   2414   %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
   2415   %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
   2416   ret <4 x float> %s
   2417 }
   2418 
   2419 define <4 x double> @rint_v4f64_maskz_load(<4 x double>* %ptr, <4 x i64> %cmp) {
        ; llvm.rint of a full <4 x double> load; lanes where %cmp != 0 are zeroed.
        ; Expected: the load is folded as (%rdi) into a {%k1} {z} vrndscalepd $4 on ymm.
   2420 ; CHECK-LABEL: rint_v4f64_maskz_load:
   2421 ; CHECK:       ## %bb.0:
   2422 ; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
   2423 ; CHECK-NEXT:    vrndscalepd $4, (%rdi), %ymm0 {%k1} {z}
   2424 ; CHECK-NEXT:    retq
   2425   %c = icmp eq <4 x i64> %cmp, zeroinitializer
   2426   %p = load <4 x double>, <4 x double>* %ptr
   2427   %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
   2428   %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
   2429   ret <4 x double> %s
   2430 }
   2431 
   2432 define <8 x float> @rint_v8f32_maskz_load(<8 x float>* %ptr, <8 x i32> %cmp) {
        ; llvm.rint of a full <8 x float> load; lanes where %cmp != 0 are zeroed.
        ; Expected: the load is folded as (%rdi) into a {%k1} {z} vrndscaleps $4 on ymm.
   2433 ; CHECK-LABEL: rint_v8f32_maskz_load:
   2434 ; CHECK:       ## %bb.0:
   2435 ; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
   2436 ; CHECK-NEXT:    vrndscaleps $4, (%rdi), %ymm0 {%k1} {z}
   2437 ; CHECK-NEXT:    retq
   2438   %c = icmp eq <8 x i32> %cmp, zeroinitializer
   2439   %p = load <8 x float>, <8 x float>* %ptr
   2440   %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
   2441   %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
   2442   ret <8 x float> %s
   2443 }
   2444 
   2445 define <8 x double> @rint_v8f64_maskz_load(<8 x double>* %ptr, <8 x i64> %cmp) {
        ; llvm.rint of a full <8 x double> load; lanes where %cmp != 0 are zeroed.
        ; Expected: the load is folded as (%rdi) into a {%k1} {z} vrndscalepd $4 on zmm.
   2446 ; CHECK-LABEL: rint_v8f64_maskz_load:
   2447 ; CHECK:       ## %bb.0:
   2448 ; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
   2449 ; CHECK-NEXT:    vrndscalepd $4, (%rdi), %zmm0 {%k1} {z}
   2450 ; CHECK-NEXT:    retq
   2451   %c = icmp eq <8 x i64> %cmp, zeroinitializer
   2452   %p = load <8 x double>, <8 x double>* %ptr
   2453   %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
   2454   %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
   2455   ret <8 x double> %s
   2456 }
   2457 
   2458 define <16 x float> @rint_v16f32_maskz_load(<16 x float>* %ptr, <16 x i32> %cmp) {
        ; llvm.rint of a full <16 x float> load; lanes where %cmp != 0 are zeroed.
        ; Expected: the load is folded as (%rdi) into a {%k1} {z} vrndscaleps $4 on zmm.
   2459 ; CHECK-LABEL: rint_v16f32_maskz_load:
   2460 ; CHECK:       ## %bb.0:
   2461 ; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
   2462 ; CHECK-NEXT:    vrndscaleps $4, (%rdi), %zmm0 {%k1} {z}
   2463 ; CHECK-NEXT:    retq
   2464   %c = icmp eq <16 x i32> %cmp, zeroinitializer
   2465   %p = load <16 x float>, <16 x float>* %ptr
   2466   %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
   2467   %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
   2468   ret <16 x float> %s
   2469 }
   2470 
   2471 define <2 x double> @rint_v2f64_broadcast(double* %ptr) {
        ; %p is a scalar double load splatted to 2 lanes (insertelement + all-zero shuffle mask).
        ; Expected: the splat is folded as the (%rdi){1to2} operand of an unmasked vrndscalepd $4.
   2472 ; CHECK-LABEL: rint_v2f64_broadcast:
   2473 ; CHECK:       ## %bb.0:
   2474 ; CHECK-NEXT:    vrndscalepd $4, (%rdi){1to2}, %xmm0
   2475 ; CHECK-NEXT:    retq
   2476   %ps = load double, double* %ptr
   2477   %pins = insertelement <2 x double> undef, double %ps, i32 0
   2478   %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
   2479   %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
   2480   ret <2 x double> %t
   2481 }
   2482 
   2483 define <4 x float> @rint_v4f32_broadcast(float* %ptr) {
        ; %p is a scalar float load splatted to 4 lanes (insertelement + all-zero shuffle mask).
        ; Expected: the splat is folded as the (%rdi){1to4} operand of an unmasked vrndscaleps $4.
   2484 ; CHECK-LABEL: rint_v4f32_broadcast:
   2485 ; CHECK:       ## %bb.0:
   2486 ; CHECK-NEXT:    vrndscaleps $4, (%rdi){1to4}, %xmm0
   2487 ; CHECK-NEXT:    retq
   2488   %ps = load float, float* %ptr
   2489   %pins = insertelement <4 x float> undef, float %ps, i32 0
   2490   %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
   2491   %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
   2492   ret <4 x float> %t
   2493 }
   2494 
   2495 define <4 x double> @rint_v4f64_broadcast(double* %ptr){
        ; %p is a scalar double load splatted to 4 lanes (insertelement + all-zero shuffle mask).
        ; Expected: the splat is folded as the (%rdi){1to4} operand of an unmasked vrndscalepd $4 on ymm.
   2496 ; CHECK-LABEL: rint_v4f64_broadcast:
   2497 ; CHECK:       ## %bb.0:
   2498 ; CHECK-NEXT:    vrndscalepd $4, (%rdi){1to4}, %ymm0
   2499 ; CHECK-NEXT:    retq
   2500   %ps = load double, double* %ptr
   2501   %pins = insertelement <4 x double> undef, double %ps, i32 0
   2502   %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
   2503   %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
   2504   ret <4 x double> %t
   2505 }
   2506 
   2507 define <8 x float> @rint_v8f32_broadcast(float* %ptr) {
        ; %p is a scalar float load splatted to 8 lanes (insertelement + all-zero shuffle mask).
        ; Expected: the splat is folded as the (%rdi){1to8} operand of an unmasked vrndscaleps $4 on ymm.
   2508 ; CHECK-LABEL: rint_v8f32_broadcast:
   2509 ; CHECK:       ## %bb.0:
   2510 ; CHECK-NEXT:    vrndscaleps $4, (%rdi){1to8}, %ymm0
   2511 ; CHECK-NEXT:    retq
   2512   %ps = load float, float* %ptr
   2513   %pins = insertelement <8 x float> undef, float %ps, i32 0
   2514   %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
   2515   %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
   2516   ret <8 x float> %t
   2517 }
   2518 
   2519 define <8 x double> @rint_v8f64_broadcast(double* %ptr){
        ; %p is a scalar double load splatted to 8 lanes (insertelement + all-zero shuffle mask).
        ; Expected: the splat is folded as the (%rdi){1to8} operand of an unmasked vrndscalepd $4 on zmm.
   2520 ; CHECK-LABEL: rint_v8f64_broadcast:
   2521 ; CHECK:       ## %bb.0:
   2522 ; CHECK-NEXT:    vrndscalepd $4, (%rdi){1to8}, %zmm0
   2523 ; CHECK-NEXT:    retq
   2524   %ps = load double, double* %ptr
   2525   %pins = insertelement <8 x double> undef, double %ps, i32 0
   2526   %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
   2527   %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
   2528   ret <8 x double> %t
   2529 }
   2530 
   2531 define <16 x float> @rint_v16f32_broadcast(float* %ptr) {
        ; %p is a scalar float load splatted to 16 lanes (insertelement + all-zero shuffle mask).
        ; Expected: the splat is folded as the (%rdi){1to16} operand of an unmasked vrndscaleps $4 on zmm.
   2532 ; CHECK-LABEL: rint_v16f32_broadcast:
   2533 ; CHECK:       ## %bb.0:
   2534 ; CHECK-NEXT:    vrndscaleps $4, (%rdi){1to16}, %zmm0
   2535 ; CHECK-NEXT:    retq
   2536   %ps = load float, float* %ptr
   2537   %pins = insertelement <16 x float> undef, float %ps, i32 0
   2538   %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
   2539   %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
   2540   ret <16 x float> %t
   2541 }
   2542 
   2543 define <2 x double> @rint_v2f64_mask_broadcast(double* %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
        ; llvm.rint of a scalar double splatted to 2 lanes; lanes where %cmp != 0 keep %passthru.
        ; Expected: the splat is folded as (%rdi){1to2} into a {%k1}-masked vrndscalepd $4 writing xmm0.
   2544 ; CHECK-LABEL: rint_v2f64_mask_broadcast:
   2545 ; CHECK:       ## %bb.0:
   2546 ; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
   2547 ; CHECK-NEXT:    vrndscalepd $4, (%rdi){1to2}, %xmm0 {%k1}
   2548 ; CHECK-NEXT:    retq
   2549   %c = icmp eq <2 x i64> %cmp, zeroinitializer
   2550   %ps = load double, double* %ptr
   2551   %pins = insertelement <2 x double> undef, double %ps, i32 0
   2552   %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
   2553   %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
   2554   %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
   2555   ret <2 x double> %s
   2556 }
   2557 
   2558 define <4 x float> @rint_v4f32_mask_broadcast(float* %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
        ; llvm.rint of a scalar float splatted to 4 lanes; lanes where %cmp != 0 keep %passthru.
        ; Expected: the splat is folded as (%rdi){1to4} into a {%k1}-masked vrndscaleps $4 writing xmm0.
   2559 ; CHECK-LABEL: rint_v4f32_mask_broadcast:
   2560 ; CHECK:       ## %bb.0:
   2561 ; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
   2562 ; CHECK-NEXT:    vrndscaleps $4, (%rdi){1to4}, %xmm0 {%k1}
   2563 ; CHECK-NEXT:    retq
   2564   %c = icmp eq <4 x i32> %cmp, zeroinitializer
   2565   %ps = load float, float* %ptr
   2566   %pins = insertelement <4 x float> undef, float %ps, i32 0
   2567   %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
   2568   %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
   2569   %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
   2570   ret <4 x float> %s
   2571 }
   2572 
   2573 define <4 x double> @rint_v4f64_mask_broadcast(double* %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
        ; llvm.rint of a scalar double splatted to 4 lanes; lanes where %cmp != 0 keep %passthru.
        ; Expected: the splat is folded as (%rdi){1to4} into a {%k1}-masked vrndscalepd $4 writing ymm0.
   2574 ; CHECK-LABEL: rint_v4f64_mask_broadcast:
   2575 ; CHECK:       ## %bb.0:
   2576 ; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
   2577 ; CHECK-NEXT:    vrndscalepd $4, (%rdi){1to4}, %ymm0 {%k1}
   2578 ; CHECK-NEXT:    retq
   2579   %c = icmp eq <4 x i64> %cmp, zeroinitializer
   2580   %ps = load double, double* %ptr
   2581   %pins = insertelement <4 x double> undef, double %ps, i32 0
   2582   %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
   2583   %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
   2584   %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
   2585   ret <4 x double> %s
   2586 }
   2587 
   2588 define <8 x float> @rint_v8f32_mask_broadcast(float* %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
        ; llvm.rint of a scalar float splatted to 8 lanes; lanes where %cmp != 0 keep %passthru.
        ; Expected: the splat is folded as (%rdi){1to8} into a {%k1}-masked vrndscaleps $4 writing ymm0.
   2589 ; CHECK-LABEL: rint_v8f32_mask_broadcast:
   2590 ; CHECK:       ## %bb.0:
   2591 ; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
   2592 ; CHECK-NEXT:    vrndscaleps $4, (%rdi){1to8}, %ymm0 {%k1}
   2593 ; CHECK-NEXT:    retq
   2594   %c = icmp eq <8 x i32> %cmp, zeroinitializer
   2595   %ps = load float, float* %ptr
   2596   %pins = insertelement <8 x float> undef, float %ps, i32 0
   2597   %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
   2598   %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
   2599   %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
   2600   ret <8 x float> %s
   2601 }
   2602 
   2603 define <8 x double> @rint_v8f64_mask_broadcast(double* %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
        ; llvm.rint of a scalar double splatted to 8 lanes; lanes where %cmp != 0 keep %passthru.
        ; Expected: the splat is folded as (%rdi){1to8} into a {%k1}-masked vrndscalepd $4 writing zmm0.
   2604 ; CHECK-LABEL: rint_v8f64_mask_broadcast:
   2605 ; CHECK:       ## %bb.0:
   2606 ; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
   2607 ; CHECK-NEXT:    vrndscalepd $4, (%rdi){1to8}, %zmm0 {%k1}
   2608 ; CHECK-NEXT:    retq
   2609   %c = icmp eq <8 x i64> %cmp, zeroinitializer
   2610   %ps = load double, double* %ptr
   2611   %pins = insertelement <8 x double> undef, double %ps, i32 0
   2612   %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
   2613   %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
   2614   %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
   2615   ret <8 x double> %s
   2616 }
   2617 
   2618 define <16 x float> @rint_v16f32_mask_broadcast(float* %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
        ; llvm.rint of a scalar float splatted to 16 lanes; lanes where %cmp != 0 keep %passthru.
        ; Expected: the splat is folded as (%rdi){1to16} into a {%k1}-masked vrndscaleps $4 writing zmm0.
   2619 ; CHECK-LABEL: rint_v16f32_mask_broadcast:
   2620 ; CHECK:       ## %bb.0:
   2621 ; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
   2622 ; CHECK-NEXT:    vrndscaleps $4, (%rdi){1to16}, %zmm0 {%k1}
   2623 ; CHECK-NEXT:    retq
   2624   %c = icmp eq <16 x i32> %cmp, zeroinitializer
   2625   %ps = load float, float* %ptr
   2626   %pins = insertelement <16 x float> undef, float %ps, i32 0
   2627   %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
   2628   %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
   2629   %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
   2630   ret <16 x float> %s
   2631 }
   2632 
   2633 define <2 x double> @rint_v2f64_maskz_broadcast(double* %ptr, <2 x i64> %cmp) {
        ; llvm.rint of a scalar double splatted to 2 lanes; lanes where %cmp != 0 are zeroed.
        ; Expected: the splat is folded as (%rdi){1to2} into a {%k1} {z} vrndscalepd $4.
   2634 ; CHECK-LABEL: rint_v2f64_maskz_broadcast:
   2635 ; CHECK:       ## %bb.0:
   2636 ; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
   2637 ; CHECK-NEXT:    vrndscalepd $4, (%rdi){1to2}, %xmm0 {%k1} {z}
   2638 ; CHECK-NEXT:    retq
   2639   %c = icmp eq <2 x i64> %cmp, zeroinitializer
   2640   %ps = load double, double* %ptr
   2641   %pins = insertelement <2 x double> undef, double %ps, i32 0
   2642   %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
   2643   %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
   2644   %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
   2645   ret <2 x double> %s
   2646 }
   2647 
   2648 define <4 x float> @rint_v4f32_maskz_broadcast(float* %ptr, <4 x i32> %cmp) {
        ; llvm.rint of a scalar float splatted to 4 lanes; lanes where %cmp != 0 are zeroed.
        ; Expected: the splat is folded as (%rdi){1to4} into a {%k1} {z} vrndscaleps $4.
   2649 ; CHECK-LABEL: rint_v4f32_maskz_broadcast:
   2650 ; CHECK:       ## %bb.0:
   2651 ; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
   2652 ; CHECK-NEXT:    vrndscaleps $4, (%rdi){1to4}, %xmm0 {%k1} {z}
   2653 ; CHECK-NEXT:    retq
   2654   %c = icmp eq <4 x i32> %cmp, zeroinitializer
   2655   %ps = load float, float* %ptr
   2656   %pins = insertelement <4 x float> undef, float %ps, i32 0
   2657   %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
   2658   %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
   2659   %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
   2660   ret <4 x float> %s
   2661 }
   2662 
   2663 define <4 x double> @rint_v4f64_maskz_broadcast(double* %ptr, <4 x i64> %cmp) {
        ; llvm.rint of a scalar double splatted to 4 lanes; lanes where %cmp != 0 are zeroed.
        ; Expected: the splat is folded as (%rdi){1to4} into a {%k1} {z} vrndscalepd $4 on ymm.
   2664 ; CHECK-LABEL: rint_v4f64_maskz_broadcast:
   2665 ; CHECK:       ## %bb.0:
   2666 ; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
   2667 ; CHECK-NEXT:    vrndscalepd $4, (%rdi){1to4}, %ymm0 {%k1} {z}
   2668 ; CHECK-NEXT:    retq
   2669   %c = icmp eq <4 x i64> %cmp, zeroinitializer
   2670   %ps = load double, double* %ptr
   2671   %pins = insertelement <4 x double> undef, double %ps, i32 0
   2672   %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
   2673   %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
   2674   %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
   2675   ret <4 x double> %s
   2676 }
   2677 
   2678 define <8 x float> @rint_v8f32_maskz_broadcast(float* %ptr, <8 x i32> %cmp) {
        ; llvm.rint of a scalar float splatted to 8 lanes; lanes where %cmp != 0 are zeroed.
        ; Expected: the splat is folded as (%rdi){1to8} into a {%k1} {z} vrndscaleps $4 on ymm.
   2679 ; CHECK-LABEL: rint_v8f32_maskz_broadcast:
   2680 ; CHECK:       ## %bb.0:
   2681 ; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
   2682 ; CHECK-NEXT:    vrndscaleps $4, (%rdi){1to8}, %ymm0 {%k1} {z}
   2683 ; CHECK-NEXT:    retq
   2684   %c = icmp eq <8 x i32> %cmp, zeroinitializer
   2685   %ps = load float, float* %ptr
   2686   %pins = insertelement <8 x float> undef, float %ps, i32 0
   2687   %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
   2688   %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
   2689   %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
   2690   ret <8 x float> %s
   2691 }
   2692 
   2693 define <8 x double> @rint_v8f64_maskz_broadcast(double* %ptr, <8 x i64> %cmp) {
        ; llvm.rint of a scalar double splatted to 8 lanes; lanes where %cmp != 0 are zeroed.
        ; Expected: the splat is folded as (%rdi){1to8} into a {%k1} {z} vrndscalepd $4 on zmm.
   2694 ; CHECK-LABEL: rint_v8f64_maskz_broadcast:
   2695 ; CHECK:       ## %bb.0:
   2696 ; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
   2697 ; CHECK-NEXT:    vrndscalepd $4, (%rdi){1to8}, %zmm0 {%k1} {z}
   2698 ; CHECK-NEXT:    retq
   2699   %c = icmp eq <8 x i64> %cmp, zeroinitializer
   2700   %ps = load double, double* %ptr
   2701   %pins = insertelement <8 x double> undef, double %ps, i32 0
   2702   %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
   2703   %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
   2704   %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
   2705   ret <8 x double> %s
   2706 }
   2707 
   2708 define <16 x float> @rint_v16f32_maskz_broadcast(float* %ptr, <16 x i32> %cmp) {
        ; llvm.rint of a scalar float splatted to 16 lanes; lanes where %cmp != 0 are zeroed.
        ; Expected: the splat is folded as (%rdi){1to16} into a {%k1} {z} vrndscaleps $4 on zmm.
   2709 ; CHECK-LABEL: rint_v16f32_maskz_broadcast:
   2710 ; CHECK:       ## %bb.0:
   2711 ; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
   2712 ; CHECK-NEXT:    vrndscaleps $4, (%rdi){1to16}, %zmm0 {%k1} {z}
   2713 ; CHECK-NEXT:    retq
   2714   %c = icmp eq <16 x i32> %cmp, zeroinitializer
   2715   %ps = load float, float* %ptr
   2716   %pins = insertelement <16 x float> undef, float %ps, i32 0
   2717   %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
   2718   %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
   2719   %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
   2720   ret <16 x float> %s
   2721 }
   2722 
   2723 define <2 x double> @nearbyint_v2f64(<2 x double> %p) { ; unmasked llvm.nearbyint on a by-value vector; expected asm is a single vroundpd with immediate 12
   2724 ; CHECK-LABEL: nearbyint_v2f64:
   2725 ; CHECK:       ## %bb.0:
   2726 ; CHECK-NEXT:    vroundpd $12, %xmm0, %xmm0
   2727 ; CHECK-NEXT:    retq
   2728   %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) ; operation under test
   2729   ret <2 x double> %t
   2730 }
   2731 
   2732 define <4 x float> @nearbyint_v4f32(<4 x float> %p) { ; unmasked llvm.nearbyint on a by-value vector; expected asm is a single vroundps with immediate 12
   2733 ; CHECK-LABEL: nearbyint_v4f32:
   2734 ; CHECK:       ## %bb.0:
   2735 ; CHECK-NEXT:    vroundps $12, %xmm0, %xmm0
   2736 ; CHECK-NEXT:    retq
   2737   %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p) ; operation under test
   2738   ret <4 x float> %t
   2739 }
   2740 
   2741 define <4 x double> @nearbyint_v4f64(<4 x double> %p){ ; unmasked llvm.nearbyint on a by-value vector; expected asm is a single vroundpd with immediate 12
   2742 ; CHECK-LABEL: nearbyint_v4f64:
   2743 ; CHECK:       ## %bb.0:
   2744 ; CHECK-NEXT:    vroundpd $12, %ymm0, %ymm0
   2745 ; CHECK-NEXT:    retq
   2746   %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p) ; operation under test
   2747   ret <4 x double> %t
   2748 }
   2749 
   2750 define <8 x float> @nearbyint_v8f32(<8 x float> %p) { ; unmasked llvm.nearbyint on a by-value vector; expected asm is a single vroundps with immediate 12
   2751 ; CHECK-LABEL: nearbyint_v8f32:
   2752 ; CHECK:       ## %bb.0:
   2753 ; CHECK-NEXT:    vroundps $12, %ymm0, %ymm0
   2754 ; CHECK-NEXT:    retq
   2755   %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p) ; operation under test
   2756   ret <8 x float> %t
   2757 }
   2758 
   2759 define <8 x double> @nearbyint_v8f64(<8 x double> %p){ ; unmasked llvm.nearbyint on a by-value 512-bit vector; expected asm is a single vrndscalepd with immediate 12
   2760 ; CHECK-LABEL: nearbyint_v8f64:
   2761 ; CHECK:       ## %bb.0:
   2762 ; CHECK-NEXT:    vrndscalepd $12, %zmm0, %zmm0
   2763 ; CHECK-NEXT:    retq
   2764   %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p) ; operation under test
   2765   ret <8 x double> %t
   2766 }
   2767 
   2768 define <16 x float> @nearbyint_v16f32(<16 x float> %p) { ; unmasked llvm.nearbyint on a by-value 512-bit vector; expected asm is a single vrndscaleps with immediate 12
   2769 ; CHECK-LABEL: nearbyint_v16f32:
   2770 ; CHECK:       ## %bb.0:
   2771 ; CHECK-NEXT:    vrndscaleps $12, %zmm0, %zmm0
   2772 ; CHECK-NEXT:    retq
   2773   %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p) ; operation under test
   2774   ret <16 x float> %t
   2775 }
   2776 
   2777 define <2 x double> @nearbyint_v2f64_load(<2 x double>* %ptr) { ; unmasked llvm.nearbyint of a loaded vector; expected asm folds the load into vroundpd's memory operand
   2778 ; CHECK-LABEL: nearbyint_v2f64_load:
   2779 ; CHECK:       ## %bb.0:
   2780 ; CHECK-NEXT:    vroundpd $12, (%rdi), %xmm0
   2781 ; CHECK-NEXT:    retq
   2782   %p = load <2 x double>, <2 x double>* %ptr ; full-vector load feeding the intrinsic
   2783   %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) ; operation under test
   2784   ret <2 x double> %t
   2785 }
   2786 
   2787 define <4 x float> @nearbyint_v4f32_load(<4 x float>* %ptr) { ; unmasked llvm.nearbyint of a loaded vector; expected asm folds the load into vroundps's memory operand
   2788 ; CHECK-LABEL: nearbyint_v4f32_load:
   2789 ; CHECK:       ## %bb.0:
   2790 ; CHECK-NEXT:    vroundps $12, (%rdi), %xmm0
   2791 ; CHECK-NEXT:    retq
   2792   %p = load <4 x float>, <4 x float>* %ptr ; full-vector load feeding the intrinsic
   2793   %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p) ; operation under test
   2794   ret <4 x float> %t
   2795 }
   2796 
   2797 define <4 x double> @nearbyint_v4f64_load(<4 x double>* %ptr){ ; unmasked llvm.nearbyint of a loaded vector; expected asm folds the load into vroundpd's memory operand
   2798 ; CHECK-LABEL: nearbyint_v4f64_load:
   2799 ; CHECK:       ## %bb.0:
   2800 ; CHECK-NEXT:    vroundpd $12, (%rdi), %ymm0
   2801 ; CHECK-NEXT:    retq
   2802   %p = load <4 x double>, <4 x double>* %ptr ; full-vector load feeding the intrinsic
   2803   %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p) ; operation under test
   2804   ret <4 x double> %t
   2805 }
   2806 
   2807 define <8 x float> @nearbyint_v8f32_load(<8 x float>* %ptr) { ; unmasked llvm.nearbyint of a loaded vector; expected asm folds the load into vroundps's memory operand
   2808 ; CHECK-LABEL: nearbyint_v8f32_load:
   2809 ; CHECK:       ## %bb.0:
   2810 ; CHECK-NEXT:    vroundps $12, (%rdi), %ymm0
   2811 ; CHECK-NEXT:    retq
   2812   %p = load <8 x float>, <8 x float>* %ptr ; full-vector load feeding the intrinsic
   2813   %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p) ; operation under test
   2814   ret <8 x float> %t
   2815 }
   2816 
   2817 define <8 x double> @nearbyint_v8f64_load(<8 x double>* %ptr){ ; unmasked llvm.nearbyint of a loaded 512-bit vector; expected asm folds the load into vrndscalepd's memory operand
   2818 ; CHECK-LABEL: nearbyint_v8f64_load:
   2819 ; CHECK:       ## %bb.0:
   2820 ; CHECK-NEXT:    vrndscalepd $12, (%rdi), %zmm0
   2821 ; CHECK-NEXT:    retq
   2822   %p = load <8 x double>, <8 x double>* %ptr ; full-vector load feeding the intrinsic
   2823   %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p) ; operation under test
   2824   ret <8 x double> %t
   2825 }
   2826 
   2827 define <16 x float> @nearbyint_v16f32_load(<16 x float>* %ptr) { ; unmasked llvm.nearbyint of a loaded 512-bit vector; expected asm folds the load into vrndscaleps's memory operand
   2828 ; CHECK-LABEL: nearbyint_v16f32_load:
   2829 ; CHECK:       ## %bb.0:
   2830 ; CHECK-NEXT:    vrndscaleps $12, (%rdi), %zmm0
   2831 ; CHECK-NEXT:    retq
   2832   %p = load <16 x float>, <16 x float>* %ptr ; full-vector load feeding the intrinsic
   2833   %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p) ; operation under test
   2834   ret <16 x float> %t
   2835 }
   2836 
   2837 define <2 x double> @nearbyint_v2f64_mask(<2 x double> %p, <2 x double> %passthru, <2 x i64> %cmp) { ; merge-masked llvm.nearbyint; expected asm rounds into the passthru register under {%k1}, then copies it to the return register
   2838 ; CHECK-LABEL: nearbyint_v2f64_mask:
   2839 ; CHECK:       ## %bb.0:
   2840 ; CHECK-NEXT:    vptestnmq %xmm2, %xmm2, %k1
   2841 ; CHECK-NEXT:    vrndscalepd $12, %xmm0, %xmm1 {%k1}
   2842 ; CHECK-NEXT:    vmovapd %xmm1, %xmm0
   2843 ; CHECK-NEXT:    retq
   2844   %c = icmp eq <2 x i64> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   2845   %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) ; operation under test
   2846   %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru ; unselected lanes keep %passthru
   2847   ret <2 x double> %s
   2848 }
   2849 
   2850 define <4 x float> @nearbyint_v4f32_mask(<4 x float> %p, <4 x float> %passthru, <4 x i32> %cmp) { ; merge-masked llvm.nearbyint; expected asm rounds into the passthru register under {%k1}, then copies it to the return register
   2851 ; CHECK-LABEL: nearbyint_v4f32_mask:
   2852 ; CHECK:       ## %bb.0:
   2853 ; CHECK-NEXT:    vptestnmd %xmm2, %xmm2, %k1
   2854 ; CHECK-NEXT:    vrndscaleps $12, %xmm0, %xmm1 {%k1}
   2855 ; CHECK-NEXT:    vmovaps %xmm1, %xmm0
   2856 ; CHECK-NEXT:    retq
   2857   %c = icmp eq <4 x i32> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   2858   %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p) ; operation under test
   2859   %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru ; unselected lanes keep %passthru
   2860   ret <4 x float> %s
   2861 }
   2862 
   2863 define <4 x double> @nearbyint_v4f64_mask(<4 x double> %p, <4 x double> %passthru, <4 x i64> %cmp) { ; merge-masked llvm.nearbyint; expected asm rounds into the passthru register under {%k1}, then copies it to the return register
   2864 ; CHECK-LABEL: nearbyint_v4f64_mask:
   2865 ; CHECK:       ## %bb.0:
   2866 ; CHECK-NEXT:    vptestnmq %ymm2, %ymm2, %k1
   2867 ; CHECK-NEXT:    vrndscalepd $12, %ymm0, %ymm1 {%k1}
   2868 ; CHECK-NEXT:    vmovapd %ymm1, %ymm0
   2869 ; CHECK-NEXT:    retq
   2870   %c = icmp eq <4 x i64> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   2871   %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p) ; operation under test
   2872   %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru ; unselected lanes keep %passthru
   2873   ret <4 x double> %s
   2874 }
   2875 
   2876 define <8 x float> @nearbyint_v8f32_mask(<8 x float> %p, <8 x float> %passthru, <8 x i32> %cmp) { ; merge-masked llvm.nearbyint; expected asm rounds into the passthru register under {%k1}, then copies it to the return register
   2877 ; CHECK-LABEL: nearbyint_v8f32_mask:
   2878 ; CHECK:       ## %bb.0:
   2879 ; CHECK-NEXT:    vptestnmd %ymm2, %ymm2, %k1
   2880 ; CHECK-NEXT:    vrndscaleps $12, %ymm0, %ymm1 {%k1}
   2881 ; CHECK-NEXT:    vmovaps %ymm1, %ymm0
   2882 ; CHECK-NEXT:    retq
   2883   %c = icmp eq <8 x i32> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   2884   %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p) ; operation under test
   2885   %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru ; unselected lanes keep %passthru
   2886   ret <8 x float> %s
   2887 }
   2888 
   2889 define <8 x double> @nearbyint_v8f64_mask(<8 x double> %p, <8 x double> %passthru, <8 x i64> %cmp) { ; merge-masked llvm.nearbyint; expected asm rounds into the passthru register under {%k1}, then copies it to the return register
   2890 ; CHECK-LABEL: nearbyint_v8f64_mask:
   2891 ; CHECK:       ## %bb.0:
   2892 ; CHECK-NEXT:    vptestnmq %zmm2, %zmm2, %k1
   2893 ; CHECK-NEXT:    vrndscalepd $12, %zmm0, %zmm1 {%k1}
   2894 ; CHECK-NEXT:    vmovapd %zmm1, %zmm0
   2895 ; CHECK-NEXT:    retq
   2896   %c = icmp eq <8 x i64> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   2897   %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p) ; operation under test
   2898   %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru ; unselected lanes keep %passthru
   2899   ret <8 x double> %s
   2900 }
   2901 
   2902 define <16 x float> @nearbyint_v16f32_mask(<16 x float> %p, <16 x float> %passthru, <16 x i32> %cmp) { ; merge-masked llvm.nearbyint; expected asm rounds into the passthru register under {%k1}, then copies it to the return register
   2903 ; CHECK-LABEL: nearbyint_v16f32_mask:
   2904 ; CHECK:       ## %bb.0:
   2905 ; CHECK-NEXT:    vptestnmd %zmm2, %zmm2, %k1
   2906 ; CHECK-NEXT:    vrndscaleps $12, %zmm0, %zmm1 {%k1}
   2907 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
   2908 ; CHECK-NEXT:    retq
   2909   %c = icmp eq <16 x i32> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   2910   %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p) ; operation under test
   2911   %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru ; unselected lanes keep %passthru
   2912   ret <16 x float> %s
   2913 }
   2914 
   2915 define <2 x double> @nearbyint_v2f64_maskz(<2 x double> %p, <2 x i64> %cmp) { ; zero-masked llvm.nearbyint; expected asm is vrndscalepd in its {%k1} {z} form
   2916 ; CHECK-LABEL: nearbyint_v2f64_maskz:
   2917 ; CHECK:       ## %bb.0:
   2918 ; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
   2919 ; CHECK-NEXT:    vrndscalepd $12, %xmm0, %xmm0 {%k1} {z}
   2920 ; CHECK-NEXT:    retq
   2921   %c = icmp eq <2 x i64> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   2922   %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) ; operation under test
   2923   %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer ; unselected lanes are zeroed
   2924   ret <2 x double> %s
   2925 }
   2926 
   2927 define <4 x float> @nearbyint_v4f32_maskz(<4 x float> %p, <4 x i32> %cmp) { ; zero-masked llvm.nearbyint; expected asm is vrndscaleps in its {%k1} {z} form
   2928 ; CHECK-LABEL: nearbyint_v4f32_maskz:
   2929 ; CHECK:       ## %bb.0:
   2930 ; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
   2931 ; CHECK-NEXT:    vrndscaleps $12, %xmm0, %xmm0 {%k1} {z}
   2932 ; CHECK-NEXT:    retq
   2933   %c = icmp eq <4 x i32> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   2934   %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p) ; operation under test
   2935   %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer ; unselected lanes are zeroed
   2936   ret <4 x float> %s
   2937 }
   2938 
   2939 define <4 x double> @nearbyint_v4f64_maskz(<4 x double> %p, <4 x i64> %cmp) { ; zero-masked llvm.nearbyint; expected asm is vrndscalepd in its {%k1} {z} form
   2940 ; CHECK-LABEL: nearbyint_v4f64_maskz:
   2941 ; CHECK:       ## %bb.0:
   2942 ; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
   2943 ; CHECK-NEXT:    vrndscalepd $12, %ymm0, %ymm0 {%k1} {z}
   2944 ; CHECK-NEXT:    retq
   2945   %c = icmp eq <4 x i64> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   2946   %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p) ; operation under test
   2947   %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer ; unselected lanes are zeroed
   2948   ret <4 x double> %s
   2949 }
   2950 
   2951 define <8 x float> @nearbyint_v8f32_maskz(<8 x float> %p, <8 x i32> %cmp) { ; zero-masked llvm.nearbyint; expected asm is vrndscaleps in its {%k1} {z} form
   2952 ; CHECK-LABEL: nearbyint_v8f32_maskz:
   2953 ; CHECK:       ## %bb.0:
   2954 ; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
   2955 ; CHECK-NEXT:    vrndscaleps $12, %ymm0, %ymm0 {%k1} {z}
   2956 ; CHECK-NEXT:    retq
   2957   %c = icmp eq <8 x i32> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   2958   %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p) ; operation under test
   2959   %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer ; unselected lanes are zeroed
   2960   ret <8 x float> %s
   2961 }
   2962 
   2963 define <8 x double> @nearbyint_v8f64_maskz(<8 x double> %p, <8 x i64> %cmp) { ; zero-masked llvm.nearbyint; expected asm is vrndscalepd in its {%k1} {z} form
   2964 ; CHECK-LABEL: nearbyint_v8f64_maskz:
   2965 ; CHECK:       ## %bb.0:
   2966 ; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
   2967 ; CHECK-NEXT:    vrndscalepd $12, %zmm0, %zmm0 {%k1} {z}
   2968 ; CHECK-NEXT:    retq
   2969   %c = icmp eq <8 x i64> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   2970   %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p) ; operation under test
   2971   %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer ; unselected lanes are zeroed
   2972   ret <8 x double> %s
   2973 }
   2974 
   2975 define <16 x float> @nearbyint_v16f32_maskz(<16 x float> %p, <16 x i32> %cmp) { ; zero-masked llvm.nearbyint; expected asm is vrndscaleps in its {%k1} {z} form
   2976 ; CHECK-LABEL: nearbyint_v16f32_maskz:
   2977 ; CHECK:       ## %bb.0:
   2978 ; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
   2979 ; CHECK-NEXT:    vrndscaleps $12, %zmm0, %zmm0 {%k1} {z}
   2980 ; CHECK-NEXT:    retq
   2981   %c = icmp eq <16 x i32> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   2982   %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p) ; operation under test
   2983   %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer ; unselected lanes are zeroed
   2984   ret <16 x float> %s
   2985 }
   2986 
   2987 define <2 x double> @nearbyint_v2f64_mask_load(<2 x double>* %ptr, <2 x double> %passthru, <2 x i64> %cmp) { ; merge-masked llvm.nearbyint of a loaded vector; expected asm folds the load and writes %xmm0 under {%k1}
   2988 ; CHECK-LABEL: nearbyint_v2f64_mask_load:
   2989 ; CHECK:       ## %bb.0:
   2990 ; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
   2991 ; CHECK-NEXT:    vrndscalepd $12, (%rdi), %xmm0 {%k1}
   2992 ; CHECK-NEXT:    retq
   2993   %c = icmp eq <2 x i64> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   2994   %p = load <2 x double>, <2 x double>* %ptr ; full-vector load feeding the intrinsic
   2995   %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) ; operation under test
   2996   %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru ; unselected lanes keep %passthru
   2997   ret <2 x double> %s
   2998 }
   2999 
   3000 define <4 x float> @nearbyint_v4f32_mask_load(<4 x float>* %ptr, <4 x float> %passthru, <4 x i32> %cmp) { ; merge-masked llvm.nearbyint of a loaded vector; expected asm folds the load and writes %xmm0 under {%k1}
   3001 ; CHECK-LABEL: nearbyint_v4f32_mask_load:
   3002 ; CHECK:       ## %bb.0:
   3003 ; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
   3004 ; CHECK-NEXT:    vrndscaleps $12, (%rdi), %xmm0 {%k1}
   3005 ; CHECK-NEXT:    retq
   3006   %c = icmp eq <4 x i32> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   3007   %p = load <4 x float>, <4 x float>* %ptr ; full-vector load feeding the intrinsic
   3008   %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p) ; operation under test
   3009   %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru ; unselected lanes keep %passthru
   3010   ret <4 x float> %s
   3011 }
   3012 
   3013 define <4 x double> @nearbyint_v4f64_mask_load(<4 x double>* %ptr, <4 x double> %passthru, <4 x i64> %cmp) { ; merge-masked llvm.nearbyint of a loaded vector; expected asm folds the load and writes %ymm0 under {%k1}
   3014 ; CHECK-LABEL: nearbyint_v4f64_mask_load:
   3015 ; CHECK:       ## %bb.0:
   3016 ; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
   3017 ; CHECK-NEXT:    vrndscalepd $12, (%rdi), %ymm0 {%k1}
   3018 ; CHECK-NEXT:    retq
   3019   %c = icmp eq <4 x i64> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   3020   %p = load <4 x double>, <4 x double>* %ptr ; full-vector load feeding the intrinsic
   3021   %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p) ; operation under test
   3022   %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru ; unselected lanes keep %passthru
   3023   ret <4 x double> %s
   3024 }
   3025 
   3026 define <8 x float> @nearbyint_v8f32_mask_load(<8 x float>* %ptr, <8 x float> %passthru, <8 x i32> %cmp) { ; merge-masked llvm.nearbyint of a loaded vector; expected asm folds the load and writes %ymm0 under {%k1}
   3027 ; CHECK-LABEL: nearbyint_v8f32_mask_load:
   3028 ; CHECK:       ## %bb.0:
   3029 ; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
   3030 ; CHECK-NEXT:    vrndscaleps $12, (%rdi), %ymm0 {%k1}
   3031 ; CHECK-NEXT:    retq
   3032   %c = icmp eq <8 x i32> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   3033   %p = load <8 x float>, <8 x float>* %ptr ; full-vector load feeding the intrinsic
   3034   %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p) ; operation under test
   3035   %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru ; unselected lanes keep %passthru
   3036   ret <8 x float> %s
   3037 }
   3038 
   3039 define <8 x double> @nearbyint_v8f64_mask_load(<8 x double>* %ptr, <8 x double> %passthru, <8 x i64> %cmp) { ; merge-masked llvm.nearbyint of a loaded vector; expected asm folds the load and writes %zmm0 under {%k1}
   3040 ; CHECK-LABEL: nearbyint_v8f64_mask_load:
   3041 ; CHECK:       ## %bb.0:
   3042 ; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
   3043 ; CHECK-NEXT:    vrndscalepd $12, (%rdi), %zmm0 {%k1}
   3044 ; CHECK-NEXT:    retq
   3045   %c = icmp eq <8 x i64> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   3046   %p = load <8 x double>, <8 x double>* %ptr ; full-vector load feeding the intrinsic
   3047   %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p) ; operation under test
   3048   %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru ; unselected lanes keep %passthru
   3049   ret <8 x double> %s
   3050 }
   3051 
   3052 define <16 x float> @nearbyint_v16f32_mask_load(<16 x float>* %ptr, <16 x float> %passthru, <16 x i32> %cmp) { ; merge-masked llvm.nearbyint of a loaded vector; expected asm folds the load and writes %zmm0 under {%k1}
   3053 ; CHECK-LABEL: nearbyint_v16f32_mask_load:
   3054 ; CHECK:       ## %bb.0:
   3055 ; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
   3056 ; CHECK-NEXT:    vrndscaleps $12, (%rdi), %zmm0 {%k1}
   3057 ; CHECK-NEXT:    retq
   3058   %c = icmp eq <16 x i32> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   3059   %p = load <16 x float>, <16 x float>* %ptr ; full-vector load feeding the intrinsic
   3060   %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p) ; operation under test
   3061   %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru ; unselected lanes keep %passthru
   3062   ret <16 x float> %s
   3063 }
   3064 
   3065 define <2 x double> @nearbyint_v2f64_maskz_load(<2 x double>* %ptr, <2 x i64> %cmp) { ; zero-masked llvm.nearbyint of a loaded vector; expected asm folds the load into the {%k1} {z} form
   3066 ; CHECK-LABEL: nearbyint_v2f64_maskz_load:
   3067 ; CHECK:       ## %bb.0:
   3068 ; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
   3069 ; CHECK-NEXT:    vrndscalepd $12, (%rdi), %xmm0 {%k1} {z}
   3070 ; CHECK-NEXT:    retq
   3071   %c = icmp eq <2 x i64> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   3072   %p = load <2 x double>, <2 x double>* %ptr ; full-vector load feeding the intrinsic
   3073   %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) ; operation under test
   3074   %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer ; unselected lanes are zeroed
   3075   ret <2 x double> %s
   3076 }
   3077 
   3078 define <4 x float> @nearbyint_v4f32_maskz_load(<4 x float>* %ptr, <4 x i32> %cmp) { ; zero-masked llvm.nearbyint of a loaded vector; expected asm folds the load into the {%k1} {z} form
   3079 ; CHECK-LABEL: nearbyint_v4f32_maskz_load:
   3080 ; CHECK:       ## %bb.0:
   3081 ; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
   3082 ; CHECK-NEXT:    vrndscaleps $12, (%rdi), %xmm0 {%k1} {z}
   3083 ; CHECK-NEXT:    retq
   3084   %c = icmp eq <4 x i32> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   3085   %p = load <4 x float>, <4 x float>* %ptr ; full-vector load feeding the intrinsic
   3086   %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p) ; operation under test
   3087   %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer ; unselected lanes are zeroed
   3088   ret <4 x float> %s
   3089 }
   3090 
   3091 define <4 x double> @nearbyint_v4f64_maskz_load(<4 x double>* %ptr, <4 x i64> %cmp) { ; zero-masked llvm.nearbyint of a loaded vector; expected asm folds the load into the {%k1} {z} form
   3092 ; CHECK-LABEL: nearbyint_v4f64_maskz_load:
   3093 ; CHECK:       ## %bb.0:
   3094 ; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
   3095 ; CHECK-NEXT:    vrndscalepd $12, (%rdi), %ymm0 {%k1} {z}
   3096 ; CHECK-NEXT:    retq
   3097   %c = icmp eq <4 x i64> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   3098   %p = load <4 x double>, <4 x double>* %ptr ; full-vector load feeding the intrinsic
   3099   %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p) ; operation under test
   3100   %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer ; unselected lanes are zeroed
   3101   ret <4 x double> %s
   3102 }
   3103 
   3104 define <8 x float> @nearbyint_v8f32_maskz_load(<8 x float>* %ptr, <8 x i32> %cmp) { ; zero-masked llvm.nearbyint of a loaded vector; expected asm folds the load into the {%k1} {z} form
   3105 ; CHECK-LABEL: nearbyint_v8f32_maskz_load:
   3106 ; CHECK:       ## %bb.0:
   3107 ; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
   3108 ; CHECK-NEXT:    vrndscaleps $12, (%rdi), %ymm0 {%k1} {z}
   3109 ; CHECK-NEXT:    retq
   3110   %c = icmp eq <8 x i32> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   3111   %p = load <8 x float>, <8 x float>* %ptr ; full-vector load feeding the intrinsic
   3112   %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p) ; operation under test
   3113   %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer ; unselected lanes are zeroed
   3114   ret <8 x float> %s
   3115 }
   3116 
   3117 define <8 x double> @nearbyint_v8f64_maskz_load(<8 x double>* %ptr, <8 x i64> %cmp) { ; zero-masked llvm.nearbyint of a loaded vector; expected asm folds the load into the {%k1} {z} form
   3118 ; CHECK-LABEL: nearbyint_v8f64_maskz_load:
   3119 ; CHECK:       ## %bb.0:
   3120 ; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
   3121 ; CHECK-NEXT:    vrndscalepd $12, (%rdi), %zmm0 {%k1} {z}
   3122 ; CHECK-NEXT:    retq
   3123   %c = icmp eq <8 x i64> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   3124   %p = load <8 x double>, <8 x double>* %ptr ; full-vector load feeding the intrinsic
   3125   %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p) ; operation under test
   3126   %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer ; unselected lanes are zeroed
   3127   ret <8 x double> %s
   3128 }
   3129 
   3130 define <16 x float> @nearbyint_v16f32_maskz_load(<16 x float>* %ptr, <16 x i32> %cmp) { ; zero-masked llvm.nearbyint of a loaded vector; expected asm folds the load into the {%k1} {z} form
   3131 ; CHECK-LABEL: nearbyint_v16f32_maskz_load:
   3132 ; CHECK:       ## %bb.0:
   3133 ; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
   3134 ; CHECK-NEXT:    vrndscaleps $12, (%rdi), %zmm0 {%k1} {z}
   3135 ; CHECK-NEXT:    retq
   3136   %c = icmp eq <16 x i32> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   3137   %p = load <16 x float>, <16 x float>* %ptr ; full-vector load feeding the intrinsic
   3138   %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p) ; operation under test
   3139   %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer ; unselected lanes are zeroed
   3140   ret <16 x float> %s
   3141 }
   3142 
   3143 define <2 x double> @nearbyint_v2f64_broadcast(double* %ptr) { ; unmasked llvm.nearbyint of a scalar splat loaded from %ptr; expected asm is vrndscalepd with a {1to2} operand
   3144 ; CHECK-LABEL: nearbyint_v2f64_broadcast:
   3145 ; CHECK:       ## %bb.0:
   3146 ; CHECK-NEXT:    vrndscalepd $12, (%rdi){1to2}, %xmm0
   3147 ; CHECK-NEXT:    retq
   3148   %ps = load double, double* %ptr ; scalar that gets broadcast
   3149   %pins = insertelement <2 x double> undef, double %ps, i32 0 ; scalar into lane 0
   3150   %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer ; all-zero shuffle mask splats lane 0
   3151   %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) ; operation under test
   3152   ret <2 x double> %t
   3153 }
   3154 
   3155 define <4 x float> @nearbyint_v4f32_broadcast(float* %ptr) { ; unmasked llvm.nearbyint of a scalar splat loaded from %ptr; expected asm is vrndscaleps with a {1to4} operand
   3156 ; CHECK-LABEL: nearbyint_v4f32_broadcast:
   3157 ; CHECK:       ## %bb.0:
   3158 ; CHECK-NEXT:    vrndscaleps $12, (%rdi){1to4}, %xmm0
   3159 ; CHECK-NEXT:    retq
   3160   %ps = load float, float* %ptr ; scalar that gets broadcast
   3161   %pins = insertelement <4 x float> undef, float %ps, i32 0 ; scalar into lane 0
   3162   %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer ; all-zero shuffle mask splats lane 0
   3163   %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p) ; operation under test
   3164   ret <4 x float> %t
   3165 }
   3166 
   3167 define <4 x double> @nearbyint_v4f64_broadcast(double* %ptr){ ; unmasked llvm.nearbyint of a scalar splat loaded from %ptr; expected asm is vrndscalepd with a {1to4} operand
   3168 ; CHECK-LABEL: nearbyint_v4f64_broadcast:
   3169 ; CHECK:       ## %bb.0:
   3170 ; CHECK-NEXT:    vrndscalepd $12, (%rdi){1to4}, %ymm0
   3171 ; CHECK-NEXT:    retq
   3172   %ps = load double, double* %ptr ; scalar that gets broadcast
   3173   %pins = insertelement <4 x double> undef, double %ps, i32 0 ; scalar into lane 0
   3174   %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer ; all-zero shuffle mask splats lane 0
   3175   %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p) ; operation under test
   3176   ret <4 x double> %t
   3177 }
   3178 
   3179 define <8 x float> @nearbyint_v8f32_broadcast(float* %ptr) { ; unmasked llvm.nearbyint of a scalar splat loaded from %ptr; expected asm is vrndscaleps with a {1to8} operand
   3180 ; CHECK-LABEL: nearbyint_v8f32_broadcast:
   3181 ; CHECK:       ## %bb.0:
   3182 ; CHECK-NEXT:    vrndscaleps $12, (%rdi){1to8}, %ymm0
   3183 ; CHECK-NEXT:    retq
   3184   %ps = load float, float* %ptr ; scalar that gets broadcast
   3185   %pins = insertelement <8 x float> undef, float %ps, i32 0 ; scalar into lane 0
   3186   %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer ; all-zero shuffle mask splats lane 0
   3187   %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p) ; operation under test
   3188   ret <8 x float> %t
   3189 }
   3190 
   3191 define <8 x double> @nearbyint_v8f64_broadcast(double* %ptr){ ; unmasked llvm.nearbyint of a scalar splat loaded from %ptr; expected asm is vrndscalepd with a {1to8} operand
   3192 ; CHECK-LABEL: nearbyint_v8f64_broadcast:
   3193 ; CHECK:       ## %bb.0:
   3194 ; CHECK-NEXT:    vrndscalepd $12, (%rdi){1to8}, %zmm0
   3195 ; CHECK-NEXT:    retq
   3196   %ps = load double, double* %ptr ; scalar that gets broadcast
   3197   %pins = insertelement <8 x double> undef, double %ps, i32 0 ; scalar into lane 0
   3198   %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer ; all-zero shuffle mask splats lane 0
   3199   %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p) ; operation under test
   3200   ret <8 x double> %t
   3201 }
   3202 
   3203 define <16 x float> @nearbyint_v16f32_broadcast(float* %ptr) { ; unmasked llvm.nearbyint of a scalar splat loaded from %ptr; expected asm is vrndscaleps with a {1to16} operand
   3204 ; CHECK-LABEL: nearbyint_v16f32_broadcast:
   3205 ; CHECK:       ## %bb.0:
   3206 ; CHECK-NEXT:    vrndscaleps $12, (%rdi){1to16}, %zmm0
   3207 ; CHECK-NEXT:    retq
   3208   %ps = load float, float* %ptr ; scalar that gets broadcast
   3209   %pins = insertelement <16 x float> undef, float %ps, i32 0 ; scalar into lane 0
   3210   %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer ; all-zero shuffle mask splats lane 0
   3211   %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p) ; operation under test
   3212   ret <16 x float> %t
   3213 }
   3214 
   3215 define <2 x double> @nearbyint_v2f64_mask_broadcast(double* %ptr, <2 x double> %passthru, <2 x i64> %cmp) { ; merge-masked llvm.nearbyint of a scalar splat loaded from %ptr; expected asm uses a {1to2} operand under {%k1}
   3216 ; CHECK-LABEL: nearbyint_v2f64_mask_broadcast:
   3217 ; CHECK:       ## %bb.0:
   3218 ; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
   3219 ; CHECK-NEXT:    vrndscalepd $12, (%rdi){1to2}, %xmm0 {%k1}
   3220 ; CHECK-NEXT:    retq
   3221   %c = icmp eq <2 x i64> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   3222   %ps = load double, double* %ptr ; scalar that gets broadcast
   3223   %pins = insertelement <2 x double> undef, double %ps, i32 0 ; scalar into lane 0
   3224   %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer ; all-zero shuffle mask splats lane 0
   3225   %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) ; operation under test
   3226   %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru ; unselected lanes keep %passthru
   3227   ret <2 x double> %s
   3228 }
   3229 
   3230 define <4 x float> @nearbyint_v4f32_mask_broadcast(float* %ptr, <4 x float> %passthru, <4 x i32> %cmp) { ; merge-masked llvm.nearbyint of a scalar splat loaded from %ptr; expected asm uses a {1to4} operand under {%k1}
   3231 ; CHECK-LABEL: nearbyint_v4f32_mask_broadcast:
   3232 ; CHECK:       ## %bb.0:
   3233 ; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
   3234 ; CHECK-NEXT:    vrndscaleps $12, (%rdi){1to4}, %xmm0 {%k1}
   3235 ; CHECK-NEXT:    retq
   3236   %c = icmp eq <4 x i32> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   3237   %ps = load float, float* %ptr ; scalar that gets broadcast
   3238   %pins = insertelement <4 x float> undef, float %ps, i32 0 ; scalar into lane 0
   3239   %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer ; all-zero shuffle mask splats lane 0
   3240   %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p) ; operation under test
   3241   %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru ; unselected lanes keep %passthru
   3242   ret <4 x float> %s
   3243 }
   3244 
   3245 define <4 x double> @nearbyint_v4f64_mask_broadcast(double* %ptr, <4 x double> %passthru, <4 x i64> %cmp) { ; merge-masked llvm.nearbyint of a scalar splat loaded from %ptr; expected asm uses a {1to4} operand under {%k1}
   3246 ; CHECK-LABEL: nearbyint_v4f64_mask_broadcast:
   3247 ; CHECK:       ## %bb.0:
   3248 ; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
   3249 ; CHECK-NEXT:    vrndscalepd $12, (%rdi){1to4}, %ymm0 {%k1}
   3250 ; CHECK-NEXT:    retq
   3251   %c = icmp eq <4 x i64> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   3252   %ps = load double, double* %ptr ; scalar that gets broadcast
   3253   %pins = insertelement <4 x double> undef, double %ps, i32 0 ; scalar into lane 0
   3254   %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer ; all-zero shuffle mask splats lane 0
   3255   %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p) ; operation under test
   3256   %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru ; unselected lanes keep %passthru
   3257   ret <4 x double> %s
   3258 }
   3259 
   3260 define <8 x float> @nearbyint_v8f32_mask_broadcast(float* %ptr, <8 x float> %passthru, <8 x i32> %cmp) { ; merge-masked llvm.nearbyint of a scalar splat loaded from %ptr; expected asm uses a {1to8} operand under {%k1}
   3261 ; CHECK-LABEL: nearbyint_v8f32_mask_broadcast:
   3262 ; CHECK:       ## %bb.0:
   3263 ; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
   3264 ; CHECK-NEXT:    vrndscaleps $12, (%rdi){1to8}, %ymm0 {%k1}
   3265 ; CHECK-NEXT:    retq
   3266   %c = icmp eq <8 x i32> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   3267   %ps = load float, float* %ptr ; scalar that gets broadcast
   3268   %pins = insertelement <8 x float> undef, float %ps, i32 0 ; scalar into lane 0
   3269   %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer ; all-zero shuffle mask splats lane 0
   3270   %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p) ; operation under test
   3271   %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru ; unselected lanes keep %passthru
   3272   ret <8 x float> %s
   3273 }
   3274 
   3275 define <8 x double> @nearbyint_v8f64_mask_broadcast(double* %ptr, <8 x double> %passthru, <8 x i64> %cmp) { ; merge-masked llvm.nearbyint of a scalar splat loaded from %ptr; expected asm uses a {1to8} operand under {%k1}
   3276 ; CHECK-LABEL: nearbyint_v8f64_mask_broadcast:
   3277 ; CHECK:       ## %bb.0:
   3278 ; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
   3279 ; CHECK-NEXT:    vrndscalepd $12, (%rdi){1to8}, %zmm0 {%k1}
   3280 ; CHECK-NEXT:    retq
   3281   %c = icmp eq <8 x i64> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   3282   %ps = load double, double* %ptr ; scalar that gets broadcast
   3283   %pins = insertelement <8 x double> undef, double %ps, i32 0 ; scalar into lane 0
   3284   %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer ; all-zero shuffle mask splats lane 0
   3285   %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p) ; operation under test
   3286   %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru ; unselected lanes keep %passthru
   3287   ret <8 x double> %s
   3288 }
   3289 
   3290 define <16 x float> @nearbyint_v16f32_mask_broadcast(float* %ptr, <16 x float> %passthru, <16 x i32> %cmp) { ; merge-masked llvm.nearbyint of a scalar splat loaded from %ptr; expected asm uses a {1to16} operand under {%k1}
   3291 ; CHECK-LABEL: nearbyint_v16f32_mask_broadcast:
   3292 ; CHECK:       ## %bb.0:
   3293 ; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
   3294 ; CHECK-NEXT:    vrndscaleps $12, (%rdi){1to16}, %zmm0 {%k1}
   3295 ; CHECK-NEXT:    retq
   3296   %c = icmp eq <16 x i32> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   3297   %ps = load float, float* %ptr ; scalar that gets broadcast
   3298   %pins = insertelement <16 x float> undef, float %ps, i32 0 ; scalar into lane 0
   3299   %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer ; all-zero shuffle mask splats lane 0
   3300   %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p) ; operation under test
   3301   %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru ; unselected lanes keep %passthru
   3302   ret <16 x float> %s
   3303 }
   3304 
   3305 define <2 x double> @nearbyint_v2f64_maskz_broadcast(double* %ptr, <2 x i64> %cmp) { ; zero-masked llvm.nearbyint of a scalar splat loaded from %ptr; expected asm uses a {1to2} operand in the {%k1} {z} form
   3306 ; CHECK-LABEL: nearbyint_v2f64_maskz_broadcast:
   3307 ; CHECK:       ## %bb.0:
   3308 ; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
   3309 ; CHECK-NEXT:    vrndscalepd $12, (%rdi){1to2}, %xmm0 {%k1} {z}
   3310 ; CHECK-NEXT:    retq
   3311   %c = icmp eq <2 x i64> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   3312   %ps = load double, double* %ptr ; scalar that gets broadcast
   3313   %pins = insertelement <2 x double> undef, double %ps, i32 0 ; scalar into lane 0
   3314   %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer ; all-zero shuffle mask splats lane 0
   3315   %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) ; operation under test
   3316   %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer ; unselected lanes are zeroed
   3317   ret <2 x double> %s
   3318 }
   3319 
   3320 define <4 x float> @nearbyint_v4f32_maskz_broadcast(float* %ptr, <4 x i32> %cmp) { ; zero-masked llvm.nearbyint of a scalar splat loaded from %ptr; expected asm uses a {1to4} operand in the {%k1} {z} form
   3321 ; CHECK-LABEL: nearbyint_v4f32_maskz_broadcast:
   3322 ; CHECK:       ## %bb.0:
   3323 ; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
   3324 ; CHECK-NEXT:    vrndscaleps $12, (%rdi){1to4}, %xmm0 {%k1} {z}
   3325 ; CHECK-NEXT:    retq
   3326   %c = icmp eq <4 x i32> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   3327   %ps = load float, float* %ptr ; scalar that gets broadcast
   3328   %pins = insertelement <4 x float> undef, float %ps, i32 0 ; scalar into lane 0
   3329   %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer ; all-zero shuffle mask splats lane 0
   3330   %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p) ; operation under test
   3331   %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer ; unselected lanes are zeroed
   3332   ret <4 x float> %s
   3333 }
   3334 
   3335 define <4 x double> @nearbyint_v4f64_maskz_broadcast(double* %ptr, <4 x i64> %cmp) { ; zero-masked llvm.nearbyint of a scalar splat loaded from %ptr; expected asm uses a {1to4} operand in the {%k1} {z} form
   3336 ; CHECK-LABEL: nearbyint_v4f64_maskz_broadcast:
   3337 ; CHECK:       ## %bb.0:
   3338 ; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
   3339 ; CHECK-NEXT:    vrndscalepd $12, (%rdi){1to4}, %ymm0 {%k1} {z}
   3340 ; CHECK-NEXT:    retq
   3341   %c = icmp eq <4 x i64> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   3342   %ps = load double, double* %ptr ; scalar that gets broadcast
   3343   %pins = insertelement <4 x double> undef, double %ps, i32 0 ; scalar into lane 0
   3344   %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer ; all-zero shuffle mask splats lane 0
   3345   %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p) ; operation under test
   3346   %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer ; unselected lanes are zeroed
   3347   ret <4 x double> %s
   3348 }
   3349 
   3350 define <8 x float> @nearbyint_v8f32_maskz_broadcast(float* %ptr, <8 x i32> %cmp) { ; zero-masked llvm.nearbyint of a scalar splat loaded from %ptr; expected asm uses a {1to8} operand in the {%k1} {z} form
   3351 ; CHECK-LABEL: nearbyint_v8f32_maskz_broadcast:
   3352 ; CHECK:       ## %bb.0:
   3353 ; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
   3354 ; CHECK-NEXT:    vrndscaleps $12, (%rdi){1to8}, %ymm0 {%k1} {z}
   3355 ; CHECK-NEXT:    retq
   3356   %c = icmp eq <8 x i32> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   3357   %ps = load float, float* %ptr ; scalar that gets broadcast
   3358   %pins = insertelement <8 x float> undef, float %ps, i32 0 ; scalar into lane 0
   3359   %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer ; all-zero shuffle mask splats lane 0
   3360   %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p) ; operation under test
   3361   %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer ; unselected lanes are zeroed
   3362   ret <8 x float> %s
   3363 }
   3364 
   3365 define <8 x double> @nearbyint_v8f64_maskz_broadcast(double* %ptr, <8 x i64> %cmp) { ; zero-masked llvm.nearbyint of a scalar splat loaded from %ptr; expected asm uses a {1to8} operand in the {%k1} {z} form
   3366 ; CHECK-LABEL: nearbyint_v8f64_maskz_broadcast:
   3367 ; CHECK:       ## %bb.0:
   3368 ; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
   3369 ; CHECK-NEXT:    vrndscalepd $12, (%rdi){1to8}, %zmm0 {%k1} {z}
   3370 ; CHECK-NEXT:    retq
   3371   %c = icmp eq <8 x i64> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   3372   %ps = load double, double* %ptr ; scalar that gets broadcast
   3373   %pins = insertelement <8 x double> undef, double %ps, i32 0 ; scalar into lane 0
   3374   %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer ; all-zero shuffle mask splats lane 0
   3375   %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p) ; operation under test
   3376   %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer ; unselected lanes are zeroed
   3377   ret <8 x double> %s
   3378 }
   3379 
   3380 define <16 x float> @nearbyint_v16f32_maskz_broadcast(float* %ptr, <16 x i32> %cmp) { ; zero-masked llvm.nearbyint of a scalar splat loaded from %ptr; expected asm uses a {1to16} operand in the {%k1} {z} form
   3381 ; CHECK-LABEL: nearbyint_v16f32_maskz_broadcast:
   3382 ; CHECK:       ## %bb.0:
   3383 ; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
   3384 ; CHECK-NEXT:    vrndscaleps $12, (%rdi){1to16}, %zmm0 {%k1} {z}
   3385 ; CHECK-NEXT:    retq
   3386   %c = icmp eq <16 x i32> %cmp, zeroinitializer ; lane mask: true where %cmp == 0
   3387   %ps = load float, float* %ptr ; scalar that gets broadcast
   3388   %pins = insertelement <16 x float> undef, float %ps, i32 0 ; scalar into lane 0
   3389   %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer ; all-zero shuffle mask splats lane 0
   3390   %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p) ; operation under test
   3391   %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer ; unselected lanes are zeroed
   3392   ret <16 x float> %s
   3393 }
   3394