; Test vector intrinsics added with z14.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
      4 
      5 declare <2 x i64> @llvm.s390.vbperm(<16 x i8>, <16 x i8>)
      6 declare <16 x i8> @llvm.s390.vmslg(<2 x i64>, <2 x i64>, <16 x i8>, i32)
      7 declare <16 x i8> @llvm.s390.vlrl(i32, i8 *)
      8 declare void @llvm.s390.vstrl(<16 x i8>, i32, i8 *)
      9 
     10 declare {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float>, <4 x float>)
     11 declare {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float>, <4 x float>)
     12 declare {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float>, <4 x float>)
     13 declare {<4 x i32>, i32} @llvm.s390.vftcisb(<4 x float>, i32)
     14 declare <4 x float> @llvm.s390.vfisb(<4 x float>, i32, i32)
     15 
     16 declare <2 x double> @llvm.s390.vfmaxdb(<2 x double>, <2 x double>, i32)
     17 declare <2 x double> @llvm.s390.vfmindb(<2 x double>, <2 x double>, i32)
     18 declare <4 x float> @llvm.s390.vfmaxsb(<4 x float>, <4 x float>, i32)
     19 declare <4 x float> @llvm.s390.vfminsb(<4 x float>, <4 x float>, i32)
     20 
     21 ; VBPERM.
     22 define <2 x i64> @test_vbperm(<16 x i8> %a, <16 x i8> %b) {
     23 ; CHECK-LABEL: test_vbperm:
     24 ; CHECK: vbperm %v24, %v24, %v26
     25 ; CHECK: br %r14
     26   %res = call <2 x i64> @llvm.s390.vbperm(<16 x i8> %a, <16 x i8> %b)
     27   ret <2 x i64> %res
     28 }
     29 
     30 ; VMSLG with no shifts.
     31 define <16 x i8> @test_vmslg1(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c) {
     32 ; CHECK-LABEL: test_vmslg1:
     33 ; CHECK: vmslg %v24, %v24, %v26, %v28, 0
     34 ; CHECK: br %r14
     35   %res = call <16 x i8> @llvm.s390.vmslg(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c, i32 0)
     36   ret <16 x i8> %res
     37 }
     38 
     39 ; VMSLG with both shifts.
     40 define <16 x i8> @test_vmslg2(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c) {
     41 ; CHECK-LABEL: test_vmslg2:
     42 ; CHECK: vmslg %v24, %v24, %v26, %v28, 12
     43 ; CHECK: br %r14
     44   %res = call <16 x i8> @llvm.s390.vmslg(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c, i32 12)
     45   ret <16 x i8> %res
     46 }
     47 
     48 ; VLRLR with the lowest in-range displacement.
     49 define <16 x i8> @test_vlrlr1(i8 *%ptr, i32 %length) {
     50 ; CHECK-LABEL: test_vlrlr1:
     51 ; CHECK: vlrlr %v24, %r3, 0(%r2)
     52 ; CHECK: br %r14
     53   %res = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%ptr)
     54   ret <16 x i8> %res
     55 }
     56 
     57 ; VLRLR with the highest in-range displacement.
     58 define <16 x i8> @test_vlrlr2(i8 *%base, i32 %length) {
     59 ; CHECK-LABEL: test_vlrlr2:
     60 ; CHECK: vlrlr %v24, %r3, 4095(%r2)
     61 ; CHECK: br %r14
     62   %ptr = getelementptr i8, i8 *%base, i64 4095
     63   %res = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%ptr)
     64   ret <16 x i8> %res
     65 }
     66 
     67 ; VLRLR with an out-of-range displacement.
     68 define <16 x i8> @test_vlrlr3(i8 *%base, i32 %length) {
     69 ; CHECK-LABEL: test_vlrlr3:
     70 ; CHECK: vlrlr %v24, %r3, 0({{%r[1-5]}})
     71 ; CHECK: br %r14
     72   %ptr = getelementptr i8, i8 *%base, i64 4096
     73   %res = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%ptr)
     74   ret <16 x i8> %res
     75 }
     76 
     77 ; Check that VLRLR doesn't allow an index.
     78 define <16 x i8> @test_vlrlr4(i8 *%base, i64 %index, i32 %length) {
     79 ; CHECK-LABEL: test_vlrlr4:
     80 ; CHECK: vlrlr %v24, %r4, 0({{%r[1-5]}})
     81 ; CHECK: br %r14
     82   %ptr = getelementptr i8, i8 *%base, i64 %index
     83   %res = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%ptr)
     84   ret <16 x i8> %res
     85 }
     86 
     87 ; VLRL with the lowest in-range displacement.
     88 define <16 x i8> @test_vlrl1(i8 *%ptr) {
     89 ; CHECK-LABEL: test_vlrl1:
     90 ; CHECK: vlrl %v24, 0(%r2), 0
     91 ; CHECK: br %r14
     92   %res = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%ptr)
     93   ret <16 x i8> %res
     94 }
     95 
     96 ; VLRL with the highest in-range displacement.
     97 define <16 x i8> @test_vlrl2(i8 *%base) {
     98 ; CHECK-LABEL: test_vlrl2:
     99 ; CHECK: vlrl %v24, 4095(%r2), 0
    100 ; CHECK: br %r14
    101   %ptr = getelementptr i8, i8 *%base, i64 4095
    102   %res = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%ptr)
    103   ret <16 x i8> %res
    104 }
    105 
    106 ; VLRL with an out-of-range displacement.
    107 define <16 x i8> @test_vlrl3(i8 *%base) {
    108 ; CHECK-LABEL: test_vlrl3:
    109 ; CHECK: vlrl %v24, 0({{%r[1-5]}}), 0
    110 ; CHECK: br %r14
    111   %ptr = getelementptr i8, i8 *%base, i64 4096
    112   %res = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%ptr)
    113   ret <16 x i8> %res
    114 }
    115 
    116 ; Check that VLRL doesn't allow an index.
    117 define <16 x i8> @test_vlrl4(i8 *%base, i64 %index) {
    118 ; CHECK-LABEL: test_vlrl4:
    119 ; CHECK: vlrl %v24, 0({{%r[1-5]}}), 0
    120 ; CHECK: br %r14
    121   %ptr = getelementptr i8, i8 *%base, i64 %index
    122   %res = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%ptr)
    123   ret <16 x i8> %res
    124 }
    125 
    126 ; VSTRLR with the lowest in-range displacement.
    127 define void @test_vstrlr1(<16 x i8> %vec, i8 *%ptr, i32 %length) {
    128 ; CHECK-LABEL: test_vstrlr1:
    129 ; CHECK: vstrlr %v24, %r3, 0(%r2)
    130 ; CHECK: br %r14
    131   call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%ptr)
    132   ret void
    133 }
    134 
    135 ; VSTRLR with the highest in-range displacement.
    136 define void @test_vstrlr2(<16 x i8> %vec, i8 *%base, i32 %length) {
    137 ; CHECK-LABEL: test_vstrlr2:
    138 ; CHECK: vstrlr %v24, %r3, 4095(%r2)
    139 ; CHECK: br %r14
    140   %ptr = getelementptr i8, i8 *%base, i64 4095
    141   call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%ptr)
    142   ret void
    143 }
    144 
    145 ; VSTRLR with an out-of-range displacement.
    146 define void @test_vstrlr3(<16 x i8> %vec, i8 *%base, i32 %length) {
    147 ; CHECK-LABEL: test_vstrlr3:
    148 ; CHECK: vstrlr %v24, %r3, 0({{%r[1-5]}})
    149 ; CHECK: br %r14
    150   %ptr = getelementptr i8, i8 *%base, i64 4096
    151   call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%ptr)
    152   ret void
    153 }
    154 
    155 ; Check that VSTRLR doesn't allow an index.
    156 define void @test_vstrlr4(<16 x i8> %vec, i8 *%base, i64 %index, i32 %length) {
    157 ; CHECK-LABEL: test_vstrlr4:
    158 ; CHECK: vstrlr %v24, %r4, 0({{%r[1-5]}})
    159 ; CHECK: br %r14
    160   %ptr = getelementptr i8, i8 *%base, i64 %index
    161   call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%ptr)
    162   ret void
    163 }
    164 
    165 ; VSTRL with the lowest in-range displacement.
    166 define void @test_vstrl1(<16 x i8> %vec, i8 *%ptr) {
    167 ; CHECK-LABEL: test_vstrl1:
    168 ; CHECK: vstrl %v24, 0(%r2), 8
    169 ; CHECK: br %r14
    170   call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%ptr)
    171   ret void
    172 }
    173 
    174 ; VSTRL with the highest in-range displacement.
    175 define void @test_vstrl2(<16 x i8> %vec, i8 *%base) {
    176 ; CHECK-LABEL: test_vstrl2:
    177 ; CHECK: vstrl %v24, 4095(%r2), 8
    178 ; CHECK: br %r14
    179   %ptr = getelementptr i8, i8 *%base, i64 4095
    180   call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%ptr)
    181   ret void
    182 }
    183 
    184 ; VSTRL with an out-of-range displacement.
    185 define void @test_vstrl3(<16 x i8> %vec, i8 *%base) {
    186 ; CHECK-LABEL: test_vstrl3:
    187 ; CHECK: vstrl %v24, 0({{%r[1-5]}}), 8
    188 ; CHECK: br %r14
    189   %ptr = getelementptr i8, i8 *%base, i64 4096
    190   call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%ptr)
    191   ret void
    192 }
    193 
    194 ; Check that VSTRL doesn't allow an index.
    195 define void @test_vstrl4(<16 x i8> %vec, i8 *%base, i64 %index) {
    196 ; CHECK-LABEL: test_vstrl4:
    197 ; CHECK: vstrl %v24, 0({{%r[1-5]}}), 8
    198 ; CHECK: br %r14
    199   %ptr = getelementptr i8, i8 *%base, i64 %index
    200   call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%ptr)
    201   ret void
    202 }
    203 
    204 ; VFCESBS with no processing of the result.
    205 define i32 @test_vfcesbs(<4 x float> %a, <4 x float> %b) {
    206 ; CHECK-LABEL: test_vfcesbs:
    207 ; CHECK: vfcesbs {{%v[0-9]+}}, %v24, %v26
    208 ; CHECK: ipm %r2
    209 ; CHECK: srl %r2, 28
    210 ; CHECK: br %r14
    211   %call = call {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float> %a,
    212                                                    <4 x float> %b)
    213   %res = extractvalue {<4 x i32>, i32} %call, 1
    214   ret i32 %res
    215 }
    216 
    217 ; VFCESBS, returning 1 if any elements are equal (CC != 3).
    218 define i32 @test_vfcesbs_any_bool(<4 x float> %a, <4 x float> %b) {
    219 ; CHECK-LABEL: test_vfcesbs_any_bool:
    220 ; CHECK: vfcesbs {{%v[0-9]+}}, %v24, %v26
    221 ; CHECK: lhi %r2, 0
    222 ; CHECK: lochile %r2, 1
    223 ; CHECK: br %r14
    224   %call = call {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float> %a,
    225                                                    <4 x float> %b)
    226   %res = extractvalue {<4 x i32>, i32} %call, 1
    227   %cmp = icmp ne i32 %res, 3
    228   %ext = zext i1 %cmp to i32
    229   ret i32 %ext
    230 }
    231 
    232 ; VFCESBS, storing to %ptr if any elements are equal.
    233 define <4 x i32> @test_vfcesbs_any_store(<4 x float> %a, <4 x float> %b,
    234                                          i32 *%ptr) {
    235 ; CHECK-LABEL: test_vfcesbs_any_store:
    236 ; CHECK-NOT: %r
    237 ; CHECK: vfcesbs %v24, %v24, %v26
    238 ; CHECK-NEXT: {{bor|bnler}} %r14
    239 ; CHECK: mvhi 0(%r2), 0
    240 ; CHECK: br %r14
    241   %call = call {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float> %a,
    242                                                    <4 x float> %b)
    243   %res = extractvalue {<4 x i32>, i32} %call, 0
    244   %cc = extractvalue {<4 x i32>, i32} %call, 1
    245   %cmp = icmp ule i32 %cc, 2
    246   br i1 %cmp, label %store, label %exit
    247 
    248 store:
    249   store i32 0, i32 *%ptr
    250   br label %exit
    251 
    252 exit:
    253   ret <4 x i32> %res
    254 }
    255 
    256 ; VFCHSBS with no processing of the result.
    257 define i32 @test_vfchsbs(<4 x float> %a, <4 x float> %b) {
    258 ; CHECK-LABEL: test_vfchsbs:
    259 ; CHECK: vfchsbs {{%v[0-9]+}}, %v24, %v26
    260 ; CHECK: ipm %r2
    261 ; CHECK: srl %r2, 28
    262 ; CHECK: br %r14
    263   %call = call {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float> %a,
    264                                                    <4 x float> %b)
    265   %res = extractvalue {<4 x i32>, i32} %call, 1
    266   ret i32 %res
    267 }
    268 
    269 ; VFCHSBS, returning 1 if not all elements are higher.
    270 define i32 @test_vfchsbs_notall_bool(<4 x float> %a, <4 x float> %b) {
    271 ; CHECK-LABEL: test_vfchsbs_notall_bool:
    272 ; CHECK: vfchsbs {{%v[0-9]+}}, %v24, %v26
    273 ; CHECK: lhi %r2, 0
    274 ; CHECK: lochinhe %r2, 1
    275 ; CHECK: br %r14
    276   %call = call {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float> %a,
    277                                                    <4 x float> %b)
    278   %res = extractvalue {<4 x i32>, i32} %call, 1
    279   %cmp = icmp sge i32 %res, 1
    280   %ext = zext i1 %cmp to i32
    281   ret i32 %ext
    282 }
    283 
    284 ; VFCHSBS, storing to %ptr if not all elements are higher.
    285 define <4 x i32> @test_vfchsbs_notall_store(<4 x float> %a, <4 x float> %b,
    286                                             i32 *%ptr) {
    287 ; CHECK-LABEL: test_vfchsbs_notall_store:
    288 ; CHECK-NOT: %r
    289 ; CHECK: vfchsbs %v24, %v24, %v26
    290 ; CHECK-NEXT: {{bher|ber}} %r14
    291 ; CHECK: mvhi 0(%r2), 0
    292 ; CHECK: br %r14
    293   %call = call {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float> %a,
    294                                                    <4 x float> %b)
    295   %res = extractvalue {<4 x i32>, i32} %call, 0
    296   %cc = extractvalue {<4 x i32>, i32} %call, 1
    297   %cmp = icmp ugt i32 %cc, 0
    298   br i1 %cmp, label %store, label %exit
    299 
    300 store:
    301   store i32 0, i32 *%ptr
    302   br label %exit
    303 
    304 exit:
    305   ret <4 x i32> %res
    306 }
    307 
    308 ; VFCHESBS with no processing of the result.
    309 define i32 @test_vfchesbs(<4 x float> %a, <4 x float> %b) {
    310 ; CHECK-LABEL: test_vfchesbs:
    311 ; CHECK: vfchesbs {{%v[0-9]+}}, %v24, %v26
    312 ; CHECK: ipm %r2
    313 ; CHECK: srl %r2, 28
    314 ; CHECK: br %r14
    315   %call = call {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float> %a,
    316 						    <4 x float> %b)
    317   %res = extractvalue {<4 x i32>, i32} %call, 1
    318   ret i32 %res
    319 }
    320 
    321 ; VFCHESBS, returning 1 if neither element is higher or equal.
    322 define i32 @test_vfchesbs_none_bool(<4 x float> %a, <4 x float> %b) {
    323 ; CHECK-LABEL: test_vfchesbs_none_bool:
    324 ; CHECK: vfchesbs {{%v[0-9]+}}, %v24, %v26
    325 ; CHECK: lhi %r2, 0
    326 ; CHECK: lochio %r2, 1
    327 ; CHECK: br %r14
    328   %call = call {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float> %a,
    329 						    <4 x float> %b)
    330   %res = extractvalue {<4 x i32>, i32} %call, 1
    331   %cmp = icmp eq i32 %res, 3
    332   %ext = zext i1 %cmp to i32
    333   ret i32 %ext
    334 }
    335 
    336 ; VFCHESBS, storing to %ptr if neither element is higher or equal.
    337 define <4 x i32> @test_vfchesbs_none_store(<4 x float> %a, <4 x float> %b,
    338                                            i32 *%ptr) {
    339 ; CHECK-LABEL: test_vfchesbs_none_store:
    340 ; CHECK-NOT: %r
    341 ; CHECK: vfchesbs %v24, %v24, %v26
    342 ; CHECK-NEXT: {{bnor|bler}} %r14
    343 ; CHECK: mvhi 0(%r2), 0
    344 ; CHECK: br %r14
    345   %call = call {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float> %a,
    346 						    <4 x float> %b)
    347   %res = extractvalue {<4 x i32>, i32} %call, 0
    348   %cc = extractvalue {<4 x i32>, i32} %call, 1
    349   %cmp = icmp uge i32 %cc, 3
    350   br i1 %cmp, label %store, label %exit
    351 
    352 store:
    353   store i32 0, i32 *%ptr
    354   br label %exit
    355 
    356 exit:
    357   ret <4 x i32> %res
    358 }
    359 
    360 ; VFTCISB with the lowest useful class selector and no processing of the result.
    361 define i32 @test_vftcisb(<4 x float> %a) {
    362 ; CHECK-LABEL: test_vftcisb:
    363 ; CHECK: vftcisb {{%v[0-9]+}}, %v24, 1
    364 ; CHECK: ipm %r2
    365 ; CHECK: srl %r2, 28
    366 ; CHECK: br %r14
    367   %call = call {<4 x i32>, i32} @llvm.s390.vftcisb(<4 x float> %a, i32 1)
    368   %res = extractvalue {<4 x i32>, i32} %call, 1
    369   ret i32 %res
    370 }
    371 
    372 ; VFTCISB with the highest useful class selector, returning 1 if all elements
    373 ; have the right class (CC == 0).
    374 define i32 @test_vftcisb_all_bool(<4 x float> %a) {
    375 ; CHECK-LABEL: test_vftcisb_all_bool:
    376 ; CHECK: vftcisb {{%v[0-9]+}}, %v24, 4094
    377 ; CHECK: lhi %r2, 0
    378 ; CHECK: lochie %r2, 1
    379 ; CHECK: br %r14
    380   %call = call {<4 x i32>, i32} @llvm.s390.vftcisb(<4 x float> %a, i32 4094)
    381   %res = extractvalue {<4 x i32>, i32} %call, 1
    382   %cmp = icmp eq i32 %res, 0
    383   %ext = zext i1 %cmp to i32
    384   ret i32 %ext
    385 }
    386 
    387 ; VFISB with a rounding mode not usable via standard intrinsics.
    388 define <4 x float> @test_vfisb_0_4(<4 x float> %a) {
    389 ; CHECK-LABEL: test_vfisb_0_4:
    390 ; CHECK: vfisb %v24, %v24, 0, 4
    391 ; CHECK: br %r14
    392   %res = call <4 x float> @llvm.s390.vfisb(<4 x float> %a, i32 0, i32 4)
    393   ret <4 x float> %res
    394 }
    395 
    396 ; VFISB with IEEE-inexact exception suppressed.
    397 define <4 x float> @test_vfisb_4_0(<4 x float> %a) {
    398 ; CHECK-LABEL: test_vfisb_4_0:
    399 ; CHECK: vfisb %v24, %v24, 4, 0
    400 ; CHECK: br %r14
    401   %res = call <4 x float> @llvm.s390.vfisb(<4 x float> %a, i32 4, i32 0)
    402   ret <4 x float> %res
    403 }
    404 
    405 ; VFMAXDB.
    406 define <2 x double> @test_vfmaxdb(<2 x double> %a, <2 x double> %b) {
    407 ; CHECK-LABEL: test_vfmaxdb:
    408 ; CHECK: vfmaxdb %v24, %v24, %v26, 4
    409 ; CHECK: br %r14
    410   %res = call <2 x double> @llvm.s390.vfmaxdb(<2 x double> %a, <2 x double> %b, i32 4)
    411   ret <2 x double> %res
    412 }
    413 
    414 ; VFMINDB.
    415 define <2 x double> @test_vfmindb(<2 x double> %a, <2 x double> %b) {
    416 ; CHECK-LABEL: test_vfmindb:
    417 ; CHECK: vfmindb %v24, %v24, %v26, 4
    418 ; CHECK: br %r14
    419   %res = call <2 x double> @llvm.s390.vfmindb(<2 x double> %a, <2 x double> %b, i32 4)
    420   ret <2 x double> %res
    421 }
    422 
    423 ; VFMAXSB.
    424 define <4 x float> @test_vfmaxsb(<4 x float> %a, <4 x float> %b) {
    425 ; CHECK-LABEL: test_vfmaxsb:
    426 ; CHECK: vfmaxsb %v24, %v24, %v26, 4
    427 ; CHECK: br %r14
    428   %res = call <4 x float> @llvm.s390.vfmaxsb(<4 x float> %a, <4 x float> %b, i32 4)
    429   ret <4 x float> %res
    430 }
    431 
    432 ; VFMINSB.
    433 define <4 x float> @test_vfminsb(<4 x float> %a, <4 x float> %b) {
    434 ; CHECK-LABEL: test_vfminsb:
    435 ; CHECK: vfminsb %v24, %v24, %v26, 4
    436 ; CHECK: br %r14
    437   %res = call <4 x float> @llvm.s390.vfminsb(<4 x float> %a, <4 x float> %b, i32 4)
    438   ret <4 x float> %res
    439 }
    440 
    441