; Test vector intrinsics added with z14.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s

; Intrinsics operating on integer vectors and/or a pointer operand.
declare <2 x i64> @llvm.s390.vbperm(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.s390.vmslg(<2 x i64>, <2 x i64>, <16 x i8>, i32)
declare <16 x i8> @llvm.s390.vlrl(i32, i8 *)
declare void @llvm.s390.vstrl(<16 x i8>, i32, i8 *)

; Single-precision intrinsics; the first four return a {vector, i32} pair.
declare {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float>, <4 x float>)
declare {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float>, <4 x float>)
declare {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float>, <4 x float>)
declare {<4 x i32>, i32} @llvm.s390.vftcisb(<4 x float>, i32)
declare <4 x float> @llvm.s390.vfisb(<4 x float>, i32, i32)

; Maximum/minimum intrinsics, each taking a trailing i32 operand.
declare <2 x double> @llvm.s390.vfmaxdb(<2 x double>, <2 x double>, i32)
declare <2 x double> @llvm.s390.vfmindb(<2 x double>, <2 x double>, i32)
declare <4 x float> @llvm.s390.vfmaxsb(<4 x float>, <4 x float>, i32)
declare <4 x float> @llvm.s390.vfminsb(<4 x float>, <4 x float>, i32)

; VBPERM applied directly to the two incoming vector arguments.
define <2 x i64> @test_vbperm(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vbperm:
; CHECK: vbperm %v24, %v24, %v26
; CHECK: br %r14
  %perm = call <2 x i64> @llvm.s390.vbperm(<16 x i8> %a, <16 x i8> %b)
  ret <2 x i64> %perm
}

; VMSLG with no shifts (final operand 0).
define <16 x i8> @test_vmslg1(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vmslg1:
; CHECK: vmslg %v24, %v24, %v26, %v28, 0
; CHECK: br %r14
  %sum = call <16 x i8> @llvm.s390.vmslg(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c, i32 0)
  ret <16 x i8> %sum
}

; VMSLG with both shifts (final operand 12).
define <16 x i8> @test_vmslg2(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vmslg2:
; CHECK: vmslg %v24, %v24, %v26, %v28, 12
; CHECK: br %r14
  %sum = call <16 x i8> @llvm.s390.vmslg(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c, i32 12)
  ret <16 x i8> %sum
}

; VLRLR with the lowest in-range displacement.
define <16 x i8> @test_vlrlr1(i8 *%ptr, i32 %length) {
; CHECK-LABEL: test_vlrlr1:
; CHECK: vlrlr %v24, %r3, 0(%r2)
; CHECK: br %r14
  ; A non-constant length is expected to select the register form.
  %loaded = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%ptr)
  ret <16 x i8> %loaded
}

; VLRLR at the top of the displacement range (4095).
define <16 x i8> @test_vlrlr2(i8 *%base, i32 %length) {
; CHECK-LABEL: test_vlrlr2:
; CHECK: vlrlr %v24, %r3, 4095(%r2)
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%base, i64 4095
  %loaded = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%addr)
  ret <16 x i8> %loaded
}

; VLRLR one byte past the displacement range (4096): the expected output
; addresses memory through a register with displacement 0.
define <16 x i8> @test_vlrlr3(i8 *%base, i32 %length) {
; CHECK-LABEL: test_vlrlr3:
; CHECK: vlrlr %v24, %r3, 0({{%r[1-5]}})
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%base, i64 4096
  %loaded = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%addr)
  ret <16 x i8> %loaded
}

; Verify that VLRLR is not given an index register: base + index must be
; combined into a single register first.
define <16 x i8> @test_vlrlr4(i8 *%base, i64 %index, i32 %length) {
; CHECK-LABEL: test_vlrlr4:
; CHECK: vlrlr %v24, %r4, 0({{%r[1-5]}})
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%base, i64 %index
  %loaded = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%addr)
  ret <16 x i8> %loaded
}

; VLRL (constant length operand) at the bottom of the displacement range.
define <16 x i8> @test_vlrl1(i8 *%ptr) {
; CHECK-LABEL: test_vlrl1:
; CHECK: vlrl %v24, 0(%r2), 0
; CHECK: br %r14
  %loaded = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%ptr)
  ret <16 x i8> %loaded
}

; VLRL at the top of the displacement range (4095).
define <16 x i8> @test_vlrl2(i8 *%base) {
; CHECK-LABEL: test_vlrl2:
; CHECK: vlrl %v24, 4095(%r2), 0
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%base, i64 4095
  %loaded = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%addr)
  ret <16 x i8> %loaded
}

; VLRL with an out-of-range displacement.
define <16 x i8> @test_vlrl3(i8 *%base) {
; CHECK-LABEL: test_vlrl3:
; CHECK: vlrl %v24, 0({{%r[1-5]}}), 0
; CHECK: br %r14
  ; 4096 is one past the largest displacement exercised as in range (4095).
  %addr = getelementptr i8, i8 *%base, i64 4096
  %loaded = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%addr)
  ret <16 x i8> %loaded
}

; Verify that VLRL is not given an index register.
define <16 x i8> @test_vlrl4(i8 *%base, i64 %index) {
; CHECK-LABEL: test_vlrl4:
; CHECK: vlrl %v24, 0({{%r[1-5]}}), 0
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%base, i64 %index
  %loaded = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%addr)
  ret <16 x i8> %loaded
}

; VSTRLR (variable length operand) at the bottom of the displacement range.
define void @test_vstrlr1(<16 x i8> %vec, i8 *%ptr, i32 %length) {
; CHECK-LABEL: test_vstrlr1:
; CHECK: vstrlr %v24, %r3, 0(%r2)
; CHECK: br %r14
  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%ptr)
  ret void
}

; VSTRLR at the top of the displacement range (4095).
define void @test_vstrlr2(<16 x i8> %vec, i8 *%base, i32 %length) {
; CHECK-LABEL: test_vstrlr2:
; CHECK: vstrlr %v24, %r3, 4095(%r2)
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%base, i64 4095
  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%addr)
  ret void
}

; VSTRLR one byte past the displacement range (4096).
define void @test_vstrlr3(<16 x i8> %vec, i8 *%base, i32 %length) {
; CHECK-LABEL: test_vstrlr3:
; CHECK: vstrlr %v24, %r3, 0({{%r[1-5]}})
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%base, i64 4096
  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%addr)
  ret void
}

; Check that VSTRLR doesn't allow an index.
define void @test_vstrlr4(<16 x i8> %vec, i8 *%base, i64 %index, i32 %length) {
; CHECK-LABEL: test_vstrlr4:
; CHECK: vstrlr %v24, %r4, 0({{%r[1-5]}})
; CHECK: br %r14
  ; base + index must be folded into one address register before the store.
  %addr = getelementptr i8, i8 *%base, i64 %index
  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%addr)
  ret void
}

; VSTRL (constant length operand 8) at the bottom of the displacement range.
define void @test_vstrl1(<16 x i8> %vec, i8 *%ptr) {
; CHECK-LABEL: test_vstrl1:
; CHECK: vstrl %v24, 0(%r2), 8
; CHECK: br %r14
  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%ptr)
  ret void
}

; VSTRL at the top of the displacement range (4095).
define void @test_vstrl2(<16 x i8> %vec, i8 *%base) {
; CHECK-LABEL: test_vstrl2:
; CHECK: vstrl %v24, 4095(%r2), 8
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%base, i64 4095
  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%addr)
  ret void
}

; VSTRL one byte past the displacement range (4096).
define void @test_vstrl3(<16 x i8> %vec, i8 *%base) {
; CHECK-LABEL: test_vstrl3:
; CHECK: vstrl %v24, 0({{%r[1-5]}}), 8
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%base, i64 4096
  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%addr)
  ret void
}

; Verify that VSTRL is not given an index register.
define void @test_vstrl4(<16 x i8> %vec, i8 *%base, i64 %index) {
; CHECK-LABEL: test_vstrl4:
; CHECK: vstrl %v24, 0({{%r[1-5]}}), 8
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%base, i64 %index
  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%addr)
  ret void
}

; VFCESBS with no processing of the result.
define i32 @test_vfcesbs(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vfcesbs:
; CHECK: vfcesbs {{%v[0-9]+}}, %v24, %v26
; CHECK: ipm %r2
; CHECK: srl %r2, 28
; CHECK: br %r14
  %pair = call {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float> %a, <4 x float> %b)
  ; Hand back the i32 member of the result pair untouched.
  %cc = extractvalue {<4 x i32>, i32} %pair, 1
  ret i32 %cc
}

; VFCESBS, returning 1 if any elements are equal (CC != 3).
define i32 @test_vfcesbs_any_bool(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vfcesbs_any_bool:
; CHECK: vfcesbs {{%v[0-9]+}}, %v24, %v26
; CHECK: lhi %r2, 0
; CHECK: lochile %r2, 1
; CHECK: br %r14
  %pair = call {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float> %a, <4 x float> %b)
  %cc = extractvalue {<4 x i32>, i32} %pair, 1
  %any = icmp ne i32 %cc, 3
  %flag = zext i1 %any to i32
  ret i32 %flag
}

; VFCESBS, storing 0 to %ptr if any elements are equal (CC <= 2) and
; returning the vector half of the result either way.
define <4 x i32> @test_vfcesbs_any_store(<4 x float> %a, <4 x float> %b, i32 *%ptr) {
; CHECK-LABEL: test_vfcesbs_any_store:
; CHECK-NOT: %r
; CHECK: vfcesbs %v24, %v24, %v26
; CHECK-NEXT: {{bor|bnler}} %r14
; CHECK: mvhi 0(%r2), 0
; CHECK: br %r14
  %pair = call {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float> %a, <4 x float> %b)
  %mask = extractvalue {<4 x i32>, i32} %pair, 0
  %cc = extractvalue {<4 x i32>, i32} %pair, 1
  %any = icmp ule i32 %cc, 2
  br i1 %any, label %do_store, label %done

do_store:
  store i32 0, i32 *%ptr
  br label %done

done:
  ret <4 x i32> %mask
}

; VFCHSBS with no processing of the result.
define i32 @test_vfchsbs(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vfchsbs:
; CHECK: vfchsbs {{%v[0-9]+}}, %v24, %v26
; CHECK: ipm %r2
; CHECK: srl %r2, 28
; CHECK: br %r14
  %pair = call {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float> %a, <4 x float> %b)
  ; Hand back the i32 member of the result pair untouched.
  %cc = extractvalue {<4 x i32>, i32} %pair, 1
  ret i32 %cc
}

; VFCHSBS, returning 1 if not all elements are higher (CC >= 1).
define i32 @test_vfchsbs_notall_bool(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vfchsbs_notall_bool:
; CHECK: vfchsbs {{%v[0-9]+}}, %v24, %v26
; CHECK: lhi %r2, 0
; CHECK: lochinhe %r2, 1
; CHECK: br %r14
  %pair = call {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float> %a, <4 x float> %b)
  %cc = extractvalue {<4 x i32>, i32} %pair, 1
  ; Note: this variant deliberately uses a signed comparison.
  %notall = icmp sge i32 %cc, 1
  %flag = zext i1 %notall to i32
  ret i32 %flag
}

; VFCHSBS, storing 0 to %ptr if not all elements are higher (CC > 0) and
; returning the vector half of the result either way.
define <4 x i32> @test_vfchsbs_notall_store(<4 x float> %a, <4 x float> %b, i32 *%ptr) {
; CHECK-LABEL: test_vfchsbs_notall_store:
; CHECK-NOT: %r
; CHECK: vfchsbs %v24, %v24, %v26
; CHECK-NEXT: {{bher|ber}} %r14
; CHECK: mvhi 0(%r2), 0
; CHECK: br %r14
  %pair = call {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float> %a, <4 x float> %b)
  %mask = extractvalue {<4 x i32>, i32} %pair, 0
  %cc = extractvalue {<4 x i32>, i32} %pair, 1
  %notall = icmp ugt i32 %cc, 0
  br i1 %notall, label %do_store, label %done

do_store:
  store i32 0, i32 *%ptr
  br label %done

done:
  ret <4 x i32> %mask
}

; VFCHESBS with no processing of the result.
define i32 @test_vfchesbs(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vfchesbs:
; CHECK: vfchesbs {{%v[0-9]+}}, %v24, %v26
; CHECK: ipm %r2
; CHECK: srl %r2, 28
; CHECK: br %r14
  %pair = call {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float> %a, <4 x float> %b)
  ; Hand back the i32 member of the result pair untouched.
  %cc = extractvalue {<4 x i32>, i32} %pair, 1
  ret i32 %cc
}

; VFCHESBS, returning 1 if no element is higher or equal (CC == 3).
define i32 @test_vfchesbs_none_bool(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vfchesbs_none_bool:
; CHECK: vfchesbs {{%v[0-9]+}}, %v24, %v26
; CHECK: lhi %r2, 0
; CHECK: lochio %r2, 1
; CHECK: br %r14
  %pair = call {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float> %a, <4 x float> %b)
  %cc = extractvalue {<4 x i32>, i32} %pair, 1
  %none = icmp eq i32 %cc, 3
  %flag = zext i1 %none to i32
  ret i32 %flag
}

; VFCHESBS, storing 0 to %ptr if no element is higher or equal (CC >= 3) and
; returning the vector half of the result either way.
define <4 x i32> @test_vfchesbs_none_store(<4 x float> %a, <4 x float> %b, i32 *%ptr) {
; CHECK-LABEL: test_vfchesbs_none_store:
; CHECK-NOT: %r
; CHECK: vfchesbs %v24, %v24, %v26
; CHECK-NEXT: {{bnor|bler}} %r14
; CHECK: mvhi 0(%r2), 0
; CHECK: br %r14
  %pair = call {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float> %a, <4 x float> %b)
  %mask = extractvalue {<4 x i32>, i32} %pair, 0
  %cc = extractvalue {<4 x i32>, i32} %pair, 1
  %none = icmp uge i32 %cc, 3
  br i1 %none, label %do_store, label %done

do_store:
  store i32 0, i32 *%ptr
  br label %done

done:
  ret <4 x i32> %mask
}

; VFTCISB with the lowest useful class selector and no processing of the result.
define i32 @test_vftcisb(<4 x float> %a) {
; CHECK-LABEL: test_vftcisb:
; CHECK: vftcisb {{%v[0-9]+}}, %v24, 1
; CHECK: ipm %r2
; CHECK: srl %r2, 28
; CHECK: br %r14
  %pair = call {<4 x i32>, i32} @llvm.s390.vftcisb(<4 x float> %a, i32 1)
  ; Hand back the i32 member of the result pair untouched.
  %cc = extractvalue {<4 x i32>, i32} %pair, 1
  ret i32 %cc
}

; VFTCISB with the highest useful class selector (4094), returning 1 if all
; elements have the right class (CC == 0).
define i32 @test_vftcisb_all_bool(<4 x float> %a) {
; CHECK-LABEL: test_vftcisb_all_bool:
; CHECK: vftcisb {{%v[0-9]+}}, %v24, 4094
; CHECK: lhi %r2, 0
; CHECK: lochie %r2, 1
; CHECK: br %r14
  %pair = call {<4 x i32>, i32} @llvm.s390.vftcisb(<4 x float> %a, i32 4094)
  %cc = extractvalue {<4 x i32>, i32} %pair, 1
  %all = icmp eq i32 %cc, 0
  %flag = zext i1 %all to i32
  ret i32 %flag
}

; VFISB with a rounding mode not usable via standard intrinsics
; (operands 0, 4).
define <4 x float> @test_vfisb_0_4(<4 x float> %a) {
; CHECK-LABEL: test_vfisb_0_4:
; CHECK: vfisb %v24, %v24, 0, 4
; CHECK: br %r14
  %rounded = call <4 x float> @llvm.s390.vfisb(<4 x float> %a, i32 0, i32 4)
  ret <4 x float> %rounded
}

; VFISB with IEEE-inexact exception suppressed (operands 4, 0).
define <4 x float> @test_vfisb_4_0(<4 x float> %a) {
; CHECK-LABEL: test_vfisb_4_0:
; CHECK: vfisb %v24, %v24, 4, 0
; CHECK: br %r14
  %rounded = call <4 x float> @llvm.s390.vfisb(<4 x float> %a, i32 4, i32 0)
  ret <4 x float> %rounded
}

; VFMAXDB with trailing operand 4.
define <2 x double> @test_vfmaxdb(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: test_vfmaxdb:
; CHECK: vfmaxdb %v24, %v24, %v26, 4
; CHECK: br %r14
  %max = call <2 x double> @llvm.s390.vfmaxdb(<2 x double> %a, <2 x double> %b, i32 4)
  ret <2 x double> %max
}

; VFMINDB.
define <2 x double> @test_vfmindb(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: test_vfmindb:
; CHECK: vfmindb %v24, %v24, %v26, 4
; CHECK: br %r14
  ; The trailing operand 4 is expected to appear verbatim in the output.
  %min = call <2 x double> @llvm.s390.vfmindb(<2 x double> %a, <2 x double> %b, i32 4)
  ret <2 x double> %min
}

; VFMAXSB with trailing operand 4.
define <4 x float> @test_vfmaxsb(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vfmaxsb:
; CHECK: vfmaxsb %v24, %v24, %v26, 4
; CHECK: br %r14
  %max = call <4 x float> @llvm.s390.vfmaxsb(<4 x float> %a, <4 x float> %b, i32 4)
  ret <4 x float> %max
}

; VFMINSB with trailing operand 4.
define <4 x float> @test_vfminsb(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vfminsb:
; CHECK: vfminsb %v24, %v24, %v26, 4
; CHECK: br %r14
  %min = call <4 x float> @llvm.s390.vfminsb(<4 x float> %a, <4 x float> %b, i32 4)
  ret <4 x float> %min
}
