; Test 32-bit floating-point fused multiply-add (llvm.fma.f32) for
; s390x-linux-gnu.  The same input is compiled twice: once for z10, verified
; with the common and scalar prefixes, and once for z14, verified with the
; common and vector prefixes.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN:   | FileCheck -check-prefixes=CHECK,CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \
; RUN:   | FileCheck -check-prefixes=CHECK,CHECK-VECTOR %s

declare float @llvm.fma.f32(float %f1, float %f2, float %f3)

; All three operands are passed as float arguments.  The z10 run expects
; MAEBR followed by a copy into %f0; the z14 run expects a single WFMASB.
define float @f1(float %f1, float %f2, float %acc) {
; CHECK-LABEL: f1:
; CHECK-SCALAR: maebr %f4, %f0, %f2
; CHECK-SCALAR: ler %f0, %f4
; CHECK-VECTOR: wfmasb %f0, %f0, %f2, %f4
; CHECK: br %r14
  %fma = call float @llvm.fma.f32(float %f1, float %f2, float %acc)
  ret float %fma
}

; The second multiplicand is loaded through %ptr.  Both runs expect the load
; to be folded into MAEB with a displacement of zero.
define float @f2(float %f1, float *%ptr, float %acc) {
; CHECK-LABEL: f2:
; CHECK: maeb %f2, %f0, 0(%r2)
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
  %val = load float, float *%ptr
  %fma = call float @llvm.fma.f32(float %f1, float %val, float %acc)
  ret float %fma
}

; The multiplicand is element 1023 of %base (byte offset 4092).  The checks
; expect that offset to appear directly as the MAEB displacement.
define float @f3(float %f1, float *%base, float %acc) {
; CHECK-LABEL: f3:
; CHECK: maeb %f2, %f0, 4092(%r2)
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
  %addr = getelementptr float, float *%base, i64 1023
  %val = load float, float *%addr
  %fma = call float @llvm.fma.f32(float %f1, float %val, float %acc)
  ret float %fma
}

; The multiplicand is element 1024 of %base (byte offset 4096).  What matters
; is that no out-of-range displacement is emitted; instruction sequences other
; than the one checked below would also be acceptable.
define float @f4(float %f1, float *%base, float %acc) {
; CHECK-LABEL: f4:
; CHECK: aghi %r2, 4096
; CHECK: maeb %f2, %f0, 0(%r2)
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
  %addr = getelementptr float, float *%base, i64 1024
  %val = load float, float *%addr
  %fma = call float @llvm.fma.f32(float %f1, float %val, float %acc)
  ret float %fma
}

; The multiplicand is element -1 of %base (byte offset -4).  As in f4, the
; point is that no out-of-range displacement is emitted; instruction sequences
; other than the one checked below would also be acceptable.
define float @f5(float %f1, float *%base, float %acc) {
; CHECK-LABEL: f5:
; CHECK: aghi %r2, -4
; CHECK: maeb %f2, %f0, 0(%r2)
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
  %addr = getelementptr float, float *%base, i64 -1
  %val = load float, float *%addr
  %fma = call float @llvm.fma.f32(float %f1, float %val, float %acc)
  ret float %fma
}

; The multiplicand is addressed by a variable element index.  The checks
; expect the index to be scaled with SLLG and then used as the MAEB index
; register.
define float @f6(float %f1, float *%base, i64 %index, float %acc) {
; CHECK-LABEL: f6:
; CHECK: sllg %r1, %r3, 2
; CHECK: maeb %f2, %f0, 0(%r1,%r2)
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
  %addr = getelementptr float, float *%base, i64 %index
  %val = load float, float *%addr
  %fma = call float @llvm.fma.f32(float %f1, float %val, float %acc)
  ret float %fma
}

; Variable element index plus 1023 elements.  The checks expect the constant
; part to become a 4092 displacement on MAEB; either ordering of the base and
; index registers is accepted.
define float @f7(float %f1, float *%base, i64 %index, float %acc) {
; CHECK-LABEL: f7:
; CHECK: sllg %r1, %r3, 2
; CHECK: maeb %f2, %f0, 4092({{%r1,%r2|%r2,%r1}})
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
  %biased = add i64 %index, 1023
  %addr = getelementptr float, float *%base, i64 %biased
  %val = load float, float *%addr
  %fma = call float @llvm.fma.f32(float %f1, float %val, float %acc)
  ret float %fma
}

; Variable element index plus 1024 elements.  The checks expect the full
; address, including the 4096 offset, to be formed with LAY first so that MAEB
; uses a zero displacement.
define float @f8(float %f1, float *%base, i64 %index, float %acc) {
; CHECK-LABEL: f8:
; CHECK: sllg %r1, %r3, 2
; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}})
; CHECK: maeb %f2, %f0, 0(%r1)
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
  %biased = add i64 %index, 1024
  %addr = getelementptr float, float *%base, i64 %biased
  %val = load float, float *%addr
  %fma = call float @llvm.fma.f32(float %f1, float %val, float %acc)
  ret float %fma
}