; RUN: llc < %s -mtriple=aarch64-linux-gnu -aarch64-neon-syntax=generic | FileCheck %s

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

declare i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8>)
declare i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16>)
declare i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32>)
declare i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8>)
declare i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16>)
declare i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32>)

declare i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8>)
declare i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16>)
declare i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32>)
declare i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8>)
declare i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16>)
declare i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32>)

declare float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float>)
declare float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float>)

; CHECK-LABEL: smax_B
; CHECK: smaxv {{b[0-9]+}}, {{v[0-9]+}}.16b
define i8 @smax_B(<16 x i8>* nocapture readonly %arr) {
  %arr.load = load <16 x i8>, <16 x i8>* %arr
  %r = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> %arr.load)
  ret i8 %r
}

; CHECK-LABEL: smax_H
; CHECK: smaxv {{h[0-9]+}}, {{v[0-9]+}}.8h
define i16 @smax_H(<8 x i16>* nocapture readonly %arr) {
  %arr.load = load <8 x i16>, <8 x i16>* %arr
  %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> %arr.load)
  ret i16 %r
}

; CHECK-LABEL: smax_S
; CHECK: smaxv {{s[0-9]+}}, {{v[0-9]+}}.4s
define i32 @smax_S(<4 x i32>* nocapture readonly %arr) {
  %arr.load = load <4 x i32>, <4 x i32>* %arr
  %r = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> %arr.load)
  ret i32 %r
}

; CHECK-LABEL: umax_B
; CHECK: umaxv {{b[0-9]+}}, {{v[0-9]+}}.16b
define i8 @umax_B(<16 x i8>* nocapture readonly %arr) {
  %arr.load = load <16 x i8>, <16 x i8>* %arr
  %r = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> %arr.load)
  ret i8 %r
}

; CHECK-LABEL: umax_H
; CHECK: umaxv {{h[0-9]+}}, {{v[0-9]+}}.8h
define i16 @umax_H(<8 x i16>* nocapture readonly %arr) {
  %arr.load = load <8 x i16>, <8 x i16>* %arr
  %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> %arr.load)
  ret i16 %r
}

; CHECK-LABEL: umax_S
; CHECK: umaxv {{s[0-9]+}}, {{v[0-9]+}}.4s
define i32 @umax_S(<4 x i32>* nocapture readonly %arr) {
  %arr.load = load <4 x i32>, <4 x i32>* %arr
  %r = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> %arr.load)
  ret i32 %r
}

; CHECK-LABEL: smin_B
; CHECK: sminv {{b[0-9]+}}, {{v[0-9]+}}.16b
define i8 @smin_B(<16 x i8>* nocapture readonly %arr) {
  %arr.load = load <16 x i8>, <16 x i8>* %arr
  %r = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> %arr.load)
  ret i8 %r
}

; CHECK-LABEL: smin_H
; CHECK: sminv {{h[0-9]+}}, {{v[0-9]+}}.8h
define i16 @smin_H(<8 x i16>* nocapture readonly %arr) {
  %arr.load = load <8 x i16>, <8 x i16>* %arr
  %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> %arr.load)
  ret i16 %r
}

; CHECK-LABEL: smin_S
; CHECK: sminv {{s[0-9]+}}, {{v[0-9]+}}.4s
define i32 @smin_S(<4 x i32>* nocapture readonly %arr) {
  %arr.load = load <4 x i32>, <4 x i32>* %arr
  %r = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> %arr.load)
  ret i32 %r
}

; CHECK-LABEL: umin_B
; CHECK: uminv {{b[0-9]+}}, {{v[0-9]+}}.16b
define i8 @umin_B(<16 x i8>* nocapture readonly %arr) {
  %arr.load = load <16 x i8>, <16 x i8>* %arr
  %r = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> %arr.load)
  ret i8 %r
}

; CHECK-LABEL: umin_H
; CHECK: uminv {{h[0-9]+}}, {{v[0-9]+}}.8h
define i16 @umin_H(<8 x i16>* nocapture readonly %arr) {
  %arr.load = load <8 x i16>, <8 x i16>* %arr
  %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> %arr.load)
  ret i16 %r
}

; CHECK-LABEL: umin_S
; CHECK: uminv {{s[0-9]+}}, {{v[0-9]+}}.4s
define i32 @umin_S(<4 x i32>* nocapture readonly %arr) {
  %arr.load = load <4 x i32>, <4 x i32>* %arr
  %r = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> %arr.load)
  ret i32 %r
}
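
; Note on the two tests below: the nnan flag on the reduction calls lets the
; backend select the across-lanes FMAXNMV/FMINNMV instructions, since fmaxnm
; and fmax differ only in NaN handling and are interchangeable when no NaNs
; can occur.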
; CHECK-LABEL: fmaxnm_S
; CHECK: fmaxnmv
define float @fmaxnm_S(<4 x float>* nocapture readonly %arr) {
  %arr.load = load <4 x float>, <4 x float>* %arr
  %r = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float> %arr.load)
  ret float %r
}

; CHECK-LABEL: fminnm_S
; CHECK: fminnmv
define float @fminnm_S(<4 x float>* nocapture readonly %arr) {
  %arr.load = load <4 x float>, <4 x float>* %arr
  %r = call nnan float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float> %arr.load)
  ret float %r
}

declare i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16>)

define i16 @oversized_umax_256(<16 x i16>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_umax_256
; CHECK: umax [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: umaxv {{h[0-9]+}}, [[V0]]
  %arr.load = load <16 x i16>, <16 x i16>* %arr
  %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> %arr.load)
  ret i16 %r
}

declare i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32>)

define i32 @oversized_umax_512(<16 x i32>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_umax_512
; CHECK: umax v
; CHECK-NEXT: umax v
; CHECK-NEXT: umax [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
; CHECK-NEXT: umaxv {{s[0-9]+}}, [[V0]]
  %arr.load = load <16 x i32>, <16 x i32>* %arr
  %r = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> %arr.load)
  ret i32 %r
}

declare i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16>)

define i16 @oversized_umin_256(<16 x i16>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_umin_256
; CHECK: umin [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: uminv {{h[0-9]+}}, [[V0]]
  %arr.load = load <16 x i16>, <16 x i16>* %arr
  %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> %arr.load)
  ret i16 %r
}

declare i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32>)

define i32 @oversized_umin_512(<16 x i32>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_umin_512
; CHECK: umin v
; CHECK-NEXT: umin v
; CHECK-NEXT: umin [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
; CHECK-NEXT: uminv {{s[0-9]+}}, [[V0]]
  %arr.load = load <16 x i32>, <16 x i32>* %arr
  %r = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> %arr.load)
  ret i32 %r
}
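
; In the oversized tests above and below, the source vector is wider than a
; single 128-bit NEON register, so the legalizer first narrows it with a tree
; of pairwise vector min/max operations (one step for the 256-bit v16i16
; inputs, three for the 512-bit v16i32 inputs) and only then emits the
; across-lanes reduction, which is what the CHECK/CHECK-NEXT chains assert.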
declare i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16>)

define i16 @oversized_smax_256(<16 x i16>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_smax_256
; CHECK: smax [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: smaxv {{h[0-9]+}}, [[V0]]
  %arr.load = load <16 x i16>, <16 x i16>* %arr
  %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> %arr.load)
  ret i16 %r
}

declare i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32>)

define i32 @oversized_smax_512(<16 x i32>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_smax_512
; CHECK: smax v
; CHECK-NEXT: smax v
; CHECK-NEXT: smax [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
; CHECK-NEXT: smaxv {{s[0-9]+}}, [[V0]]
  %arr.load = load <16 x i32>, <16 x i32>* %arr
  %r = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> %arr.load)
  ret i32 %r
}

declare i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16>)

define i16 @oversized_smin_256(<16 x i16>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_smin_256
; CHECK: smin [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: sminv {{h[0-9]+}}, [[V0]]
  %arr.load = load <16 x i16>, <16 x i16>* %arr
  %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> %arr.load)
  ret i16 %r
}

declare i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32>)

define i32 @oversized_smin_512(<16 x i32>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_smin_512
; CHECK: smin v
; CHECK-NEXT: smin v
; CHECK-NEXT: smin [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
; CHECK-NEXT: sminv {{s[0-9]+}}, [[V0]]
  %arr.load = load <16 x i32>, <16 x i32>* %arr
  %r = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> %arr.load)
  ret i32 %r
}