; Tests that llvm.experimental.vector.reduce min/max intrinsics lower to the
; AArch64 across-lanes reduction instructions (SMAXV/UMAXV/SMINV/UMINV and
; FMAXNMV/FMINNMV). (Replaces a code-browser navigation header that was not
; valid IR.)
      1 ; RUN: llc < %s -mtriple=aarch64-linux--gnu -aarch64-neon-syntax=generic | FileCheck %s
      2 
      3 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
      4 
      5 declare i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8>)
      6 declare i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16>)
      7 declare i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32>)
      8 declare i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8>)
      9 declare i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16>)
     10 declare i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32>)
     11 
     12 declare i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8>)
     13 declare i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16>)
     14 declare i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32>)
     15 declare i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8>)
     16 declare i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16>)
     17 declare i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32>)
     18 
     19 declare float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float>)
     20 declare float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float>)
     21 
     22 ; CHECK-LABEL: smax_B
     23 ; CHECK: smaxv {{b[0-9]+}}, {{v[0-9]+}}.16b
     24 define i8 @smax_B(<16 x i8>* nocapture readonly %arr)  {
     25   %arr.load = load <16 x i8>, <16 x i8>* %arr
     26   %r = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> %arr.load)
     27   ret i8 %r
     28 }
     29 
     30 ; CHECK-LABEL: smax_H
     31 ; CHECK: smaxv {{h[0-9]+}}, {{v[0-9]+}}.8h
     32 define i16 @smax_H(<8 x i16>* nocapture readonly %arr) {
     33   %arr.load = load <8 x i16>, <8 x i16>* %arr
     34   %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> %arr.load)
     35   ret i16 %r
     36 }
     37 
     38 ; CHECK-LABEL: smax_S
     39 ; CHECK: smaxv {{s[0-9]+}}, {{v[0-9]+}}.4s
     40 define i32 @smax_S(<4 x i32> * nocapture readonly %arr)  {
     41   %arr.load = load <4 x i32>, <4 x i32>* %arr
     42   %r = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> %arr.load)
     43   ret i32 %r
     44 }
     45 
     46 ; CHECK-LABEL: umax_B
     47 ; CHECK: umaxv {{b[0-9]+}}, {{v[0-9]+}}.16b
     48 define i8 @umax_B(<16 x i8>* nocapture readonly %arr)  {
     49   %arr.load = load <16 x i8>, <16 x i8>* %arr
     50   %r = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> %arr.load)
     51   ret i8 %r
     52 }
     53 
     54 ; CHECK-LABEL: umax_H
     55 ; CHECK: umaxv {{h[0-9]+}}, {{v[0-9]+}}.8h
     56 define i16 @umax_H(<8 x i16>* nocapture readonly %arr)  {
     57   %arr.load = load <8 x i16>, <8 x i16>* %arr
     58   %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> %arr.load)
     59   ret i16 %r
     60 }
     61 
     62 ; CHECK-LABEL: umax_S
     63 ; CHECK: umaxv {{s[0-9]+}}, {{v[0-9]+}}.4s
     64 define i32 @umax_S(<4 x i32>* nocapture readonly %arr) {
     65   %arr.load = load <4 x i32>, <4 x i32>* %arr
     66   %r = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> %arr.load)
     67   ret i32 %r
     68 }
     69 
     70 ; CHECK-LABEL: smin_B
     71 ; CHECK: sminv {{b[0-9]+}}, {{v[0-9]+}}.16b
     72 define i8 @smin_B(<16 x i8>* nocapture readonly %arr) {
     73   %arr.load = load <16 x i8>, <16 x i8>* %arr
     74   %r = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> %arr.load)
     75   ret i8 %r
     76 }
     77 
     78 ; CHECK-LABEL: smin_H
     79 ; CHECK: sminv {{h[0-9]+}}, {{v[0-9]+}}.8h
     80 define i16 @smin_H(<8 x i16>* nocapture readonly %arr) {
     81   %arr.load = load <8 x i16>, <8 x i16>* %arr
     82   %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> %arr.load)
     83   ret i16 %r
     84 }
     85 
     86 ; CHECK-LABEL: smin_S
     87 ; CHECK: sminv {{s[0-9]+}}, {{v[0-9]+}}.4s
     88 define i32 @smin_S(<4 x i32>* nocapture readonly %arr) {
     89   %arr.load = load <4 x i32>, <4 x i32>* %arr
     90   %r = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> %arr.load)
     91   ret i32 %r
     92 }
     93 
     94 ; CHECK-LABEL: umin_B
     95 ; CHECK: uminv {{b[0-9]+}}, {{v[0-9]+}}.16b
     96 define i8 @umin_B(<16 x i8>* nocapture readonly %arr)  {
     97   %arr.load = load <16 x i8>, <16 x i8>* %arr
     98   %r = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> %arr.load)
     99   ret i8 %r
    100 }
    101 
    102 ; CHECK-LABEL: umin_H
    103 ; CHECK: uminv {{h[0-9]+}}, {{v[0-9]+}}.8h
    104 define i16 @umin_H(<8 x i16>* nocapture readonly %arr)  {
    105   %arr.load = load <8 x i16>, <8 x i16>* %arr
    106   %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> %arr.load)
    107   ret i16 %r
    108 }
    109 
    110 ; CHECK-LABEL: umin_S
    111 ; CHECK: uminv {{s[0-9]+}}, {{v[0-9]+}}.4s
    112 define i32 @umin_S(<4 x i32>* nocapture readonly %arr) {
    113   %arr.load = load <4 x i32>, <4 x i32>* %arr
    114   %r = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> %arr.load)
    115   ret i32 %r
    116 }
    117 
    118 ; CHECK-LABEL: fmaxnm_S
    119 ; CHECK: fmaxnmv
    120 define float @fmaxnm_S(<4 x float>* nocapture readonly %arr) {
    121   %arr.load  = load <4 x float>, <4 x float>* %arr
    122   %r = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float> %arr.load)
    123   ret float %r
    124 }
    125 
    126 ; CHECK-LABEL: fminnm_S
    127 ; CHECK: fminnmv
    128 define float @fminnm_S(<4 x float>* nocapture readonly %arr) {
    129   %arr.load  = load <4 x float>, <4 x float>* %arr
    130   %r = call nnan float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float> %arr.load)
    131   ret float %r
    132 }
    133 
    134 declare i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16>)
    135 
    136 define i16 @oversized_umax_256(<16 x i16>* nocapture readonly %arr)  {
    137 ; CHECK-LABEL: oversized_umax_256
    138 ; CHECK: umax [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
    139 ; CHECK: umaxv {{h[0-9]+}}, [[V0]]
    140   %arr.load = load <16 x i16>, <16 x i16>* %arr
    141   %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> %arr.load)
    142   ret i16 %r
    143 }
    144 
    145 declare i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32>)
    146 
    147 define i32 @oversized_umax_512(<16 x i32>* nocapture readonly %arr)  {
    148 ; CHECK-LABEL: oversized_umax_512
    149 ; CHECK: umax v
    150 ; CHECK-NEXT: umax v
    151 ; CHECK-NEXT: umax [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
    152 ; CHECK-NEXT: umaxv {{s[0-9]+}}, [[V0]]
    153   %arr.load = load <16 x i32>, <16 x i32>* %arr
    154   %r = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> %arr.load)
    155   ret i32 %r
    156 }
    157 
    158 declare i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16>)
    159 
    160 define i16 @oversized_umin_256(<16 x i16>* nocapture readonly %arr)  {
    161 ; CHECK-LABEL: oversized_umin_256
    162 ; CHECK: umin [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
    163 ; CHECK: uminv {{h[0-9]+}}, [[V0]]
    164   %arr.load = load <16 x i16>, <16 x i16>* %arr
    165   %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> %arr.load)
    166   ret i16 %r
    167 }
    168 
    169 declare i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32>)
    170 
    171 define i32 @oversized_umin_512(<16 x i32>* nocapture readonly %arr)  {
    172 ; CHECK-LABEL: oversized_umin_512
    173 ; CHECK: umin v
    174 ; CHECK-NEXT: umin v
    175 ; CHECK-NEXT: umin [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
    176 ; CHECK-NEXT: uminv {{s[0-9]+}}, [[V0]]
    177   %arr.load = load <16 x i32>, <16 x i32>* %arr
    178   %r = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> %arr.load)
    179   ret i32 %r
    180 }
    181 
    182 declare i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16>)
    183 
    184 define i16 @oversized_smax_256(<16 x i16>* nocapture readonly %arr)  {
    185 ; CHECK-LABEL: oversized_smax_256
    186 ; CHECK: smax [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
    187 ; CHECK: smaxv {{h[0-9]+}}, [[V0]]
    188   %arr.load = load <16 x i16>, <16 x i16>* %arr
    189   %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> %arr.load)
    190   ret i16 %r
    191 }
    192 
    193 declare i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32>)
    194 
    195 define i32 @oversized_smax_512(<16 x i32>* nocapture readonly %arr)  {
    196 ; CHECK-LABEL: oversized_smax_512
    197 ; CHECK: smax v
    198 ; CHECK-NEXT: smax v
    199 ; CHECK-NEXT: smax [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
    200 ; CHECK-NEXT: smaxv {{s[0-9]+}}, [[V0]]
    201   %arr.load = load <16 x i32>, <16 x i32>* %arr
    202   %r = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> %arr.load)
    203   ret i32 %r
    204 }
    205 
    206 declare i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16>)
    207 
    208 define i16 @oversized_smin_256(<16 x i16>* nocapture readonly %arr)  {
    209 ; CHECK-LABEL: oversized_smin_256
    210 ; CHECK: smin [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
    211 ; CHECK: sminv {{h[0-9]+}}, [[V0]]
    212   %arr.load = load <16 x i16>, <16 x i16>* %arr
    213   %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> %arr.load)
    214   ret i16 %r
    215 }
    216 
    217 declare i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32>)
    218 
    219 define i32 @oversized_smin_512(<16 x i32>* nocapture readonly %arr)  {
    220 ; CHECK-LABEL: oversized_smin_512
    221 ; CHECK: smin v
    222 ; CHECK-NEXT: smin v
    223 ; CHECK-NEXT: smin [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
    224 ; CHECK-NEXT: sminv {{s[0-9]+}}, [[V0]]
    225   %arr.load = load <16 x i32>, <16 x i32>* %arr
    226   %r = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> %arr.load)
    227   ret i32 %r
    228 }
    229