; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
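; This file checks that the ARM-style NEON min/max intrinsics and the AArch64
; "nm" variants are lowered to the corresponding AArch64 vector instructions:
; smax/umax, smin/umin, fmax/fmin, fmaxnm/fminnm.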

declare <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8>, <8 x i8>)

define <8 x i8> @test_smax_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; Using registers other than v0 and v1 is possible, but would be odd: under
; AAPCS64 the vector arguments arrive in v0 and v1 and the result is returned
; in v0, so the in-place form below is the natural lowering.
; CHECK: test_smax_v8i8:
  %tmp1 = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: smax v0.8b, v0.8b, v1.8b
  ret <8 x i8> %tmp1
}

define <8 x i8> @test_umax_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_umax_v8i8:
  %tmp1 = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: umax v0.8b, v0.8b, v1.8b
  ret <8 x i8> %tmp1
}

declare <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @test_smax_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_smax_v16i8:
  %tmp1 = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: smax v0.16b, v0.16b, v1.16b
  ret <16 x i8> %tmp1
}

define <16 x i8> @test_umax_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_umax_v16i8:
  %tmp1 = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: umax v0.16b, v0.16b, v1.16b
  ret <16 x i8> %tmp1
}

declare <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16>, <4 x i16>)

define <4 x i16> @test_smax_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_smax_v4i16:
  %tmp1 = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: smax v0.4h, v0.4h, v1.4h
  ret <4 x i16> %tmp1
}

define <4 x i16> @test_umax_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_umax_v4i16:
  %tmp1 = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: umax v0.4h, v0.4h, v1.4h
  ret <4 x i16> %tmp1
}


declare <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @test_smax_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_smax_v8i16:
  %tmp1 = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: smax v0.8h, v0.8h, v1.8h
  ret <8 x i16> %tmp1
}

define <8 x i16> @test_umax_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_umax_v8i16:
  %tmp1 = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: umax v0.8h, v0.8h, v1.8h
  ret <8 x i16> %tmp1
}


declare <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>)

define <2 x i32> @test_smax_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_smax_v2i32:
  %tmp1 = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: smax v0.2s, v0.2s, v1.2s
  ret <2 x i32> %tmp1
}

define <2 x i32> @test_umax_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_umax_v2i32:
  %tmp1 = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: umax v0.2s, v0.2s, v1.2s
  ret <2 x i32> %tmp1
}

declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>)

define <4 x i32> @test_smax_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_smax_v4i32:
  %tmp1 = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: smax v0.4s, v0.4s, v1.4s
  ret <4 x i32> %tmp1
}

define <4 x i32> @test_umax_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_umax_v4i32:
  %tmp1 = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: umax v0.4s, v0.4s, v1.4s
  ret <4 x i32> %tmp1
}
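
; A hedged aside, not part of the original test: the same smax/umax
; instructions can also be reached from plain IR through the icmp+select
; idiom, assuming instruction selection pattern-matches it. The function
; below is illustrative only, and its CHECK line relies on that assumption.
define <4 x i32> @test_smax_v4i32_select(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_smax_v4i32_select:
  %cmp = icmp sgt <4 x i32> %lhs, %rhs
  %max = select <4 x i1> %cmp, <4 x i32> %lhs, <4 x i32> %rhs
; CHECK: smax v0.4s, v0.4s, v1.4s
  ret <4 x i32> %max
}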

declare <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8>, <8 x i8>)

define <8 x i8> @test_smin_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; Using registers other than v0 and v1 is possible, but would be odd.
; CHECK: test_smin_v8i8:
  %tmp1 = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: smin v0.8b, v0.8b, v1.8b
  ret <8 x i8> %tmp1
}

define <8 x i8> @test_umin_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_umin_v8i8:
  %tmp1 = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: umin v0.8b, v0.8b, v1.8b
  ret <8 x i8> %tmp1
}

declare <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @test_smin_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_smin_v16i8:
  %tmp1 = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: smin v0.16b, v0.16b, v1.16b
  ret <16 x i8> %tmp1
}

define <16 x i8> @test_umin_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_umin_v16i8:
  %tmp1 = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: umin v0.16b, v0.16b, v1.16b
  ret <16 x i8> %tmp1
}

declare <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16>, <4 x i16>)

define <4 x i16> @test_smin_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_smin_v4i16:
  %tmp1 = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: smin v0.4h, v0.4h, v1.4h
  ret <4 x i16> %tmp1
}

define <4 x i16> @test_umin_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_umin_v4i16:
  %tmp1 = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: umin v0.4h, v0.4h, v1.4h
  ret <4 x i16> %tmp1
}


declare <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @test_smin_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_smin_v8i16:
  %tmp1 = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: smin v0.8h, v0.8h, v1.8h
  ret <8 x i16> %tmp1
}

define <8 x i16> @test_umin_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_umin_v8i16:
  %tmp1 = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: umin v0.8h, v0.8h, v1.8h
  ret <8 x i16> %tmp1
}


declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>)

define <2 x i32> @test_smin_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_smin_v2i32:
  %tmp1 = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: smin v0.2s, v0.2s, v1.2s
  ret <2 x i32> %tmp1
}

define <2 x i32> @test_umin_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_umin_v2i32:
  %tmp1 = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: umin v0.2s, v0.2s, v1.2s
  ret <2 x i32> %tmp1
}

declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>)

define <4 x i32> @test_smin_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_smin_v4i32:
  %tmp1 = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: smin v0.4s, v0.4s, v1.4s
  ret <4 x i32> %tmp1
}

define <4 x i32> @test_umin_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_umin_v4i32:
  %tmp1 = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: umin v0.4s, v0.4s, v1.4s
  ret <4 x i32> %tmp1
}

declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.arm.neon.vmaxs.v2f64(<2 x double>, <2 x double>)

define <2 x float> @test_fmax_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK: test_fmax_v2f32:
  %val = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: fmax v0.2s, v0.2s, v1.2s
  ret <2 x float> %val
}

define <4 x float> @test_fmax_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK: test_fmax_v4f32:
  %val = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: fmax v0.4s, v0.4s, v1.4s
  ret <4 x float> %val
}

define <2 x double> @test_fmax_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK: test_fmax_v2f64:
  %val = call <2 x double> @llvm.arm.neon.vmaxs.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: fmax v0.2d, v0.2d, v1.2d
  ret <2 x double> %val
}

declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.arm.neon.vmins.v2f64(<2 x double>, <2 x double>)

define <2 x float> @test_fmin_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK: test_fmin_v2f32:
  %val = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: fmin v0.2s, v0.2s, v1.2s
  ret <2 x float> %val
}

define <4 x float> @test_fmin_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK: test_fmin_v4f32:
  %val = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: fmin v0.4s, v0.4s, v1.4s
  ret <4 x float> %val
}

define <2 x double> @test_fmin_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK: test_fmin_v2f64:
  %val = call <2 x double> @llvm.arm.neon.vmins.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: fmin v0.2d, v0.2d, v1.2d
  ret <2 x double> %val
}

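; Note: fmax/fmin propagate NaNs, while fmaxnm/fminnm implement the IEEE
; 754-2008 maxNum/minNum semantics and return the numeric operand when
; exactly one input is a quiet NaN.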
declare <2 x float> @llvm.aarch64.neon.vmaxnm.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.aarch64.neon.vmaxnm.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.aarch64.neon.vmaxnm.v2f64(<2 x double>, <2 x double>)

define <2 x float> @test_fmaxnm_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK: test_fmaxnm_v2f32:
  %val = call <2 x float> @llvm.aarch64.neon.vmaxnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: fmaxnm v0.2s, v0.2s, v1.2s
  ret <2 x float> %val
}

define <4 x float> @test_fmaxnm_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK: test_fmaxnm_v4f32:
  %val = call <4 x float> @llvm.aarch64.neon.vmaxnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: fmaxnm v0.4s, v0.4s, v1.4s
  ret <4 x float> %val
}

define <2 x double> @test_fmaxnm_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK: test_fmaxnm_v2f64:
  %val = call <2 x double> @llvm.aarch64.neon.vmaxnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: fmaxnm v0.2d, v0.2d, v1.2d
  ret <2 x double> %val
}

declare <2 x float> @llvm.aarch64.neon.vminnm.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.aarch64.neon.vminnm.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.aarch64.neon.vminnm.v2f64(<2 x double>, <2 x double>)

define <2 x float> @test_fminnm_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK: test_fminnm_v2f32:
  %val = call <2 x float> @llvm.aarch64.neon.vminnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: fminnm v0.2s, v0.2s, v1.2s
  ret <2 x float> %val
}

define <4 x float> @test_fminnm_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK: test_fminnm_v4f32:
  %val = call <4 x float> @llvm.aarch64.neon.vminnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: fminnm v0.4s, v0.4s, v1.4s
  ret <4 x float> %val
}

define <2 x double> @test_fminnm_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK: test_fminnm_v2f64:
  %val = call <2 x double> @llvm.aarch64.neon.vminnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: fminnm v0.2d, v0.2d, v1.2d
  ret <2 x double> %val
}