Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
      2 
      3 declare <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8>, <8 x i8>)
      4 declare <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8>, <8 x i8>)
      5 
      6 define <8 x i8> @test_smaxp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
      7 ; Using registers other than v0, v1 are possible, but would be odd.
      8 ; CHECK: test_smaxp_v8i8:
      9   %tmp1 = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
     10 ; CHECK: smaxp v0.8b, v0.8b, v1.8b
     11   ret <8 x i8> %tmp1
     12 }
     13 
     14 define <8 x i8> @test_umaxp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
     15   %tmp1 = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
     16 ; CHECK: umaxp v0.8b, v0.8b, v1.8b
     17   ret <8 x i8> %tmp1
     18 }
     19 
     20 declare <16 x i8> @llvm.arm.neon.vpmaxs.v16i8(<16 x i8>, <16 x i8>)
     21 declare <16 x i8> @llvm.arm.neon.vpmaxu.v16i8(<16 x i8>, <16 x i8>)
     22 
     23 define <16 x i8> @test_smaxp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
     24 ; CHECK: test_smaxp_v16i8:
     25   %tmp1 = call <16 x i8> @llvm.arm.neon.vpmaxs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
     26 ; CHECK: smaxp v0.16b, v0.16b, v1.16b
     27   ret <16 x i8> %tmp1
     28 }
     29 
     30 define <16 x i8> @test_umaxp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
     31 ; CHECK: test_umaxp_v16i8:
     32   %tmp1 = call <16 x i8> @llvm.arm.neon.vpmaxu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
     33 ; CHECK: umaxp v0.16b, v0.16b, v1.16b
     34   ret <16 x i8> %tmp1
     35 }
     36 
     37 declare <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16>, <4 x i16>)
     38 declare <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16>, <4 x i16>)
     39 
     40 define <4 x i16> @test_smaxp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
     41 ; CHECK: test_smaxp_v4i16:
     42   %tmp1 = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
     43 ; CHECK: smaxp v0.4h, v0.4h, v1.4h
     44   ret <4 x i16> %tmp1
     45 }
     46 
     47 define <4 x i16> @test_umaxp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
     48 ; CHECK: test_umaxp_v4i16:
     49   %tmp1 = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
     50 ; CHECK: umaxp v0.4h, v0.4h, v1.4h
     51   ret <4 x i16> %tmp1
     52 }
     53 
     54 
     55 declare <8 x i16> @llvm.arm.neon.vpmaxs.v8i16(<8 x i16>, <8 x i16>)
     56 declare <8 x i16> @llvm.arm.neon.vpmaxu.v8i16(<8 x i16>, <8 x i16>)
     57 
     58 define <8 x i16> @test_smaxp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
     59 ; CHECK: test_smaxp_v8i16:
     60   %tmp1 = call <8 x i16> @llvm.arm.neon.vpmaxs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
     61 ; CHECK: smaxp v0.8h, v0.8h, v1.8h
     62   ret <8 x i16> %tmp1
     63 }
     64 
     65 define <8 x i16> @test_umaxp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
     66 ; CHECK: test_umaxp_v8i16:
     67   %tmp1 = call <8 x i16> @llvm.arm.neon.vpmaxu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
     68 ; CHECK: umaxp v0.8h, v0.8h, v1.8h
     69   ret <8 x i16> %tmp1
     70 }
     71 
     72 
     73 declare <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32>, <2 x i32>)
     74 declare <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32>, <2 x i32>)
     75 
     76 define <2 x i32> @test_smaxp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
     77 ; CHECK: test_smaxp_v2i32:
     78   %tmp1 = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
     79 ; CHECK: smaxp v0.2s, v0.2s, v1.2s
     80   ret <2 x i32> %tmp1
     81 }
     82 
     83 define <2 x i32> @test_umaxp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
     84 ; CHECK: test_umaxp_v2i32:
     85   %tmp1 = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
     86 ; CHECK: umaxp v0.2s, v0.2s, v1.2s
     87   ret <2 x i32> %tmp1
     88 }
     89 
     90 declare <4 x i32> @llvm.arm.neon.vpmaxs.v4i32(<4 x i32>, <4 x i32>)
     91 declare <4 x i32> @llvm.arm.neon.vpmaxu.v4i32(<4 x i32>, <4 x i32>)
     92 
     93 define <4 x i32> @test_smaxp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
     94 ; CHECK: test_smaxp_v4i32:
     95   %tmp1 = call <4 x i32> @llvm.arm.neon.vpmaxs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
     96 ; CHECK: smaxp v0.4s, v0.4s, v1.4s
     97   ret <4 x i32> %tmp1
     98 }
     99 
    100 define <4 x i32> @test_umaxp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
    101 ; CHECK: test_umaxp_v4i32:
    102   %tmp1 = call <4 x i32> @llvm.arm.neon.vpmaxu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
    103 ; CHECK: umaxp v0.4s, v0.4s, v1.4s
    104   ret <4 x i32> %tmp1
    105 }
    106 
    107 declare <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8>, <8 x i8>)
    108 declare <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8>, <8 x i8>)
    109 
    110 define <8 x i8> @test_sminp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
    111 ; Using registers other than v0, v1 are possible, but would be odd.
    112 ; CHECK: test_sminp_v8i8:
    113   %tmp1 = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
    114 ; CHECK: sminp v0.8b, v0.8b, v1.8b
    115   ret <8 x i8> %tmp1
    116 }
    117 
    118 define <8 x i8> @test_uminp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
    119   %tmp1 = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
    120 ; CHECK: uminp v0.8b, v0.8b, v1.8b
    121   ret <8 x i8> %tmp1
    122 }
    123 
    124 declare <16 x i8> @llvm.arm.neon.vpmins.v16i8(<16 x i8>, <16 x i8>)
    125 declare <16 x i8> @llvm.arm.neon.vpminu.v16i8(<16 x i8>, <16 x i8>)
    126 
    127 define <16 x i8> @test_sminp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
    128 ; CHECK: test_sminp_v16i8:
    129   %tmp1 = call <16 x i8> @llvm.arm.neon.vpmins.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
    130 ; CHECK: sminp v0.16b, v0.16b, v1.16b
    131   ret <16 x i8> %tmp1
    132 }
    133 
    134 define <16 x i8> @test_uminp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
    135 ; CHECK: test_uminp_v16i8:
    136   %tmp1 = call <16 x i8> @llvm.arm.neon.vpminu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
    137 ; CHECK: uminp v0.16b, v0.16b, v1.16b
    138   ret <16 x i8> %tmp1
    139 }
    140 
    141 declare <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16>, <4 x i16>)
    142 declare <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16>, <4 x i16>)
    143 
    144 define <4 x i16> @test_sminp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
    145 ; CHECK: test_sminp_v4i16:
    146   %tmp1 = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
    147 ; CHECK: sminp v0.4h, v0.4h, v1.4h
    148   ret <4 x i16> %tmp1
    149 }
    150 
    151 define <4 x i16> @test_uminp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
    152 ; CHECK: test_uminp_v4i16:
    153   %tmp1 = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
    154 ; CHECK: uminp v0.4h, v0.4h, v1.4h
    155   ret <4 x i16> %tmp1
    156 }
    157 
    158 
    159 declare <8 x i16> @llvm.arm.neon.vpmins.v8i16(<8 x i16>, <8 x i16>)
    160 declare <8 x i16> @llvm.arm.neon.vpminu.v8i16(<8 x i16>, <8 x i16>)
    161 
    162 define <8 x i16> @test_sminp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
    163 ; CHECK: test_sminp_v8i16:
    164   %tmp1 = call <8 x i16> @llvm.arm.neon.vpmins.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
    165 ; CHECK: sminp v0.8h, v0.8h, v1.8h
    166   ret <8 x i16> %tmp1
    167 }
    168 
    169 define <8 x i16> @test_uminp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
    170 ; CHECK: test_uminp_v8i16:
    171   %tmp1 = call <8 x i16> @llvm.arm.neon.vpminu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
    172 ; CHECK: uminp v0.8h, v0.8h, v1.8h
    173   ret <8 x i16> %tmp1
    174 }
    175 
    176 
    177 declare <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32>, <2 x i32>)
    178 declare <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32>, <2 x i32>)
    179 
    180 define <2 x i32> @test_sminp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
    181 ; CHECK: test_sminp_v2i32:
    182   %tmp1 = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
    183 ; CHECK: sminp v0.2s, v0.2s, v1.2s
    184   ret <2 x i32> %tmp1
    185 }
    186 
    187 define <2 x i32> @test_uminp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
    188 ; CHECK: test_uminp_v2i32:
    189   %tmp1 = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
    190 ; CHECK: uminp v0.2s, v0.2s, v1.2s
    191   ret <2 x i32> %tmp1
    192 }
    193 
    194 declare <4 x i32> @llvm.arm.neon.vpmins.v4i32(<4 x i32>, <4 x i32>)
    195 declare <4 x i32> @llvm.arm.neon.vpminu.v4i32(<4 x i32>, <4 x i32>)
    196 
    197 define <4 x i32> @test_sminp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
    198 ; CHECK: test_sminp_v4i32:
    199   %tmp1 = call <4 x i32> @llvm.arm.neon.vpmins.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
    200 ; CHECK: sminp v0.4s, v0.4s, v1.4s
    201   ret <4 x i32> %tmp1
    202 }
    203 
    204 define <4 x i32> @test_uminp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
    205 ; CHECK: test_uminp_v4i32:
    206   %tmp1 = call <4 x i32> @llvm.arm.neon.vpminu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
    207 ; CHECK: uminp v0.4s, v0.4s, v1.4s
    208   ret <4 x i32> %tmp1
    209 }
    210 
    211 declare <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float>, <2 x float>)
    212 declare <4 x float> @llvm.arm.neon.vpmaxs.v4f32(<4 x float>, <4 x float>)
    213 declare <2 x double> @llvm.arm.neon.vpmaxs.v2f64(<2 x double>, <2 x double>)
    214 
    215 define <2 x float> @test_fmaxp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
    216 ; CHECK: test_fmaxp_v2f32:
    217         %val = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %lhs, <2 x float> %rhs)
    218 ; CHECK: fmaxp v0.2s, v0.2s, v1.2s
    219         ret <2 x float> %val
    220 }
    221 
    222 define <4 x float> @test_fmaxp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
    223 ; CHECK: test_fmaxp_v4f32:
    224         %val = call <4 x float> @llvm.arm.neon.vpmaxs.v4f32(<4 x float> %lhs, <4 x float> %rhs)
    225 ; CHECK: fmaxp v0.4s, v0.4s, v1.4s
    226         ret <4 x float> %val
    227 }
    228 
    229 define <2 x double> @test_fmaxp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
    230 ; CHECK: test_fmaxp_v2f64:
    231         %val = call <2 x double> @llvm.arm.neon.vpmaxs.v2f64(<2 x double> %lhs, <2 x double> %rhs)
    232 ; CHECK: fmaxp v0.2d, v0.2d, v1.2d
    233         ret <2 x double> %val
    234 }
    235 
    236 declare <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float>, <2 x float>)
    237 declare <4 x float> @llvm.arm.neon.vpmins.v4f32(<4 x float>, <4 x float>)
    238 declare <2 x double> @llvm.arm.neon.vpmins.v2f64(<2 x double>, <2 x double>)
    239 
    240 define <2 x float> @test_fminp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
    241 ; CHECK: test_fminp_v2f32:
    242         %val = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %lhs, <2 x float> %rhs)
    243 ; CHECK: fminp v0.2s, v0.2s, v1.2s
    244         ret <2 x float> %val
    245 }
    246 
    247 define <4 x float> @test_fminp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
    248 ; CHECK: test_fminp_v4f32:
    249         %val = call <4 x float> @llvm.arm.neon.vpmins.v4f32(<4 x float> %lhs, <4 x float> %rhs)
    250 ; CHECK: fminp v0.4s, v0.4s, v1.4s
    251         ret <4 x float> %val
    252 }
    253 
    254 define <2 x double> @test_fminp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
    255 ; CHECK: test_fminp_v2f64:
    256         %val = call <2 x double> @llvm.arm.neon.vpmins.v2f64(<2 x double> %lhs, <2 x double> %rhs)
    257 ; CHECK: fminp v0.2d, v0.2d, v1.2d
    258         ret <2 x double> %val
    259 }
    260 
    261 declare <2 x float> @llvm.aarch64.neon.vpmaxnm.v2f32(<2 x float>, <2 x float>)
    262 declare <4 x float> @llvm.aarch64.neon.vpmaxnm.v4f32(<4 x float>, <4 x float>)
    263 declare <2 x double> @llvm.aarch64.neon.vpmaxnm.v2f64(<2 x double>, <2 x double>)
    264 
    265 define <2 x float> @test_fmaxnmp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
    266 ; CHECK: test_fmaxnmp_v2f32:
    267         %val = call <2 x float> @llvm.aarch64.neon.vpmaxnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
    268 ; CHECK: fmaxnmp v0.2s, v0.2s, v1.2s
    269         ret <2 x float> %val
    270 }
    271 
    272 define <4 x float> @test_fmaxnmp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
    273 ; CHECK: test_fmaxnmp_v4f32:
    274         %val = call <4 x float> @llvm.aarch64.neon.vpmaxnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
    275 ; CHECK: fmaxnmp v0.4s, v0.4s, v1.4s
    276         ret <4 x float> %val
    277 }
    278 
    279 define <2 x double> @test_fmaxnmp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
    280 ; CHECK: test_fmaxnmp_v2f64:
    281         %val = call <2 x double> @llvm.aarch64.neon.vpmaxnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
    282 ; CHECK: fmaxnmp v0.2d, v0.2d, v1.2d
    283         ret <2 x double> %val
    284 }
    285 
    286 declare <2 x float> @llvm.aarch64.neon.vpminnm.v2f32(<2 x float>, <2 x float>)
    287 declare <4 x float> @llvm.aarch64.neon.vpminnm.v4f32(<4 x float>, <4 x float>)
    288 declare <2 x double> @llvm.aarch64.neon.vpminnm.v2f64(<2 x double>, <2 x double>)
    289 
    290 define <2 x float> @test_fminnmp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
    291 ; CHECK: test_fminnmp_v2f32:
    292         %val = call <2 x float> @llvm.aarch64.neon.vpminnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
    293 ; CHECK: fminnmp v0.2s, v0.2s, v1.2s
    294         ret <2 x float> %val
    295 }
    296 
    297 define <4 x float> @test_fminnmp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
    298 ; CHECK: test_fminnmp_v4f32:
    299         %val = call <4 x float> @llvm.aarch64.neon.vpminnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
    300 ; CHECK: fminnmp v0.4s, v0.4s, v1.4s
    301         ret <4 x float> %val
    302 }
    303 
    304 define <2 x double> @test_fminnmp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
    305 ; CHECK: test_fminnmp_v2f64:
    306         %val = call <2 x double> @llvm.aarch64.neon.vpminnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
    307 ; CHECK: fminnmp v0.2d, v0.2d, v1.2d
    308         ret <2 x double> %val
    309 }
    310 
    311