Home | History | Annotate | Download | only in CodeGen
      1 // RUN: %clang_cc1 -O1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -S -o - -emit-llvm %s | FileCheck %s
      2 // RUN: %clang_cc1 -O1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -S -o - %s | FileCheck -check-prefix=CHECK-CODEGEN %s
      3 // REQUIRES: aarch64-registered-target
      4 // Test ARM64 SIMD max/min intrinsics
      5 
      6 #include <arm_neon.h>
      7 
// Test a representative sample of 8 and 16, signed and unsigned, 64 and 128 bit reduction
      9 int8_t test_vmaxv_s8(int8x8_t a1) {
     10   // CHECK: test_vmaxv_s8
     11   return vmaxv_s8(a1);
     12   // CHECK @llvm.aarch64.neon.smaxv.i32.v8i8
     13 }
     14 
     15 uint16_t test_vminvq_u16(uint16x8_t a1) {
     16   // CHECK: test_vminvq_u16
     17   return vminvq_u16(a1);
     18   // CHECK llvm.aarch64.neon.uminv.i16.v8i16
     19 }
     20 
// Test a representative sample of 8 and 16, signed and unsigned, 64 and 128 bit pairwise
     22 uint8x8_t test_vmin_u8(uint8x8_t a1, uint8x8_t a2) {
     23   // CHECK: test_vmin_u8
     24   return vmin_u8(a1, a2);
     25   // CHECK llvm.aarch64.neon.umin.v8i8
     26 }
     27 
     28 uint8x16_t test_vminq_u8(uint8x16_t a1, uint8x16_t a2) {
     29   // CHECK: test_vminq_u8
     30   return vminq_u8(a1, a2);
     31   // CHECK llvm.aarch64.neon.umin.v16i8
     32 }
     33 
     34 int16x8_t test_vmaxq_s16(int16x8_t a1, int16x8_t a2) {
     35   // CHECK: test_vmaxq_s16
     36   return vmaxq_s16(a1, a2);
     37   // CHECK llvm.aarch64.neon.smax.v8i16
     38 }
     39 
     40 // Test the more complicated cases of [suf]32 and f64
     41 float64x2_t test_vmaxq_f64(float64x2_t a1, float64x2_t a2) {
     42   // CHECK: test_vmaxq_f64
     43   return vmaxq_f64(a1, a2);
     44   // CHECK llvm.aarch64.neon.fmax.v2f64
     45 }
     46 
     47 float32x4_t test_vmaxq_f32(float32x4_t a1, float32x4_t a2) {
     48   // CHECK: test_vmaxq_f32
     49   return vmaxq_f32(a1, a2);
     50   // CHECK llvm.aarch64.neon.fmax.v4f32
     51 }
     52 
     53 float64x2_t test_vminq_f64(float64x2_t a1, float64x2_t a2) {
     54   // CHECK: test_vminq_f64
     55   return vminq_f64(a1, a2);
     56   // CHECK llvm.aarch64.neon.fmin.v2f64
     57 }
     58 
     59 float32x2_t test_vmax_f32(float32x2_t a1, float32x2_t a2) {
     60   // CHECK: test_vmax_f32
     61   return vmax_f32(a1, a2);
     62   // CHECK llvm.aarch64.neon.fmax.v2f32
     63 }
     64 
     65 int32x2_t test_vmax_s32(int32x2_t a1, int32x2_t a2) {
     66   // CHECK: test_vmax_s32
     67   return vmax_s32(a1, a2);
     68   // CHECK llvm.aarch64.neon.smax.v2i32
     69 }
     70 
     71 uint32x2_t test_vmin_u32(uint32x2_t a1, uint32x2_t a2) {
     72   // CHECK: test_vmin_u32
     73   return vmin_u32(a1, a2);
     74   // CHECK llvm.aarch64.neon.umin.v2i32
     75 }
     76 
     77 float32_t test_vmaxnmv_f32(float32x2_t a1) {
     78   // CHECK: test_vmaxnmv_f32
     79   return vmaxnmv_f32(a1);
     80   // CHECK: llvm.aarch64.neon.fmaxnmv.f32.v2f32
     81   // CHECK-NEXT: ret
     82 }
     83 
     84 // this doesn't translate into a valid instruction, regardless of what the
     85 // ARM doc says.
#if 0
// Compiled out; see the note immediately before this region for the reason.
// The "@" written in place of the directive colon appears intended to keep
// the expectations below inert while the function is disabled.
// NOTE(review): the expected intrinsic text names saddlv.i64.v2i32, which does
// not match the float64x2_t operand used here; it looks like a placeholder.
// Confirm the real intrinsic name before re-enabling.
float64_t test_vmaxnmvq_f64(float64x2_t a1) {
  // CHECK@ test_vmaxnmvq_f64
  return vmaxnmvq_f64(a1);
  // CHECK@ llvm.aarch64.neon.saddlv.i64.v2i32
  // CHECK-NEXT@ ret
}
#endif
     94 
     95 float32_t test_vmaxnmvq_f32(float32x4_t a1) {
     96   // CHECK: test_vmaxnmvq_f32
     97   return vmaxnmvq_f32(a1);
     98   // CHECK: llvm.aarch64.neon.fmaxnmv.f32.v4f32
     99   // CHECK-NEXT: ret
    100 }
    101 
    102 float32_t test_vmaxv_f32(float32x2_t a1) {
    103   // CHECK: test_vmaxv_f32
    104   return vmaxv_f32(a1);
    105   // CHECK: llvm.aarch64.neon.fmaxv.f32.v2f32
    106   // FIXME check that the 2nd and 3rd arguments are the same V register below
    107   // CHECK-CODEGEN: fmaxp.2s
    108   // CHECK-NEXT: ret
    109 }
    110 
    111 int32_t test_vmaxv_s32(int32x2_t a1) {
    112   // CHECK: test_vmaxv_s32
    113   return vmaxv_s32(a1);
    114   // CHECK: llvm.aarch64.neon.smaxv.i32.v2i32
    115   // FIXME check that the 2nd and 3rd arguments are the same V register below
    116   // CHECK-CODEGEN: smaxp.2s
    117   // CHECK-NEXT: ret
    118 }
    119 
    120 uint32_t test_vmaxv_u32(uint32x2_t a1) {
    121   // CHECK: test_vmaxv_u32
    122   return vmaxv_u32(a1);
    123   // CHECK: llvm.aarch64.neon.umaxv.i32.v2i32
    124   // FIXME check that the 2nd and 3rd arguments are the same V register below
    125   // CHECK-CODEGEN: umaxp.2s
    126   // CHECK-NEXT: ret
    127 }
    128 
    129 // FIXME punt on this for now; don't forget to fix CHECKs
#if 0
// Compiled out pending the FIXME that precedes this region. The "@" written in
// place of the directive colon appears intended to keep the expectations below
// inert while the function is disabled.
// NOTE(review): the expected text gives an i64 result type for a function that
// returns float64_t; verify the intrinsic signature before re-enabling.
float64_t test_vmaxvq_f64(float64x2_t a1) {
  // CHECK@ test_vmaxvq_f64
  return vmaxvq_f64(a1);
  // CHECK@ llvm.aarch64.neon.fmaxv.i64.v2f64
  // CHECK-NEXT@ ret
}
#endif
    138 
    139 float32_t test_vmaxvq_f32(float32x4_t a1) {
    140   // CHECK: test_vmaxvq_f32
    141   return vmaxvq_f32(a1);
    142   // CHECK: llvm.aarch64.neon.fmaxv.f32.v4f32
    143   // CHECK-NEXT: ret
    144 }
    145 
    146 float32_t test_vminnmv_f32(float32x2_t a1) {
    147   // CHECK: test_vminnmv_f32
    148   return vminnmv_f32(a1);
    149   // CHECK: llvm.aarch64.neon.fminnmv.f32.v2f32
    150   // CHECK-NEXT: ret
    151 }
    152 
    153 float32_t test_vminvq_f32(float32x4_t a1) {
    154   // CHECK: test_vminvq_f32
    155   return vminvq_f32(a1);
    156   // CHECK: llvm.aarch64.neon.fminv.f32.v4f32
    157   // CHECK-NEXT: ret
    158 }
    159 
    160 // this doesn't translate into a valid instruction, regardless of what the ARM
    161 // doc says.
#if 0
// Compiled out; see the note immediately before this region for the reason.
// The "@" written in place of the directive colon appears intended to keep
// the expectations below inert while the function is disabled.
// NOTE(review): the expected intrinsic text names saddlv.i64.v2i32, which does
// not match the float64x2_t operand used here; it looks like a placeholder.
// Confirm the real intrinsic name before re-enabling.
float64_t test_vminnmvq_f64(float64x2_t a1) {
  // CHECK@ test_vminnmvq_f64
  return vminnmvq_f64(a1);
  // CHECK@ llvm.aarch64.neon.saddlv.i64.v2i32
  // CHECK-NEXT@ ret
}
#endif
    170 
    171 float32_t test_vminnmvq_f32(float32x4_t a1) {
    172   // CHECK: test_vminnmvq_f32
    173   return vminnmvq_f32(a1);
    174   // CHECK: llvm.aarch64.neon.fminnmv.f32.v4f32
    175   // CHECK-NEXT: ret
    176 }
    177 
    178 float32_t test_vminv_f32(float32x2_t a1) {
    179   // CHECK: test_vminv_f32
    180   return vminv_f32(a1);
    181   // CHECK: llvm.aarch64.neon.fminv.f32.v2f32
    182   // CHECK-NEXT: ret
    183 }
    184 
    185 int32_t test_vminv_s32(int32x2_t a1) {
    186   // CHECK: test_vminv_s32
    187   return vminv_s32(a1);
    188   // CHECK: llvm.aarch64.neon.sminv.i32.v2i32
    189   // CHECK-CODEGEN: sminp.2s
    190   // CHECK-NEXT: ret
    191 }
    192 
    193 uint32_t test_vminv_u32(uint32x2_t a1) {
    194   // CHECK: test_vminv_u32
    195   return vminv_u32(a1);
    196   // CHECK: llvm.aarch64.neon.fminv.f32.v2f32
    197 }
    198 
    199 // FIXME punt on this for now; don't forget to fix CHECKs
#if 0
// Compiled out pending the FIXME that precedes this region. The "@" written in
// place of the directive colon appears intended to keep the expectations below
// inert while the function is disabled.
// NOTE(review): the expected intrinsic text names saddlv.i64.v2i32, which does
// not match the float64x2_t operand used here; it looks like a placeholder.
// Confirm the real intrinsic name before re-enabling.
float64_t test_vminvq_f64(float64x2_t a1) {
  // CHECK@ test_vminvq_f64
  return vminvq_f64(a1);
  // CHECK@ llvm.aarch64.neon.saddlv.i64.v2i32
  // CHECK-NEXT@ ret
}
#endif
    208