// RUN: %clang_cc1 -O1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -S -o - -emit-llvm %s | FileCheck %s
// RUN: %clang_cc1 -O1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -S -o - %s | FileCheck -check-prefix=CHECK-CODEGEN %s
// REQUIRES: aarch64-registered-target
// Test ARM64 SIMD max/min intrinsics. The first RUN line checks the emitted
// LLVM IR; the second checks the generated assembly (CHECK-CODEGEN).

#include <arm_neon.h>

// Test a representative sample of 8- and 16-bit, signed and unsigned, 64-bit and 128-bit reductions
int8_t test_vmaxv_s8(int8x8_t a1) {
  // CHECK: test_vmaxv_s8
  return vmaxv_s8(a1);
  // CHECK: @llvm.aarch64.neon.smaxv.i32.v8i8
}

uint16_t test_vminvq_u16(uint16x8_t a1) {
  // CHECK: test_vminvq_u16
  return vminvq_u16(a1);
  // CHECK: llvm.aarch64.neon.uminv.i32.v8i16
}

// Test a representative sample of 8- and 16-bit, signed and unsigned, 64-bit and 128-bit pairwise operations
uint8x8_t test_vmin_u8(uint8x8_t a1, uint8x8_t a2) {
  // CHECK: test_vmin_u8
  return vmin_u8(a1, a2);
  // CHECK: llvm.aarch64.neon.umin.v8i8
}

uint8x16_t test_vminq_u8(uint8x16_t a1, uint8x16_t a2) {
  // CHECK: test_vminq_u8
  return vminq_u8(a1, a2);
  // CHECK: llvm.aarch64.neon.umin.v16i8
}

int16x8_t test_vmaxq_s16(int16x8_t a1, int16x8_t a2) {
  // CHECK: test_vmaxq_s16
  return vmaxq_s16(a1, a2);
  // CHECK: llvm.aarch64.neon.smax.v8i16
}

// Test the more complicated cases of [suf]32 and f64
float64x2_t test_vmaxq_f64(float64x2_t a1, float64x2_t a2) {
  // CHECK: test_vmaxq_f64
  return vmaxq_f64(a1, a2);
  // CHECK: llvm.aarch64.neon.fmax.v2f64
}

float32x4_t test_vmaxq_f32(float32x4_t a1, float32x4_t a2) {
  // CHECK: test_vmaxq_f32
  return vmaxq_f32(a1, a2);
  // CHECK: llvm.aarch64.neon.fmax.v4f32
}

float64x2_t test_vminq_f64(float64x2_t a1, float64x2_t a2) {
  // CHECK: test_vminq_f64
  return vminq_f64(a1, a2);
  // CHECK: llvm.aarch64.neon.fmin.v2f64
}

float32x2_t test_vmax_f32(float32x2_t a1, float32x2_t a2) {
  // CHECK: test_vmax_f32
  return vmax_f32(a1, a2);
  // CHECK: llvm.aarch64.neon.fmax.v2f32
}

int32x2_t test_vmax_s32(int32x2_t a1, int32x2_t a2) {
  // CHECK: test_vmax_s32
  return vmax_s32(a1, a2);
  // CHECK: llvm.aarch64.neon.smax.v2i32
}

uint32x2_t test_vmin_u32(uint32x2_t a1, uint32x2_t a2) {
  // CHECK: test_vmin_u32
  return vmin_u32(a1, a2);
  // CHECK: llvm.aarch64.neon.umin.v2i32
}

float32_t test_vmaxnmv_f32(float32x2_t a1) {
  // CHECK: test_vmaxnmv_f32
  return vmaxnmv_f32(a1);
  // CHECK: llvm.aarch64.neon.fmaxnmv.f32.v2f32
  // CHECK-NEXT: ret
}

// This doesn't translate into a valid instruction, regardless of what the
// ARM doc says.
#if 0
float64_t test_vmaxnmvq_f64(float64x2_t a1) {
  // CHECK@ test_vmaxnmvq_f64
  return vmaxnmvq_f64(a1);
  // CHECK@ llvm.aarch64.neon.saddlv.i64.v2i32
  // CHECK-NEXT@ ret
}
#endif

float32_t test_vmaxnmvq_f32(float32x4_t a1) {
  // CHECK: test_vmaxnmvq_f32
  return vmaxnmvq_f32(a1);
  // CHECK: llvm.aarch64.neon.fmaxnmv.f32.v4f32
  // CHECK-NEXT: ret
}

float32_t test_vmaxv_f32(float32x2_t a1) {
  // CHECK: test_vmaxv_f32
  return vmaxv_f32(a1);
  // CHECK: llvm.aarch64.neon.fmaxv.f32.v2f32
  // FIXME check that the 2nd and 3rd arguments are the same V register below
  // CHECK-CODEGEN: fmaxp.2s
  // CHECK-NEXT: ret
}

int32_t test_vmaxv_s32(int32x2_t a1) {
  // CHECK: test_vmaxv_s32
  return vmaxv_s32(a1);
  // CHECK: llvm.aarch64.neon.smaxv.i32.v2i32
  // FIXME check that the 2nd and 3rd arguments are the same V register below
  // CHECK-CODEGEN: smaxp.2s
  // CHECK-NEXT: ret
}

uint32_t test_vmaxv_u32(uint32x2_t a1) {
  // CHECK: test_vmaxv_u32
  return vmaxv_u32(a1);
  // CHECK: llvm.aarch64.neon.umaxv.i32.v2i32
  // FIXME check that the 2nd and 3rd arguments are the same V register below
  // CHECK-CODEGEN: umaxp.2s
  // CHECK-NEXT: ret
}

// FIXME punt on this for now; don't forget to fix CHECKs
#if 0
float64_t test_vmaxvq_f64(float64x2_t a1) {
  // CHECK@ test_vmaxvq_f64
  return vmaxvq_f64(a1);
  // CHECK@ llvm.aarch64.neon.fmaxv.i64.v2f64
  // CHECK-NEXT@ ret
}
#endif

float32_t test_vmaxvq_f32(float32x4_t a1) {
  // CHECK: test_vmaxvq_f32
  return vmaxvq_f32(a1);
  // CHECK: llvm.aarch64.neon.fmaxv.f32.v4f32
  // CHECK-NEXT: ret
}

float32_t test_vminnmv_f32(float32x2_t a1) {
  // CHECK: test_vminnmv_f32
  return vminnmv_f32(a1);
  // CHECK: llvm.aarch64.neon.fminnmv.f32.v2f32
  // CHECK-NEXT: ret
}

float32_t test_vminvq_f32(float32x4_t a1) {
  // CHECK: test_vminvq_f32
  return vminvq_f32(a1);
  // CHECK: llvm.aarch64.neon.fminv.f32.v4f32
  // CHECK-NEXT: ret
}

// This doesn't translate into a valid instruction, regardless of what the ARM
// doc says.
#if 0
float64_t test_vminnmvq_f64(float64x2_t a1) {
  // CHECK@ test_vminnmvq_f64
  return vminnmvq_f64(a1);
  // CHECK@ llvm.aarch64.neon.saddlv.i64.v2i32
  // CHECK-NEXT@ ret
}
#endif

float32_t test_vminnmvq_f32(float32x4_t a1) {
  // CHECK: test_vminnmvq_f32
  return vminnmvq_f32(a1);
  // CHECK: llvm.aarch64.neon.fminnmv.f32.v4f32
  // CHECK-NEXT: ret
}

float32_t test_vminv_f32(float32x2_t a1) {
  // CHECK: test_vminv_f32
  return vminv_f32(a1);
  // CHECK: llvm.aarch64.neon.fminv.f32.v2f32
  // CHECK-NEXT: ret
}

int32_t test_vminv_s32(int32x2_t a1) {
  // CHECK: test_vminv_s32
  return vminv_s32(a1);
  // CHECK: llvm.aarch64.neon.sminv.i32.v2i32
  // CHECK-CODEGEN: sminp.2s
  // CHECK-NEXT: ret
}

uint32_t test_vminv_u32(uint32x2_t a1) {
  // CHECK: test_vminv_u32
  return vminv_u32(a1);
  // CHECK: llvm.aarch64.neon.uminv.i32.v2i32
}

// FIXME punt on this for now; don't forget to fix CHECKs
#if 0
float64_t test_vminvq_f64(float64x2_t a1) {
  // CHECK@ test_vminvq_f64
  return vminvq_f64(a1);
  // CHECK@ llvm.aarch64.neon.saddlv.i64.v2i32
  // CHECK-NEXT@ ret
}
#endif