1 // REQUIRES: aarch64-registered-target 2 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -S -O3 -o - %s | FileCheck %s 3 4 // Test new aarch64 intrinsics and types 5 6 #include <arm_neon.h> 7 8 float32x2_t test_vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c) { 9 // CHECK-LABEL: test_vmla_n_f32 10 return vmla_n_f32(a, b, c); 11 // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 12 // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 13 // CHECK-FMA: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 14 // CHECK-FMA: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 15 } 16 17 float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) { 18 // CHECK-LABEL: test_vmlaq_n_f32 19 return vmlaq_n_f32(a, b, c); 20 // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 21 // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 22 // CHECK-FMA: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 23 // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 24 } 25 26 float64x2_t test_vmlaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) { 27 // CHECK-LABEL: test_vmlaq_n_f64 28 return vmlaq_n_f64(a, b, c); 29 // CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 30 // CHECK: fadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 31 // CHECK-FMA: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 32 // CHECK-FMA: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 33 } 34 35 float32x4_t test_vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c) { 36 // CHECK-LABEL: test_vmlsq_n_f32 37 return vmlsq_n_f32(a, b, c); 38 // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 39 // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 40 // CHECK-FMA: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 41 // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 42 } 43 44 float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) { 45 // CHECK-LABEL: test_vmls_n_f32 46 return vmls_n_f32(a, b, c); 47 // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 48 // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 49 // CHECK-FMA: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 50 // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 51 } 52 53 float64x2_t test_vmlsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) { 54 // CHECK-LABEL: test_vmlsq_n_f64 55 return vmlsq_n_f64(a, b, c); 56 // CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 57 // CHECK: fsub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 58 // CHECK-FMA: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 59 // CHECK-FMA: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 60 } 61 62 float32x2_t test_vmla_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) { 63 // CHECK-LABEL: test_vmla_lane_f32_0 64 return vmla_lane_f32(a, b, v, 0); 65 // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 66 // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 67 // CHECK-FMA: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 68 } 69 70 float32x4_t test_vmlaq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) { 71 // CHECK-LABEL: test_vmlaq_lane_f32_0 72 return vmlaq_lane_f32(a, b, v, 0); 73 // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 74 // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 75 // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 76 } 77 78 float32x2_t test_vmla_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) { 79 // CHECK-LABEL: test_vmla_laneq_f32_0 80 return vmla_laneq_f32(a, b, v, 0); 81 // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 82 // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 83 // CHECK-FMA: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 84 } 85 86 float32x4_t test_vmlaq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) { 87 // CHECK-LABEL: test_vmlaq_laneq_f32_0 88 return vmlaq_laneq_f32(a, b, v, 0); 89 // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 90 // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 91 // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 92 } 93 94 float32x2_t test_vmls_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) { 95 // CHECK-LABEL: test_vmls_lane_f32_0 96 return vmls_lane_f32(a, b, v, 0); 97 // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 98 // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 99 // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 100 } 101 102 float32x4_t test_vmlsq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) { 103 // CHECK-LABEL: test_vmlsq_lane_f32_0 104 return vmlsq_lane_f32(a, b, v, 0); 105 // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 106 // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 107 // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 108 } 109 110 float32x2_t test_vmls_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) { 111 // CHECK-LABEL: test_vmls_laneq_f32_0 112 return vmls_laneq_f32(a, b, v, 0); 113 // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 114 // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 115 // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 116 } 117 118 float32x4_t test_vmlsq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) { 119 // CHECK-LABEL: test_vmlsq_laneq_f32_0 120 return vmlsq_laneq_f32(a, b, v, 0); 121 // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 122 // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 123 // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 124 } 125 126 float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) { 127 // CHECK-LABEL: test_vmla_lane_f32 128 return vmla_lane_f32(a, b, v, 1); 129 // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 130 // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 131 // CHECK-FMA: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 132 } 133 134 float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) { 135 // CHECK-LABEL: test_vmlaq_lane_f32 136 return vmlaq_lane_f32(a, b, v, 1); 137 // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 138 // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 139 // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 140 } 141 142 float32x2_t test_vmla_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) { 143 // CHECK-LABEL: test_vmla_laneq_f32 144 return vmla_laneq_f32(a, b, v, 3); 145 // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 146 // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 147 // CHECK-FMA: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 148 } 149 150 float32x4_t test_vmlaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) { 151 // CHECK-LABEL: test_vmlaq_laneq_f32 152 return vmlaq_laneq_f32(a, b, v, 3); 153 // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 154 // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 155 // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 156 } 157 158 float32x2_t test_vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) { 159 // CHECK-LABEL: test_vmls_lane_f32 160 return vmls_lane_f32(a, b, v, 1); 161 // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 162 // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 163 // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 164 } 165 166 float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) { 167 // CHECK-LABEL: test_vmlsq_lane_f32 168 return vmlsq_lane_f32(a, b, v, 1); 169 // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 170 // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 171 // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 172 } 173 float32x2_t test_vmls_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) { 174 // CHECK-LABEL: test_vmls_laneq_f32 175 return vmls_laneq_f32(a, b, v, 3); 176 // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 177 // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 178 // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 179 } 180 181 float32x4_t test_vmlsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) { 182 // CHECK-LABEL: test_vmlsq_laneq_f32 183 return vmlsq_laneq_f32(a, b, v, 3); 184 // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 185 // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 186 // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 187 } 188 189 float64x2_t test_vfmaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) { 190 // CHECK-LABEL: test_vfmaq_n_f64: 191 return vfmaq_n_f64(a, b, c); 192 // CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+\.2d|v[0-9]+\.d\[0\]}} 193 } 194 195 float64x2_t test_vfmsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) { 196 // CHECK-LABEL: test_vfmsq_n_f64: 197 return vfmsq_n_f64(a, b, c); 198 // CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+\.2d|v[0-9]+\.d\[0\]}} 199 } 200