1 // REQUIRES: aarch64-registered-target 2 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ 3 // RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s --check-prefix=CHECK \ 4 // RUN: --check-prefix=CHECK-ARM64 5 6 // Test new aarch64 intrinsics with poly64 7 8 #include <arm_neon.h> 9 10 uint64x1_t test_vceq_p64(poly64x1_t a, poly64x1_t b) { 11 // CHECK-LABEL: test_vceq_p64 12 return vceq_p64(a, b); 13 // CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} 14 } 15 16 uint64x2_t test_vceqq_p64(poly64x2_t a, poly64x2_t b) { 17 // CHECK-LABEL: test_vceqq_p64 18 return vceqq_p64(a, b); 19 // CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 20 } 21 22 uint64x1_t test_vtst_p64(poly64x1_t a, poly64x1_t b) { 23 // CHECK-LABEL: test_vtst_p64 24 return vtst_p64(a, b); 25 // CHECK: cmtst {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} 26 } 27 28 uint64x2_t test_vtstq_p64(poly64x2_t a, poly64x2_t b) { 29 // CHECK-LABEL: test_vtstq_p64 30 return vtstq_p64(a, b); 31 // CHECK: cmtst {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 32 } 33 34 poly64x1_t test_vbsl_p64(poly64x1_t a, poly64x1_t b, poly64x1_t c) { 35 // CHECK-LABEL: test_vbsl_p64 36 return vbsl_p64(a, b, c); 37 // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b 38 } 39 40 poly64x2_t test_vbslq_p64(poly64x2_t a, poly64x2_t b, poly64x2_t c) { 41 // CHECK-LABEL: test_vbslq_p64 42 return vbslq_p64(a, b, c); 43 // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b 44 } 45 46 poly64_t test_vget_lane_p64(poly64x1_t v) { 47 // CHECK-LABEL: test_vget_lane_p64 48 return vget_lane_p64(v, 0); 49 // CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} 50 } 51 52 poly64_t test_vgetq_lane_p64(poly64x2_t v) { 53 // CHECK-LABEL: test_vgetq_lane_p64 54 return vgetq_lane_p64(v, 1); 55 // CHECK: {{mov|umov}} {{x[0-9]+}}, {{v[0-9]+}}.d[1] 56 } 57 58 poly64x1_t test_vset_lane_p64(poly64_t a, poly64x1_t v) { 59 // CHECK-LABEL: test_vset_lane_p64 60 return vset_lane_p64(a, v, 0); 61 // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} 62 } 63 64 poly64x2_t test_vsetq_lane_p64(poly64_t a, poly64x2_t v) { 65 // CHECK-LABEL: test_vsetq_lane_p64 66 return vsetq_lane_p64(a, v, 1); 67 // CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}} 68 } 69 70 poly64x1_t test_vcopy_lane_p64(poly64x1_t a, poly64x1_t b) { 71 // CHECK-LABEL: test_vcopy_lane_p64 72 return vcopy_lane_p64(a, 0, b, 0); 73 74 // CHECK-ARM64: mov v0.16b, v1.16b 75 } 76 77 poly64x2_t test_vcopyq_lane_p64(poly64x2_t a, poly64x1_t b) { 78 // CHECK-LABEL: test_vcopyq_lane_p64 79 return vcopyq_lane_p64(a, 1, b, 0); 80 // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 81 } 82 83 poly64x2_t test_vcopyq_laneq_p64(poly64x2_t a, poly64x2_t b) { 84 // CHECK-LABEL: test_vcopyq_laneq_p64 85 return vcopyq_laneq_p64(a, 1, b, 1); 86 } 87 88 poly64x1_t test_vcreate_p64(uint64_t a) { 89 // CHECK-LABEL: test_vcreate_p64 90 return vcreate_p64(a); 91 // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} 92 } 93 94 poly64x1_t test_vdup_n_p64(poly64_t a) { 95 // CHECK-LABEL: test_vdup_n_p64 96 return vdup_n_p64(a); 97 // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} 98 } 99 poly64x2_t test_vdupq_n_p64(poly64_t a) { 100 // CHECK-LABEL: test_vdupq_n_p64 101 return vdupq_n_p64(a); 102 // CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}} 103 } 104 105 poly64x1_t test_vmov_n_p64(poly64_t a) { 106 // CHECK-LABEL: test_vmov_n_p64 107 return vmov_n_p64(a); 108 // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} 109 } 110 111 poly64x2_t test_vmovq_n_p64(poly64_t a) { 112 // CHECK-LABEL: test_vmovq_n_p64 113 return vmovq_n_p64(a); 114 // CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}} 115 } 116 117 poly64x1_t test_vdup_lane_p64(poly64x1_t vec) { 118 // CHECK-LABEL: test_vdup_lane_p64 119 return vdup_lane_p64(vec, 0); 120 // CHECK: ret 121 } 122 123 poly64x2_t test_vdupq_lane_p64(poly64x1_t vec) { 124 // CHECK-LABEL: test_vdupq_lane_p64 125 return vdupq_lane_p64(vec, 0); 126 // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 127 } 128 129 poly64x2_t test_vdupq_laneq_p64(poly64x2_t vec) { 130 // CHECK-LABEL: test_vdupq_laneq_p64 131 return vdupq_laneq_p64(vec, 1); 132 // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] 133 } 134 135 poly64x2_t test_vcombine_p64(poly64x1_t low, poly64x1_t high) { 136 // CHECK-LABEL: test_vcombine_p64 137 return vcombine_p64(low, high); 138 // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 139 } 140 141 poly64x1_t test_vld1_p64(poly64_t const * ptr) { 142 // CHECK-LABEL: test_vld1_p64 143 return vld1_p64(ptr); 144 // CHECK-ARM64: ldr {{d[0-9]+}}, [{{x[0-9]+|sp}}] 145 } 146 147 poly64x2_t test_vld1q_p64(poly64_t const * ptr) { 148 // CHECK-LABEL: test_vld1q_p64 149 return vld1q_p64(ptr); 150 // CHECK-ARM64: ldr {{q[0-9]+}}, [{{x[0-9]+|sp}}] 151 } 152 153 void test_vst1_p64(poly64_t * ptr, poly64x1_t val) { 154 // CHECK-LABEL: test_vst1_p64 155 return vst1_p64(ptr, val); 156 // CHECK-ARM64: str {{d[0-9]+}}, [{{x[0-9]+|sp}}] 157 } 158 159 void test_vst1q_p64(poly64_t * ptr, poly64x2_t val) { 160 // CHECK-LABEL: test_vst1q_p64 161 return vst1q_p64(ptr, val); 162 // CHECK-ARM64: str {{q[0-9]+}}, [{{x[0-9]+|sp}}] 163 } 164 165 poly64x1x2_t test_vld2_p64(poly64_t const * ptr) { 166 // CHECK-LABEL: test_vld2_p64 167 return vld2_p64(ptr); 168 // CHECK: ld1 {{{ *v[0-9]+.1d, v[0-9]+.1d *}}}, [{{x[0-9]+|sp}}] 169 } 170 171 poly64x2x2_t test_vld2q_p64(poly64_t const * ptr) { 172 // CHECK-LABEL: test_vld2q_p64 173 return vld2q_p64(ptr); 174 // CHECK: ld2 {{{ *v[0-9]+.2d, v[0-9]+.2d *}}}, [{{x[0-9]+|sp}}] 175 } 176 177 poly64x1x3_t test_vld3_p64(poly64_t const * ptr) { 178 // CHECK-LABEL: test_vld3_p64 179 return vld3_p64(ptr); 180 // CHECK: ld1 {{{ *v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d *}}}, [{{x[0-9]+|sp}}] 181 } 182 183 poly64x2x3_t test_vld3q_p64(poly64_t const * ptr) { 184 // CHECK-LABEL: test_vld3q_p64 185 return vld3q_p64(ptr); 186 // CHECK: ld3 {{{ *v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d *}}}, [{{x[0-9]+|sp}}] 187 } 188 189 poly64x1x4_t test_vld4_p64(poly64_t const * ptr) { 190 // CHECK-LABEL: test_vld4_p64 191 return vld4_p64(ptr); 192 // CHECK: ld1 {{{ *v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d *}}}, [{{x[0-9]+|sp}}] 193 } 194 195 poly64x2x4_t test_vld4q_p64(poly64_t const * ptr) { 196 // CHECK-LABEL: test_vld4q_p64 197 return vld4q_p64(ptr); 198 // CHECK: ld4 {{{ *v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d *}}}, [{{x[0-9]+|sp}}] 199 } 200 201 void test_vst2_p64(poly64_t * ptr, poly64x1x2_t val) { 202 // CHECK-LABEL: test_vst2_p64 203 return vst2_p64(ptr, val); 204 // CHECK: st1 {{{ *v[0-9]+.1d, v[0-9]+.1d *}}}, [{{x[0-9]+|sp}}] 205 } 206 207 void test_vst2q_p64(poly64_t * ptr, poly64x2x2_t val) { 208 // CHECK-LABEL: test_vst2q_p64 209 return vst2q_p64(ptr, val); 210 // CHECK: st2 {{{ *v[0-9]+.2d, v[0-9]+.2d *}}}, [{{x[0-9]+|sp}}] 211 } 212 213 void test_vst3_p64(poly64_t * ptr, poly64x1x3_t val) { 214 // CHECK-LABEL: test_vst3_p64 215 return vst3_p64(ptr, val); 216 // CHECK: st1 {{{ *v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d *}}}, [{{x[0-9]+|sp}}] 217 } 218 219 void test_vst3q_p64(poly64_t * ptr, poly64x2x3_t val) { 220 // CHECK-LABEL: test_vst3q_p64 221 return vst3q_p64(ptr, val); 222 // CHECK: st3 {{{ *v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d *}}}, [{{x[0-9]+|sp}}] 223 } 224 225 void test_vst4_p64(poly64_t * ptr, poly64x1x4_t val) { 226 // CHECK-LABEL: test_vst4_p64 227 return vst4_p64(ptr, val); 228 // CHECK: st1 {{{ *v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d *}}}, [{{x[0-9]+|sp}}] 229 } 230 231 void test_vst4q_p64(poly64_t * ptr, poly64x2x4_t val) { 232 // CHECK-LABEL: test_vst4q_p64 233 return vst4q_p64(ptr, val); 234 // CHECK: st4 {{{ *v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d *}}}, [{{x[0-9]+|sp}}] 235 } 236 237 poly64x1_t test_vext_p64(poly64x1_t a, poly64x1_t b) { 238 // CHECK-LABEL: test_vext_p64 239 return vext_u64(a, b, 0); 240 241 } 242 243 poly64x2_t test_vextq_p64(poly64x2_t a, poly64x2_t b) { 244 // CHECK-LABEL: test_vextq_p64 245 return vextq_p64(a, b, 1); 246 // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{#0x8|#8}} 247 } 248 249 poly64x2_t test_vzip1q_p64(poly64x2_t a, poly64x2_t b) { 250 // CHECK-LABEL: test_vzip1q_p64 251 return vzip1q_p64(a, b); 252 // CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 253 } 254 255 poly64x2_t test_vzip2q_p64(poly64x2_t a, poly64x2_t b) { 256 // CHECK-LABEL: test_vzip2q_p64 257 return vzip2q_u64(a, b); 258 // CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 259 } 260 261 poly64x2_t test_vuzp1q_p64(poly64x2_t a, poly64x2_t b) { 262 // CHECK-LABEL: test_vuzp1q_p64 263 return vuzp1q_p64(a, b); 264 // CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 265 } 266 267 poly64x2_t test_vuzp2q_p64(poly64x2_t a, poly64x2_t b) { 268 // CHECK-LABEL: test_vuzp2q_p64 269 return vuzp2q_u64(a, b); 270 // CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 271 } 272 273 poly64x2_t test_vtrn1q_p64(poly64x2_t a, poly64x2_t b) { 274 // CHECK-LABEL: test_vtrn1q_p64 275 return vtrn1q_p64(a, b); 276 // CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 277 } 278 279 poly64x2_t test_vtrn2q_p64(poly64x2_t a, poly64x2_t b) { 280 // CHECK-LABEL: test_vtrn2q_p64 281 return vtrn2q_u64(a, b); 282 // CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 283 } 284 285 poly64x1_t test_vsri_n_p64(poly64x1_t a, poly64x1_t b) { 286 // CHECK-LABEL: test_vsri_n_p64 287 return vsri_n_p64(a, b, 33); 288 // CHECK: sri {{d[0-9]+}}, {{d[0-9]+}}, #33 289 } 290 291 poly64x2_t test_vsriq_n_p64(poly64x2_t a, poly64x2_t b) { 292 // CHECK-LABEL: test_vsriq_n_p64 293 return vsriq_n_p64(a, b, 64); 294 // CHECK: sri {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #64 295 } 296 297