;RUN: llc < %s -march=arm -mattr=+v7 -mattr=+neon | FileCheck %s

; Copy one <8 x i8> (64-bit) vector from %in to %out.
; Both the load and the store carry "align 1".
define void @v64_v8i8_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v8i8_1:
entry:
  ; Source address, viewed as a pointer to the vector type.
  %src.raw = getelementptr i8* %in, i32 0
  %src.vec = bitcast i8* %src.raw to <8 x i8>*
  ; Destination address, viewed the same way.
  %dst.raw = getelementptr i8* %out, i32 0
  %dst.vec = bitcast i8* %dst.raw to <8 x i8>*
;CHECK: vld1.8
  %val = load <8 x i8>* %src.vec, align 1
;CHECK: vst1.8
  store <8 x i8> %val, <8 x i8>* %dst.vec, align 1
  ret void
}


; Copy one <4 x i16> (64-bit) vector from %in to %out.
; Both the load and the store carry "align 1".
define void @v64_v4i16_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v4i16_1:
entry:
  ; Source address, viewed as a pointer to the vector type.
  %src.raw = getelementptr i8* %in, i32 0
  %src.vec = bitcast i8* %src.raw to <4 x i16>*
  ; Destination address, viewed the same way.
  %dst.raw = getelementptr i8* %out, i32 0
  %dst.vec = bitcast i8* %dst.raw to <4 x i16>*
;CHECK: vld1.8
  %val = load <4 x i16>* %src.vec, align 1
;CHECK: vst1.8
  store <4 x i16> %val, <4 x i16>* %dst.vec, align 1
  ret void
}


; Copy one <2 x i32> (64-bit) vector from %in to %out.
; Both the load and the store carry "align 1".
define void @v64_v2i32_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v2i32_1:
entry:
  ; Source address, viewed as a pointer to the vector type.
  %src.raw = getelementptr i8* %in, i32 0
  %src.vec = bitcast i8* %src.raw to <2 x i32>*
  ; Destination address, viewed the same way.
  %dst.raw = getelementptr i8* %out, i32 0
  %dst.vec = bitcast i8* %dst.raw to <2 x i32>*
;CHECK: vld1.8
  %val = load <2 x i32>* %src.vec, align 1
;CHECK: vst1.8
  store <2 x i32> %val, <2 x i32>* %dst.vec, align 1
  ret void
}


; Copy one <2 x float> (64-bit) vector from %in to %out.
; Both the load and the store carry "align 1".
define void @v64_v2f32_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v2f32_1:
entry:
  ; Source address, viewed as a pointer to the vector type.
  %src.raw = getelementptr i8* %in, i32 0
  %src.vec = bitcast i8* %src.raw to <2 x float>*
  ; Destination address, viewed the same way.
  %dst.raw = getelementptr i8* %out, i32 0
  %dst.vec = bitcast i8* %dst.raw to <2 x float>*
;CHECK: vld1.8
  %val = load <2 x float>* %src.vec, align 1
;CHECK: vst1.8
  store <2 x float> %val, <2 x float>* %dst.vec, align 1
  ret void
}


; Copy one <16 x i8> (128-bit) vector from %in to %out.
; Both the load and the store carry "align 1".
define void @v128_v16i8_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v16i8_1:
entry:
  ; Source address, viewed as a pointer to the vector type.
  %src.raw = getelementptr i8* %in, i32 0
  %src.vec = bitcast i8* %src.raw to <16 x i8>*
  ; Destination address, viewed the same way.
  %dst.raw = getelementptr i8* %out, i32 0
  %dst.vec = bitcast i8* %dst.raw to <16 x i8>*
;CHECK: vld1.8
  %val = load <16 x i8>* %src.vec, align 1
;CHECK: vst1.8
  store <16 x i8> %val, <16 x i8>* %dst.vec, align 1
  ret void
}


; Copy one <8 x i16> (128-bit) vector from %in to %out.
; Both the load and the store carry "align 1".
define void @v128_v8i16_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v8i16_1:
entry:
  ; Source address, viewed as a pointer to the vector type.
  %src.raw = getelementptr i8* %in, i32 0
  %src.vec = bitcast i8* %src.raw to <8 x i16>*
  ; Destination address, viewed the same way.
  %dst.raw = getelementptr i8* %out, i32 0
  %dst.vec = bitcast i8* %dst.raw to <8 x i16>*
;CHECK: vld1.8
  %val = load <8 x i16>* %src.vec, align 1
;CHECK: vst1.8
  store <8 x i16> %val, <8 x i16>* %dst.vec, align 1
  ret void
}


; Copy one <4 x i32> (128-bit) vector from %in to %out.
; Both the load and the store carry "align 1".
define void @v128_v4i32_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v4i32_1:
entry:
  ; Source address, viewed as a pointer to the vector type.
  %src.raw = getelementptr i8* %in, i32 0
  %src.vec = bitcast i8* %src.raw to <4 x i32>*
  ; Destination address, viewed the same way.
  %dst.raw = getelementptr i8* %out, i32 0
  %dst.vec = bitcast i8* %dst.raw to <4 x i32>*
;CHECK: vld1.8
  %val = load <4 x i32>* %src.vec, align 1
;CHECK: vst1.8
  store <4 x i32> %val, <4 x i32>* %dst.vec, align 1
  ret void
}


; Copy one <2 x i64> (128-bit) vector from %in to %out.
; Both the load and the store carry "align 1".
define void @v128_v2i64_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v2i64_1:
entry:
  ; Source address, viewed as a pointer to the vector type.
  %src.raw = getelementptr i8* %in, i32 0
  %src.vec = bitcast i8* %src.raw to <2 x i64>*
  ; Destination address, viewed the same way.
  %dst.raw = getelementptr i8* %out, i32 0
  %dst.vec = bitcast i8* %dst.raw to <2 x i64>*
;CHECK: vld1.8
  %val = load <2 x i64>* %src.vec, align 1
;CHECK: vst1.8
  store <2 x i64> %val, <2 x i64>* %dst.vec, align 1
  ret void
}


; Copy one <4 x float> (128-bit) vector from %in to %out.
; Both the load and the store carry "align 1".
define void @v128_v4f32_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v4f32_1:
entry:
  ; Source address, viewed as a pointer to the vector type.
  %src.raw = getelementptr i8* %in, i32 0
  %src.vec = bitcast i8* %src.raw to <4 x float>*
  ; Destination address, viewed the same way.
  %dst.raw = getelementptr i8* %out, i32 0
  %dst.vec = bitcast i8* %dst.raw to <4 x float>*
;CHECK: vld1.8
  %val = load <4 x float>* %src.vec, align 1
;CHECK: vst1.8
  store <4 x float> %val, <4 x float>* %dst.vec, align 1
  ret void
}


; Copy one <8 x i8> (64-bit) vector from %in to %out.
; Both the load and the store carry "align 2".
define void @v64_v8i8_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v8i8_2:
entry:
  ; Source address, viewed as a pointer to the vector type.
  %src.raw = getelementptr i8* %in, i32 0
  %src.vec = bitcast i8* %src.raw to <8 x i8>*
  ; Destination address, viewed the same way.
  %dst.raw = getelementptr i8* %out, i32 0
  %dst.vec = bitcast i8* %dst.raw to <8 x i8>*
;CHECK: vld1.16
  %val = load <8 x i8>* %src.vec, align 2
;CHECK: vst1.16
  store <8 x i8> %val, <8 x i8>* %dst.vec, align 2
  ret void
}


; Copy one <4 x i16> (64-bit) vector from %in to %out.
; Both the load and the store carry "align 2".
define void @v64_v4i16_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v4i16_2:
entry:
  ; Source address, viewed as a pointer to the vector type.
  %src.raw = getelementptr i8* %in, i32 0
  %src.vec = bitcast i8* %src.raw to <4 x i16>*
  ; Destination address, viewed the same way.
  %dst.raw = getelementptr i8* %out, i32 0
  %dst.vec = bitcast i8* %dst.raw to <4 x i16>*
;CHECK: vld1.16
  %val = load <4 x i16>* %src.vec, align 2
;CHECK: vst1.16
  store <4 x i16> %val, <4 x i16>* %dst.vec, align 2
  ret void
}


; Copy one <2 x i32> (64-bit) vector from %in to %out.
; Both the load and the store carry "align 2".
define void @v64_v2i32_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v2i32_2:
entry:
  ; Source address, viewed as a pointer to the vector type.
  %src.raw = getelementptr i8* %in, i32 0
  %src.vec = bitcast i8* %src.raw to <2 x i32>*
  ; Destination address, viewed the same way.
  %dst.raw = getelementptr i8* %out, i32 0
  %dst.vec = bitcast i8* %dst.raw to <2 x i32>*
;CHECK: vld1.16
  %val = load <2 x i32>* %src.vec, align 2
;CHECK: vst1.16
  store <2 x i32> %val, <2 x i32>* %dst.vec, align 2
  ret void
}


; Copy one <2 x float> (64-bit) vector from %in to %out.
; Both the load and the store carry "align 2".
define void @v64_v2f32_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v2f32_2:
entry:
  ; Source address, viewed as a pointer to the vector type.
  %src.raw = getelementptr i8* %in, i32 0
  %src.vec = bitcast i8* %src.raw to <2 x float>*
  ; Destination address, viewed the same way.
  %dst.raw = getelementptr i8* %out, i32 0
  %dst.vec = bitcast i8* %dst.raw to <2 x float>*
;CHECK: vld1.16
  %val = load <2 x float>* %src.vec, align 2
;CHECK: vst1.16
  store <2 x float> %val, <2 x float>* %dst.vec, align 2
  ret void
}


; Copy one <16 x i8> (128-bit) vector from %in to %out.
; Both the load and the store carry "align 2".
define void @v128_v16i8_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v16i8_2:
entry:
  ; Source address, viewed as a pointer to the vector type.
  %src.raw = getelementptr i8* %in, i32 0
  %src.vec = bitcast i8* %src.raw to <16 x i8>*
  ; Destination address, viewed the same way.
  %dst.raw = getelementptr i8* %out, i32 0
  %dst.vec = bitcast i8* %dst.raw to <16 x i8>*
;CHECK: vld1.16
  %val = load <16 x i8>* %src.vec, align 2
;CHECK: vst1.16
  store <16 x i8> %val, <16 x i8>* %dst.vec, align 2
  ret void
}


; Copy one <8 x i16> (128-bit) vector from %in to %out.
; Both the load and the store carry "align 2".
define void @v128_v8i16_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v8i16_2:
entry:
  ; Source address, viewed as a pointer to the vector type.
  %src.raw = getelementptr i8* %in, i32 0
  %src.vec = bitcast i8* %src.raw to <8 x i16>*
  ; Destination address, viewed the same way.
  %dst.raw = getelementptr i8* %out, i32 0
  %dst.vec = bitcast i8* %dst.raw to <8 x i16>*
;CHECK: vld1.16
  %val = load <8 x i16>* %src.vec, align 2
;CHECK: vst1.16
  store <8 x i16> %val, <8 x i16>* %dst.vec, align 2
  ret void
}


; Copy one <4 x i32> (128-bit) vector from %in to %out.
; Both the load and the store carry "align 2".
define void @v128_v4i32_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v4i32_2:
entry:
  ; Source address, viewed as a pointer to the vector type.
  %src.raw = getelementptr i8* %in, i32 0
  %src.vec = bitcast i8* %src.raw to <4 x i32>*
  ; Destination address, viewed the same way.
  %dst.raw = getelementptr i8* %out, i32 0
  %dst.vec = bitcast i8* %dst.raw to <4 x i32>*
;CHECK: vld1.16
  %val = load <4 x i32>* %src.vec, align 2
;CHECK: vst1.16
  store <4 x i32> %val, <4 x i32>* %dst.vec, align 2
  ret void
}


; Copy one <2 x i64> (128-bit) vector from %in to %out.
; Both the load and the store carry "align 2".
define void @v128_v2i64_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v2i64_2:
entry:
  ; Source address, viewed as a pointer to the vector type.
  %src.raw = getelementptr i8* %in, i32 0
  %src.vec = bitcast i8* %src.raw to <2 x i64>*
  ; Destination address, viewed the same way.
  %dst.raw = getelementptr i8* %out, i32 0
  %dst.vec = bitcast i8* %dst.raw to <2 x i64>*
;CHECK: vld1.16
  %val = load <2 x i64>* %src.vec, align 2
;CHECK: vst1.16
  store <2 x i64> %val, <2 x i64>* %dst.vec, align 2
  ret void
}


; Copy one <4 x float> (128-bit) vector from %in to %out.
; Both the load and the store carry "align 2".
define void @v128_v4f32_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v4f32_2:
entry:
  ; Source address, viewed as a pointer to the vector type.
  %src.raw = getelementptr i8* %in, i32 0
  %src.vec = bitcast i8* %src.raw to <4 x float>*
  ; Destination address, viewed the same way.
  %dst.raw = getelementptr i8* %out, i32 0
  %dst.vec = bitcast i8* %dst.raw to <4 x float>*
;CHECK: vld1.16
  %val = load <4 x float>* %src.vec, align 2
;CHECK: vst1.16
  store <4 x float> %val, <4 x float>* %dst.vec, align 2
  ret void
}


; Copy one <8 x i8> (64-bit) vector from %in to %out.
; Both the load and the store carry "align 4".
define void @v64_v8i8_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v8i8_4:
entry:
  ; Source address, viewed as a pointer to the vector type.
  %src.raw = getelementptr i8* %in, i32 0
  %src.vec = bitcast i8* %src.raw to <8 x i8>*
  ; Destination address, viewed the same way.
  %dst.raw = getelementptr i8* %out, i32 0
  %dst.vec = bitcast i8* %dst.raw to <8 x i8>*
;CHECK: vldr
  %val = load <8 x i8>* %src.vec, align 4
;CHECK: vstr
  store <8 x i8> %val, <8 x i8>* %dst.vec, align 4
  ret void
}


; Copy one <4 x i16> (64-bit) vector from %in to %out.
; Both the load and the store carry "align 4".
define void @v64_v4i16_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v4i16_4:
entry:
  ; Source address, viewed as a pointer to the vector type.
  %src.raw = getelementptr i8* %in, i32 0
  %src.vec = bitcast i8* %src.raw to <4 x i16>*
  ; Destination address, viewed the same way.
  %dst.raw = getelementptr i8* %out, i32 0
  %dst.vec = bitcast i8* %dst.raw to <4 x i16>*
;CHECK: vldr
  %val = load <4 x i16>* %src.vec, align 4
;CHECK: vstr
  store <4 x i16> %val, <4 x i16>* %dst.vec, align 4
  ret void
}


; Copy one <2 x i32> (64-bit) vector from %in to %out.
; Both the load and the store carry "align 4".
define void @v64_v2i32_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v2i32_4:
entry:
  ; Source address, viewed as a pointer to the vector type.
  %src.raw = getelementptr i8* %in, i32 0
  %src.vec = bitcast i8* %src.raw to <2 x i32>*
  ; Destination address, viewed the same way.
  %dst.raw = getelementptr i8* %out, i32 0
  %dst.vec = bitcast i8* %dst.raw to <2 x i32>*
;CHECK: vldr
  %val = load <2 x i32>* %src.vec, align 4
;CHECK: vstr
  store <2 x i32> %val, <2 x i32>* %dst.vec, align 4
  ret void
}


; Copy one <2 x float> (64-bit) vector from %in to %out.
; Both the load and the store carry "align 4".
define void @v64_v2f32_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v2f32_4:
entry:
  ; Source address, viewed as a pointer to the vector type.
  %src.raw = getelementptr i8* %in, i32 0
  %src.vec = bitcast i8* %src.raw to <2 x float>*
  ; Destination address, viewed the same way.
  %dst.raw = getelementptr i8* %out, i32 0
  %dst.vec = bitcast i8* %dst.raw to <2 x float>*
;CHECK: vldr
  %val = load <2 x float>* %src.vec, align 4
;CHECK: vstr
  store <2 x float> %val, <2 x float>* %dst.vec, align 4
  ret void
}


; Copy one <16 x i8> (128-bit) vector from %in to %out.
; Both the load and the store carry "align 4".
define void @v128_v16i8_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v16i8_4:
entry:
  ; Source address, viewed as a pointer to the vector type.
  %src.raw = getelementptr i8* %in, i32 0
  %src.vec = bitcast i8* %src.raw to <16 x i8>*
  ; Destination address, viewed the same way.
  %dst.raw = getelementptr i8* %out, i32 0
  %dst.vec = bitcast i8* %dst.raw to <16 x i8>*
;CHECK: vld1.32
  %val = load <16 x i8>* %src.vec, align 4
;CHECK: vst1.32
  store <16 x i8> %val, <16 x i8>* %dst.vec, align 4
  ret void
}


; Copy one <8 x i16> (128-bit) vector from %in to %out.
; Both the load and the store carry "align 4".
define void @v128_v8i16_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v8i16_4:
entry:
  ; Source address, viewed as a pointer to the vector type.
  %src.raw = getelementptr i8* %in, i32 0
  %src.vec = bitcast i8* %src.raw to <8 x i16>*
  ; Destination address, viewed the same way.
  %dst.raw = getelementptr i8* %out, i32 0
  %dst.vec = bitcast i8* %dst.raw to <8 x i16>*
;CHECK: vld1.32
  %val = load <8 x i16>* %src.vec, align 4
;CHECK: vst1.32
  store <8 x i16> %val, <8 x i16>* %dst.vec, align 4
  ret void
}


; Copy one <4 x i32> (128-bit) vector from %in to %out.
; Both the load and the store carry "align 4".
define void @v128_v4i32_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v4i32_4:
entry:
  ; Source address, viewed as a pointer to the vector type.
  %src.raw = getelementptr i8* %in, i32 0
  %src.vec = bitcast i8* %src.raw to <4 x i32>*
  ; Destination address, viewed the same way.
  %dst.raw = getelementptr i8* %out, i32 0
  %dst.vec = bitcast i8* %dst.raw to <4 x i32>*
;CHECK: vld1.32
  %val = load <4 x i32>* %src.vec, align 4
;CHECK: vst1.32
  store <4 x i32> %val, <4 x i32>* %dst.vec, align 4
  ret void
}


; Copy one <2 x i64> (128-bit) vector from %in to %out.
; Both the load and the store carry "align 4".
define void @v128_v2i64_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v2i64_4:
entry:
  ; Source address, viewed as a pointer to the vector type.
  %src.raw = getelementptr i8* %in, i32 0
  %src.vec = bitcast i8* %src.raw to <2 x i64>*
  ; Destination address, viewed the same way.
  %dst.raw = getelementptr i8* %out, i32 0
  %dst.vec = bitcast i8* %dst.raw to <2 x i64>*
;CHECK: vld1.32
  %val = load <2 x i64>* %src.vec, align 4
;CHECK: vst1.32
  store <2 x i64> %val, <2 x i64>* %dst.vec, align 4
  ret void
}


; Copy one <4 x float> (128-bit) vector from %in to %out.
; Both the load and the store carry "align 4".
define void @v128_v4f32_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v4f32_4:
entry:
  ; Source address, viewed as a pointer to the vector type.
  %src.raw = getelementptr i8* %in, i32 0
  %src.vec = bitcast i8* %src.raw to <4 x float>*
  ; Destination address, viewed the same way.
  %dst.raw = getelementptr i8* %out, i32 0
  %dst.vec = bitcast i8* %dst.raw to <4 x float>*
;CHECK: vld1.32
  %val = load <4 x float>* %src.vec, align 4
;CHECK: vst1.32
  store <4 x float> %val, <4 x float>* %dst.vec, align 4
  ret void
}
