; RUN: llc < %s -mtriple aarch64-unknown-unknown -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s

target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"

; Test the (concat_vectors (bitcast (scalar)), ..) pattern.

; The low 16 bits of %x are viewed as <2 x i8> and that pair is repeated four
; times to fill the <8 x i8> result. Expected lowering: a single dup of w0
; across four 16-bit lanes.
define <8 x i8> @test_concat_scalar_v2i8_to_v8i8_dup(i32 %x) #0 {
entry:
; CHECK-LABEL: test_concat_scalar_v2i8_to_v8i8_dup:
; CHECK-NEXT: dup.4h v0, w0
; CHECK-NEXT: ret
  %lo16 = trunc i32 %x to i16
  %pair = bitcast i16 %lo16 to <2 x i8>
  %splat = shufflevector <2 x i8> %pair, <2 x i8> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x i8> %splat
}

; All 32 bits of %x are viewed as <4 x i8> and repeated twice. Expected
; lowering: a single dup of w0 across two 32-bit lanes.
define <8 x i8> @test_concat_scalar_v4i8_to_v8i8_dup(i32 %x) #0 {
entry:
; CHECK-LABEL: test_concat_scalar_v4i8_to_v8i8_dup:
; CHECK-NEXT: dup.2s v0, w0
; CHECK-NEXT: ret
  %quad = bitcast i32 %x to <4 x i8>
  %splat = shufflevector <4 x i8> %quad, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i8> %splat
}

; %x is viewed as <2 x i16>. Result lanes 2 and 3 use mask index 2, which
; selects from the second (undef) shuffle operand; every other lane pair is
; <0, 1>. Expected lowering: a single dup of w0 across four 32-bit lanes.
define <8 x i16> @test_concat_scalar_v2i16_to_v8i16_dup(i32 %x) #0 {
entry:
; CHECK-LABEL: test_concat_scalar_v2i16_to_v8i16_dup:
; CHECK-NEXT: dup.4s v0, w0
; CHECK-NEXT: ret
  %pair = bitcast i32 %x to <2 x i16>
  %splat = shufflevector <2 x i16> %pair, <2 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 0, i32 1, i32 0, i32 1>
  ret <8 x i16> %splat
}

; Two truncated scalars, each viewed as <2 x i8>, are laid out as
; x, y, undef, y (in 16-bit units). Expected lowering: 16-bit inserts into
; lanes 0, 1 and 3 only; no insert is expected for lane 2, which corresponds
; to the undef mask entries.
define <8 x i8> @test_concat_scalars_2x_v2i8_to_v8i8(i32 %x, i32 %y) #0 {
entry:
; CHECK-LABEL: test_concat_scalars_2x_v2i8_to_v8i8:
; CHECK-NEXT: ins.h v0[0], w0
; CHECK-NEXT: ins.h v0[1], w1
; CHECK-NEXT: ins.h v0[3], w1
; CHECK-NEXT: ret
  %x.lo16 = trunc i32 %x to i16
  %y.lo16 = trunc i32 %y to i16
  %x.pair = bitcast i16 %x.lo16 to <2 x i8>
  %y.pair = bitcast i16 %y.lo16 to <2 x i8>
  %concat = shufflevector <2 x i8> %x.pair, <2 x i8> %y.pair, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 2, i32 3>
  ret <8 x i8> %concat
}

; Two i32 scalars, each viewed as <4 x i8>, are concatenated with %y in the
; low half and %x in the high half (mask indices 4-7 come first). Expected
; lowering: move w1 into lane 0, then insert w0 into 32-bit lane 1.
define <8 x i8> @test_concat_scalars_2x_v4i8_to_v8i8_dup(i32 %x, i32 %y) #0 {
entry:
; CHECK-LABEL: test_concat_scalars_2x_v4i8_to_v8i8_dup:
; CHECK-NEXT: fmov s0, w1
; CHECK-NEXT: ins.s v0[1], w0
; CHECK-NEXT: ret
  %x.quad = bitcast i32 %x to <4 x i8>
  %y.quad = bitcast i32 %y to <4 x i8>
  %concat = shufflevector <4 x i8> %x.quad, <4 x i8> %y.quad, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i8> %concat
}

; Two i32 scalars, each viewed as <2 x i16>, are laid out as x, y, y, x
; (in 32-bit units). Expected lowering: move w0 into lane 0, then one 32-bit
; insert for each of the remaining three lanes.
define <8 x i16> @test_concat_scalars_2x_v2i16_to_v8i16_dup(i32 %x, i32 %y) #0 {
entry:
; CHECK-LABEL: test_concat_scalars_2x_v2i16_to_v8i16_dup:
; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: ins.s v0[1], w1
; CHECK-NEXT: ins.s v0[2], w1
; CHECK-NEXT: ins.s v0[3], w0
; CHECK-NEXT: ret
  %x.pair = bitcast i32 %x to <2 x i16>
  %y.pair = bitcast i32 %y to <2 x i16>
  %concat = shufflevector <2 x i16> %x.pair, <2 x i16> %y.pair, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1>
  ret <8 x i16> %concat
}

; Also make sure we minimize bitcasts.

; This is a pretty artificial testcase: make sure we bitcast to floating-point
; if any of the scalars is floating-point.
define <8 x i8> @test_concat_scalars_mixed_2x_v2i8_to_v8i8(float %dummy, i32 %x, half %y) #0 {
entry:
; One integer scalar (%x, truncated to 16 bits) and one half scalar (%y) are
; each viewed as <2 x i8> and laid out as x, y, x, y. The expected output
; first moves w0 into an FP/vector register and then performs every insert
; from a vector lane. %dummy is never used in the body; the expected output
; reads %y from v1.
; CHECK-LABEL: test_concat_scalars_mixed_2x_v2i8_to_v8i8:
; CHECK-NEXT: fmov s[[X:[0-9]+]], w0
; CHECK-NEXT: ins.h v0[0], v[[X]][0]
; CHECK-NEXT: ins.h v0[1], v1[0]
; CHECK-NEXT: ins.h v0[2], v[[X]][0]
; CHECK-NEXT: ins.h v0[3], v1[0]
; CHECK-NEXT: ret
  %x.lo16 = trunc i32 %x to i16
  %x.pair = bitcast i16 %x.lo16 to <2 x i8>
  %y.pair = bitcast half %y to <2 x i8>
  %concat = shufflevector <2 x i8> %x.pair, <2 x i8> %y.pair, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i8> %concat
}

; Both scalars are half values, viewed as <2 x i8>, laid out as x, y, x, y and
; returned as <2 x float>. Expected lowering: four 16-bit lane-to-lane inserts
; taken directly from v1 and v2, with no other instruction before the return.
define <2 x float> @test_concat_scalars_fp_2x_v2i8_to_v8i8(float %dummy, half %x, half %y) #0 {
entry:
; CHECK-LABEL: test_concat_scalars_fp_2x_v2i8_to_v8i8:
; CHECK-NEXT: ins.h v0[0], v1[0]
; CHECK-NEXT: ins.h v0[1], v2[0]
; CHECK-NEXT: ins.h v0[2], v1[0]
; CHECK-NEXT: ins.h v0[3], v2[0]
; CHECK-NEXT: ret
  %x.pair = bitcast half %x to <2 x i8>
  %y.pair = bitcast half %y to <2 x i8>
  %concat = shufflevector <2 x i8> %x.pair, <2 x i8> %y.pair, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %as.float = bitcast <8 x i8> %concat to <2 x float>
  ret <2 x float> %as.float
}

; A float scalar is viewed as <2 x i16>, shuffled into <8 x i16> (mask index 2
; selects from the undef second operand) and returned as <4 x float>.
; Expected lowering: a single dup of lane 0 of v0 across four 32-bit lanes.
define <4 x float> @test_concat_scalar_fp_v2i16_to_v16i8_dup(float %x) #0 {
entry:
; CHECK-LABEL: test_concat_scalar_fp_v2i16_to_v16i8_dup:
; CHECK-NEXT: dup.4s v0, v0[0]
; CHECK-NEXT: ret
  %pair = bitcast float %x to <2 x i16>
  %splat = shufflevector <2 x i16> %pair, <2 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 0, i32 1, i32 0, i32 1>
  %as.float = bitcast <8 x i16> %splat to <4 x float>
  ret <4 x float> %as.float
}

; Attribute group shared by every function in this file.
attributes #0 = { nounwind }