Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
      2 
      3 define i32 @test_rev_w(i32 %a) nounwind {
      4 entry:
      5 ; CHECK-LABEL: test_rev_w:
      6 ; CHECK: rev w0, w0
      7   %0 = tail call i32 @llvm.bswap.i32(i32 %a)
      8   ret i32 %0
      9 }
     10 
     11 define i64 @test_rev_x(i64 %a) nounwind {
     12 entry:
     13 ; CHECK-LABEL: test_rev_x:
     14 ; CHECK: rev x0, x0
     15   %0 = tail call i64 @llvm.bswap.i64(i64 %a)
     16   ret i64 %0
     17 }
     18 
     19 declare i32 @llvm.bswap.i32(i32) nounwind readnone
     20 declare i64 @llvm.bswap.i64(i64) nounwind readnone
     21 
     22 define i32 @test_rev16_w(i32 %X) nounwind {
     23 entry:
     24 ; CHECK-LABEL: test_rev16_w:
     25 ; CHECK: rev16 w0, w0
     26   %tmp1 = lshr i32 %X, 8
     27   %X15 = bitcast i32 %X to i32
     28   %tmp4 = shl i32 %X15, 8
     29   %tmp2 = and i32 %tmp1, 16711680
     30   %tmp5 = and i32 %tmp4, -16777216
     31   %tmp9 = and i32 %tmp1, 255
     32   %tmp13 = and i32 %tmp4, 65280
     33   %tmp6 = or i32 %tmp5, %tmp2
     34   %tmp10 = or i32 %tmp6, %tmp13
     35   %tmp14 = or i32 %tmp10, %tmp9
     36   ret i32 %tmp14
     37 }
     38 
     39 ; 64-bit REV16 is *not* a swap then a 16-bit rotation:
     40 ;   01234567 ->(bswap) 76543210 ->(rotr) 10765432
     41 ;   01234567 ->(rev16) 10325476
     42 define i64 @test_rev16_x(i64 %a) nounwind {
     43 entry:
     44 ; CHECK-LABEL: test_rev16_x:
     45 ; CHECK-NOT: rev16 x0, x0
     46   %0 = tail call i64 @llvm.bswap.i64(i64 %a)
     47   %1 = lshr i64 %0, 16
     48   %2 = shl i64 %0, 48
     49   %3 = or i64 %1, %2
     50   ret i64 %3
     51 }
     52 
     53 define i64 @test_rev32_x(i64 %a) nounwind {
     54 entry:
     55 ; CHECK-LABEL: test_rev32_x:
     56 ; CHECK: rev32 x0, x0
     57   %0 = tail call i64 @llvm.bswap.i64(i64 %a)
     58   %1 = lshr i64 %0, 32
     59   %2 = shl i64 %0, 32
     60   %3 = or i64 %1, %2
     61   ret i64 %3
     62 }
     63 
     64 define <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind {
     65 ;CHECK-LABEL: test_vrev64D8:
     66 ;CHECK: rev64.8b
     67 	%tmp1 = load <8 x i8>, <8 x i8>* %A
     68 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
     69 	ret <8 x i8> %tmp2
     70 }
     71 
     72 define <4 x i16> @test_vrev64D16(<4 x i16>* %A) nounwind {
     73 ;CHECK-LABEL: test_vrev64D16:
     74 ;CHECK: rev64.4h
     75 	%tmp1 = load <4 x i16>, <4 x i16>* %A
     76 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
     77 	ret <4 x i16> %tmp2
     78 }
     79 
     80 define <2 x i32> @test_vrev64D32(<2 x i32>* %A) nounwind {
     81 ;CHECK-LABEL: test_vrev64D32:
     82 ;CHECK: rev64.2s
     83 	%tmp1 = load <2 x i32>, <2 x i32>* %A
     84 	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
     85 	ret <2 x i32> %tmp2
     86 }
     87 
     88 define <2 x float> @test_vrev64Df(<2 x float>* %A) nounwind {
     89 ;CHECK-LABEL: test_vrev64Df:
     90 ;CHECK: rev64.2s
     91 	%tmp1 = load <2 x float>, <2 x float>* %A
     92 	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
     93 	ret <2 x float> %tmp2
     94 }
     95 
     96 define <16 x i8> @test_vrev64Q8(<16 x i8>* %A) nounwind {
     97 ;CHECK-LABEL: test_vrev64Q8:
     98 ;CHECK: rev64.16b
     99 	%tmp1 = load <16 x i8>, <16 x i8>* %A
    100 	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
    101 	ret <16 x i8> %tmp2
    102 }
    103 
    104 define <8 x i16> @test_vrev64Q16(<8 x i16>* %A) nounwind {
    105 ;CHECK-LABEL: test_vrev64Q16:
    106 ;CHECK: rev64.8h
    107 	%tmp1 = load <8 x i16>, <8 x i16>* %A
    108 	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
    109 	ret <8 x i16> %tmp2
    110 }
    111 
    112 define <4 x i32> @test_vrev64Q32(<4 x i32>* %A) nounwind {
    113 ;CHECK-LABEL: test_vrev64Q32:
    114 ;CHECK: rev64.4s
    115 	%tmp1 = load <4 x i32>, <4 x i32>* %A
    116 	%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
    117 	ret <4 x i32> %tmp2
    118 }
    119 
    120 define <4 x float> @test_vrev64Qf(<4 x float>* %A) nounwind {
    121 ;CHECK-LABEL: test_vrev64Qf:
    122 ;CHECK: rev64.4s
    123 	%tmp1 = load <4 x float>, <4 x float>* %A
    124 	%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
    125 	ret <4 x float> %tmp2
    126 }
    127 
    128 define <8 x i8> @test_vrev32D8(<8 x i8>* %A) nounwind {
    129 ;CHECK-LABEL: test_vrev32D8:
    130 ;CHECK: rev32.8b
    131 	%tmp1 = load <8 x i8>, <8 x i8>* %A
    132 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
    133 	ret <8 x i8> %tmp2
    134 }
    135 
    136 define <4 x i16> @test_vrev32D16(<4 x i16>* %A) nounwind {
    137 ;CHECK-LABEL: test_vrev32D16:
    138 ;CHECK: rev32.4h
    139 	%tmp1 = load <4 x i16>, <4 x i16>* %A
    140 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
    141 	ret <4 x i16> %tmp2
    142 }
    143 
    144 define <16 x i8> @test_vrev32Q8(<16 x i8>* %A) nounwind {
    145 ;CHECK-LABEL: test_vrev32Q8:
    146 ;CHECK: rev32.16b
    147 	%tmp1 = load <16 x i8>, <16 x i8>* %A
    148 	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
    149 	ret <16 x i8> %tmp2
    150 }
    151 
    152 define <8 x i16> @test_vrev32Q16(<8 x i16>* %A) nounwind {
    153 ;CHECK-LABEL: test_vrev32Q16:
    154 ;CHECK: rev32.8h
    155 	%tmp1 = load <8 x i16>, <8 x i16>* %A
    156 	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
    157 	ret <8 x i16> %tmp2
    158 }
    159 
    160 define <8 x i8> @test_vrev16D8(<8 x i8>* %A) nounwind {
    161 ;CHECK-LABEL: test_vrev16D8:
    162 ;CHECK: rev16.8b
    163 	%tmp1 = load <8 x i8>, <8 x i8>* %A
    164 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
    165 	ret <8 x i8> %tmp2
    166 }
    167 
    168 define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
    169 ;CHECK-LABEL: test_vrev16Q8:
    170 ;CHECK: rev16.16b
    171 	%tmp1 = load <16 x i8>, <16 x i8>* %A
    172 	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
    173 	ret <16 x i8> %tmp2
    174 }
    175 
    176 ; Undef shuffle indices should not prevent matching to VREV:
    177 
    178 define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind {
    179 ;CHECK-LABEL: test_vrev64D8_undef:
    180 ;CHECK: rev64.8b
    181 	%tmp1 = load <8 x i8>, <8 x i8>* %A
    182 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0>
    183 	ret <8 x i8> %tmp2
    184 }
    185 
    186 define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind {
    187 ;CHECK-LABEL: test_vrev32Q16_undef:
    188 ;CHECK: rev32.8h
    189 	%tmp1 = load <8 x i16>, <8 x i16>* %A
    190 	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
    191 	ret <8 x i16> %tmp2
    192 }
    193 
    194 ; vrev <4 x i16> should use REV32 and not REV64
    195 define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst) nounwind ssp {
    196 ; CHECK-LABEL: test_vrev64:
    197 ; CHECK: ldr [[DEST:q[0-9]+]],
    198 ; CHECK: st1.h
    199 ; CHECK: st1.h
    200 entry:
    201   %0 = bitcast <4 x i16>* %source to <8 x i16>*
    202   %tmp2 = load <8 x i16>, <8 x i16>* %0, align 4
    203   %tmp3 = extractelement <8 x i16> %tmp2, i32 6
    204   %tmp5 = insertelement <2 x i16> undef, i16 %tmp3, i32 0
    205   %tmp9 = extractelement <8 x i16> %tmp2, i32 5
    206   %tmp11 = insertelement <2 x i16> %tmp5, i16 %tmp9, i32 1
    207   store <2 x i16> %tmp11, <2 x i16>* %dst, align 4
    208   ret void
    209 }
    210 
    211 ; Test vrev of float4
    212 define void @float_vrev64(float* nocapture %source, <4 x float>* nocapture %dest) nounwind noinline ssp {
    213 ; CHECK: float_vrev64
    214 ; CHECK: ldr [[DEST:q[0-9]+]],
    215 ; CHECK: rev64.4s
    216 entry:
    217   %0 = bitcast float* %source to <4 x float>*
    218   %tmp2 = load <4 x float>, <4 x float>* %0, align 4
    219   %tmp5 = shufflevector <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x float> %tmp2, <4 x i32> <i32 0, i32 7, i32 0, i32 0>
    220   %arrayidx8 = getelementptr inbounds <4 x float>, <4 x float>* %dest, i32 11
    221   store <4 x float> %tmp5, <4 x float>* %arrayidx8, align 4
    222   ret void
    223 }
    224 
    225 
    226 define <4 x i32> @test_vrev32_bswap(<4 x i32> %source) nounwind {
    227 ; CHECK-LABEL: test_vrev32_bswap:
    228 ; CHECK: rev32.16b
    229 ; CHECK-NOT: rev
    230 ; CHECK: ret
    231   %bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %source)
    232   ret <4 x i32> %bswap
    233 }
    234 
    235 declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) nounwind readnone
    236