; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
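
; Check that element-reversing shufflevector masks are selected as the NEON
; VREV64, VREV32, and VREV16 instructions instead of generic permutes.
; VREV64 reverses the elements within each 64-bit doubleword: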

define <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind {
;CHECK: test_vrev64D8:
;CHECK: vrev64.8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
	ret <8 x i8> %tmp2
}

define <4 x i16> @test_vrev64D16(<4 x i16>* %A) nounwind {
;CHECK: test_vrev64D16:
;CHECK: vrev64.16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
	ret <4 x i16> %tmp2
}

define <2 x i32> @test_vrev64D32(<2 x i32>* %A) nounwind {
;CHECK: test_vrev64D32:
;CHECK: vrev64.32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
	ret <2 x i32> %tmp2
}

define <2 x float> @test_vrev64Df(<2 x float>* %A) nounwind {
;CHECK: test_vrev64Df:
;CHECK: vrev64.32
	%tmp1 = load <2 x float>* %A
	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
	ret <2 x float> %tmp2
}

define <16 x i8> @test_vrev64Q8(<16 x i8>* %A) nounwind {
;CHECK: test_vrev64Q8:
;CHECK: vrev64.8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
	ret <16 x i8> %tmp2
}

define <8 x i16> @test_vrev64Q16(<8 x i16>* %A) nounwind {
;CHECK: test_vrev64Q16:
;CHECK: vrev64.16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
	ret <8 x i16> %tmp2
}

define <4 x i32> @test_vrev64Q32(<4 x i32>* %A) nounwind {
;CHECK: test_vrev64Q32:
;CHECK: vrev64.32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
	ret <4 x i32> %tmp2
}

define <4 x float> @test_vrev64Qf(<4 x float>* %A) nounwind {
;CHECK: test_vrev64Qf:
;CHECK: vrev64.32
	%tmp1 = load <4 x float>* %A
	%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
	ret <4 x float> %tmp2
}

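; VREV32 reverses the elements within each 32-bit word: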
define <8 x i8> @test_vrev32D8(<8 x i8>* %A) nounwind {
;CHECK: test_vrev32D8:
;CHECK: vrev32.8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
	ret <8 x i8> %tmp2
}

define <4 x i16> @test_vrev32D16(<4 x i16>* %A) nounwind {
;CHECK: test_vrev32D16:
;CHECK: vrev32.16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
	ret <4 x i16> %tmp2
}

define <16 x i8> @test_vrev32Q8(<16 x i8>* %A) nounwind {
;CHECK: test_vrev32Q8:
;CHECK: vrev32.8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
	ret <16 x i8> %tmp2
}

define <8 x i16> @test_vrev32Q16(<8 x i16>* %A) nounwind {
;CHECK: test_vrev32Q16:
;CHECK: vrev32.16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
	ret <8 x i16> %tmp2
}

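; VREV16 reverses the bytes within each 16-bit halfword: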
define <8 x i8> @test_vrev16D8(<8 x i8>* %A) nounwind {
;CHECK: test_vrev16D8:
;CHECK: vrev16.8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
	ret <8 x i8> %tmp2
}

define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
;CHECK: test_vrev16Q8:
;CHECK: vrev16.8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
	ret <16 x i8> %tmp2
}

; Undef shuffle indices should not prevent matching to VREV:

define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind {
;CHECK: test_vrev64D8_undef:
;CHECK: vrev64.8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0>
	ret <8 x i8> %tmp2
}

define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind {
;CHECK: test_vrev32Q16_undef:
;CHECK: vrev32.16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
	ret <8 x i16> %tmp2
}

; A vcombine feeding a VREV should not obscure things.  Radar 8597007.

define void @test_with_vcombine(<4 x float>* %v) nounwind {
;CHECK: test_with_vcombine:
;CHECK-NOT: vext
;CHECK: vrev64.32
  %tmp1 = load <4 x float>* %v, align 16
  %tmp2 = bitcast <4 x float> %tmp1 to <2 x double>
  %tmp3 = extractelement <2 x double> %tmp2, i32 0
  %tmp4 = bitcast double %tmp3 to <2 x float>
  %tmp5 = extractelement <2 x double> %tmp2, i32 1
  %tmp6 = bitcast double %tmp5 to <2 x float>
  %tmp7 = fadd <2 x float> %tmp6, %tmp6
  %tmp8 = shufflevector <2 x float> %tmp4, <2 x float> %tmp7, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  store <4 x float> %tmp8, <4 x float>* %v, align 16
  ret void
}

; The type <2 x i16> is legalized to <2 x i32> and needs to be trunc-stored
; back to <2 x i16> when stored to memory. Currently ARM scalarizes these stores.
; See PR 11158
define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst) nounwind ssp {
; CHECK: test_vrev64:
; CHECK: vst1.16
; CHECK: vst1.16
entry:
  %0 = bitcast <4 x i16>* %source to <8 x i16>*
  %tmp2 = load <8 x i16>* %0, align 4
  %tmp3 = extractelement <8 x i16> %tmp2, i32 6
  %tmp5 = insertelement <2 x i16> undef, i16 %tmp3, i32 0
  %tmp9 = extractelement <8 x i16> %tmp2, i32 5
  %tmp11 = insertelement <2 x i16> %tmp5, i16 %tmp9, i32 1
  store <2 x i16> %tmp11, <2 x i16>* %dst, align 4
  ret void
}

; Test vrev of float4
define void @float_vrev64(float* nocapture %source, <4 x float>* nocapture %dest) nounwind noinline ssp {
; CHECK: float_vrev64
; CHECK: vext.32
; CHECK: vrev64.32
entry:
  %0 = bitcast float* %source to <4 x float>*
  %tmp2 = load <4 x float>* %0, align 4
  %tmp5 = shufflevector <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x float> %tmp2, <4 x i32> <i32 0, i32 7, i32 0, i32 0>
  %arrayidx8 = getelementptr inbounds <4 x float>* %dest, i32 11
  store <4 x float> %tmp5, <4 x float>* %arrayidx8, align 4
  ret void
}