Home | History | Annotate | Download | only in ARM
      1 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
      2 
      3 define <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind {
        ; Loads an <8 x i8> from %A and returns it with all eight lanes reversed.
        ; The eight i8 lanes form a single 64-bit group; the expectations below
        ; require llc to emit vrev64.8 for this shuffle.
      4 ;CHECK: test_vrev64D8:
      5 ;CHECK: vrev64.8
      6 	%tmp1 = load <8 x i8>* %A
        ; Mask 7..0: full lane reversal; the second shuffle operand is unused (undef).
      7 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
      8 	ret <8 x i8> %tmp2
      9 }
     10 
     11 define <4 x i16> @test_vrev64D16(<4 x i16>* %A) nounwind {
        ; Loads a <4 x i16> from %A and returns it with all four lanes reversed.
        ; Four i16 lanes make up one 64-bit group; the expectations below require
        ; llc to emit vrev64.16.
     12 ;CHECK: test_vrev64D16:
     13 ;CHECK: vrev64.16
     14 	%tmp1 = load <4 x i16>* %A
        ; Mask 3,2,1,0: full lane reversal of the single input vector.
     15 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
     16 	ret <4 x i16> %tmp2
     17 }
     18 
     19 define <2 x i32> @test_vrev64D32(<2 x i32>* %A) nounwind {
        ; Loads a <2 x i32> from %A and returns it with its two lanes swapped.
        ; Two i32 lanes make up one 64-bit group; the expectations below require
        ; llc to emit vrev64.32.
     20 ;CHECK: test_vrev64D32:
     21 ;CHECK: vrev64.32
     22 	%tmp1 = load <2 x i32>* %A
        ; Mask 1,0: swap the two lanes.
     23 	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
     24 	ret <2 x i32> %tmp2
     25 }
     26 
     27 define <2 x float> @test_vrev64Df(<2 x float>* %A) nounwind {
        ; Floating-point counterpart of the <2 x i32> case: loads a <2 x float>
        ; from %A and returns it with its two lanes swapped. The expectations
        ; below require the same vrev64.32 instruction as for 32-bit integers.
     28 ;CHECK: test_vrev64Df:
     29 ;CHECK: vrev64.32
     30 	%tmp1 = load <2 x float>* %A
        ; Mask 1,0: swap the two float lanes.
     31 	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
     32 	ret <2 x float> %tmp2
     33 }
     34 
     35 define <16 x i8> @test_vrev64Q8(<16 x i8>* %A) nounwind {
        ; 128-bit variant: loads a <16 x i8> from %A and reverses the lanes inside
        ; each group of eight (lanes 0-7 and lanes 8-15) independently. The
        ; expectations below require llc to emit vrev64.8.
     36 ;CHECK: test_vrev64Q8:
     37 ;CHECK: vrev64.8
     38 	%tmp1 = load <16 x i8>* %A
        ; Mask 7..0 followed by 15..8: the two 8-lane halves stay in place, each reversed.
     39 	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
     40 	ret <16 x i8> %tmp2
     41 }
     42 
     43 define <8 x i16> @test_vrev64Q16(<8 x i16>* %A) nounwind {
        ; 128-bit variant: loads an <8 x i16> from %A and reverses the lanes inside
        ; each group of four (lanes 0-3 and lanes 4-7) independently. The
        ; expectations below require llc to emit vrev64.16.
     44 ;CHECK: test_vrev64Q16:
     45 ;CHECK: vrev64.16
     46 	%tmp1 = load <8 x i16>* %A
        ; Mask 3..0 followed by 7..4: each 4-lane half is reversed in place.
     47 	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
     48 	ret <8 x i16> %tmp2
     49 }
     50 
     51 define <4 x i32> @test_vrev64Q32(<4 x i32>* %A) nounwind {
        ; 128-bit variant: loads a <4 x i32> from %A and swaps the two lanes inside
        ; each pair (lanes 0-1 and lanes 2-3). The expectations below require llc
        ; to emit vrev64.32.
     52 ;CHECK: test_vrev64Q32:
     53 ;CHECK: vrev64.32
     54 	%tmp1 = load <4 x i32>* %A
        ; Mask 1,0,3,2: pairwise swap of adjacent lanes.
     55 	%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
     56 	ret <4 x i32> %tmp2
     57 }
     58 
     59 define <4 x float> @test_vrev64Qf(<4 x float>* %A) nounwind {
        ; Floating-point counterpart of the <4 x i32> case: loads a <4 x float>
        ; from %A and swaps the two lanes inside each pair. The expectations below
        ; require the same vrev64.32 instruction as for 32-bit integers.
     60 ;CHECK: test_vrev64Qf:
     61 ;CHECK: vrev64.32
     62 	%tmp1 = load <4 x float>* %A
        ; Mask 1,0,3,2: pairwise swap of adjacent float lanes.
     63 	%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
     64 	ret <4 x float> %tmp2
     65 }
     66 
     67 define <8 x i8> @test_vrev32D8(<8 x i8>* %A) nounwind {
        ; Loads an <8 x i8> from %A and reverses the lanes inside each group of
        ; four i8 lanes (32 bits per group). The expectations below require llc to
        ; emit vrev32.8.
     68 ;CHECK: test_vrev32D8:
     69 ;CHECK: vrev32.8
     70 	%tmp1 = load <8 x i8>* %A
        ; Mask 3..0 followed by 7..4: each 4-lane group is reversed in place.
     71 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
     72 	ret <8 x i8> %tmp2
     73 }
     74 
     75 define <4 x i16> @test_vrev32D16(<4 x i16>* %A) nounwind {
        ; Loads a <4 x i16> from %A and swaps the two lanes inside each pair of
        ; i16 lanes (32 bits per pair). The expectations below require llc to emit
        ; vrev32.16.
     76 ;CHECK: test_vrev32D16:
     77 ;CHECK: vrev32.16
     78 	%tmp1 = load <4 x i16>* %A
        ; Mask 1,0,3,2: pairwise swap of adjacent lanes.
     79 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
     80 	ret <4 x i16> %tmp2
     81 }
     82 
     83 define <16 x i8> @test_vrev32Q8(<16 x i8>* %A) nounwind {
        ; 128-bit variant: loads a <16 x i8> from %A and reverses the lanes inside
        ; each of the four groups of four i8 lanes. The expectations below require
        ; llc to emit vrev32.8.
     84 ;CHECK: test_vrev32Q8:
     85 ;CHECK: vrev32.8
     86 	%tmp1 = load <16 x i8>* %A
        ; Mask 3..0, 7..4, 11..8, 15..12: every 4-lane group is reversed in place.
     87 	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
     88 	ret <16 x i8> %tmp2
     89 }
     90 
     91 define <8 x i16> @test_vrev32Q16(<8 x i16>* %A) nounwind {
        ; 128-bit variant: loads an <8 x i16> from %A and swaps the two lanes
        ; inside each of the four i16 pairs. The expectations below require llc to
        ; emit vrev32.16.
     92 ;CHECK: test_vrev32Q16:
     93 ;CHECK: vrev32.16
     94 	%tmp1 = load <8 x i16>* %A
        ; Mask 1,0,3,2,5,4,7,6: pairwise swap of adjacent lanes.
     95 	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
     96 	ret <8 x i16> %tmp2
     97 }
     98 
     99 define <8 x i8> @test_vrev16D8(<8 x i8>* %A) nounwind {
        ; Loads an <8 x i8> from %A and swaps the two lanes inside each pair of i8
        ; lanes (16 bits per pair). The expectations below require llc to emit
        ; vrev16.8.
    100 ;CHECK: test_vrev16D8:
    101 ;CHECK: vrev16.8
    102 	%tmp1 = load <8 x i8>* %A
        ; Mask 1,0,3,2,5,4,7,6: pairwise swap of adjacent lanes.
    103 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
    104 	ret <8 x i8> %tmp2
    105 }
    106 
    107 define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
        ; 128-bit variant: loads a <16 x i8> from %A and swaps the two lanes inside
        ; each of the eight i8 pairs. The expectations below require llc to emit
        ; vrev16.8.
    108 ;CHECK: test_vrev16Q8:
    109 ;CHECK: vrev16.8
    110 	%tmp1 = load <16 x i8>* %A
        ; Mask 1,0,3,2,...,15,14: pairwise swap of adjacent lanes.
    111 	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
    112 	ret <16 x i8> %tmp2
    113 }
    114 
    115 ; Undef shuffle indices should not prevent matching to VREV:
    116 
    117 define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind {
        ; Same full 8-lane reversal as the plain <8 x i8> 64-bit case, except that
        ; result lanes 1 and 2 are left undef in the mask. The expectations below
        ; require llc to still emit vrev64.8.
    118 ;CHECK: test_vrev64D8_undef:
    119 ;CHECK: vrev64.8
    120 	%tmp1 = load <8 x i8>* %A
        ; Every defined index agrees with the reversal mask 7,6,5,4,3,2,1,0.
    121 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0>
    122 	ret <8 x i8> %tmp2
    123 }
    124 
    125 define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind {
        ; Same pairwise i16 swap as the plain <8 x i16> 32-bit case, except that
        ; result lanes 0, 2 and 7 are left undef in the mask. The expectations
        ; below require llc to still emit vrev32.16.
    126 ;CHECK: test_vrev32Q16_undef:
    127 ;CHECK: vrev32.16
    128 	%tmp1 = load <8 x i16>* %A
        ; Every defined index agrees with the pairwise-swap mask 1,0,3,2,5,4,7,6.
    129 	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
    130 	ret <8 x i16> %tmp2
    131 }
    132 
    133 ; A vcombine feeding a VREV should not obscure things.  Radar 8597007.
    134 
    135 define void @test_with_vcombine(<4 x float>* %v) nounwind {
        ; Loads a <4 x float> from %v, splits it into two <2 x float> halves,
        ; doubles the second half, then rebuilds a <4 x float> with the two lanes
        ; of each half swapped and stores it back to %v.
        ; The expectations below require that no vext appears between the
        ; function label and the vrev64.32 that implements the swap.
    136 ;CHECK: test_with_vcombine:
    137 ;CHECK-NOT: vext
    138 ;CHECK: vrev64.32
    139   %tmp1 = load <4 x float>* %v, align 16
        ; View the vector as two doubles so each 64-bit half can be extracted whole.
    140   %tmp2 = bitcast <4 x float> %tmp1 to <2 x double>
    141   %tmp3 = extractelement <2 x double> %tmp2, i32 0
    142   %tmp4 = bitcast double %tmp3 to <2 x float>
    143   %tmp5 = extractelement <2 x double> %tmp2, i32 1
    144   %tmp6 = bitcast double %tmp5 to <2 x float>
        ; Second half added to itself, so the two shuffle inputs are distinct values.
    145   %tmp7 = fadd <2 x float> %tmp6, %tmp6
        ; Indices 0-1 select from %tmp4 and 2-3 from %tmp7; mask 1,0,3,2 swaps the
        ; lanes inside each half while concatenating the halves in order.
    146   %tmp8 = shufflevector <2 x float> %tmp4, <2 x float> %tmp7, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
    147   store <4 x float> %tmp8, <4 x float>* %v, align 16
    148   ret void
    149 }
    150 
    151 ; vrev <4 x i16> should use VREV32 and not VREV64
    152 define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst) nounwind ssp {
        ; Reinterprets %source as a pointer to <8 x i16>, loads that vector, and
        ; stores to %dst a <2 x i16> made of element 6 followed by element 5.
        ; The expectations below require a vext.16 followed by a vrev32.16.
    153 ; CHECK: test_vrev64:
    154 ; CHECK: vext.16
    155 ; CHECK: vrev32.16
    156 entry:
        ; The parameter is typed <4 x i16>*, but the load below is of <8 x i16>.
    157   %0 = bitcast <4 x i16>* %source to <8 x i16>*
    158   %tmp2 = load <8 x i16>* %0, align 4
        ; Result lane 0 <- source element 6.
    159   %tmp3 = extractelement <8 x i16> %tmp2, i32 6
    160   %tmp5 = insertelement <2 x i16> undef, i16 %tmp3, i32 0
        ; Result lane 1 <- source element 5, i.e. the two adjacent elements in reverse order.
    161   %tmp9 = extractelement <8 x i16> %tmp2, i32 5
    162   %tmp11 = insertelement <2 x i16> %tmp5, i16 %tmp9, i32 1
    163   store <2 x i16> %tmp11, <2 x i16>* %dst, align 4
    164   ret void
    165 }
    166 
    167 ; Test vrev of float4
    168 define void @float_vrev64(float* nocapture %source, <4 x float>* nocapture %dest) nounwind noinline ssp {
        ; Reinterprets %source as a pointer to <4 x float>, loads that vector, and
        ; builds a result whose lane 1 is the loaded vector's lane 3 while lanes
        ; 0, 2 and 3 are the constant 0.0. The result is stored at element
        ; index 11 from %dest.
        ; The expectations below require a vext.32 followed by a vrev64.32.
        ; The label expectation ends with a colon, like every other test in this
        ; file, so it anchors on the function label rather than on any earlier
        ; mention of the symbol name in the output.
    169 ; CHECK: float_vrev64:
    170 ; CHECK: vext.32
    171 ; CHECK: vrev64.32
    172 entry:
    173   %0 = bitcast float* %source to <4 x float>*
    174   %tmp2 = load <4 x float>* %0, align 4
        ; Indices 0-3 select from the constant vector and 4-7 from %tmp2, so
        ; mask 0,7,0,0 yields <0.0, %tmp2[3], 0.0, 0.0>.
    175   %tmp5 = shufflevector <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x float> %tmp2, <4 x i32> <i32 0, i32 7, i32 0, i32 0>
        ; Address of the 12th <4 x float> slot counting from %dest (index 11).
    176   %arrayidx8 = getelementptr inbounds <4 x float>* %dest, i32 11
    177   store <4 x float> %tmp5, <4 x float>* %arrayidx8, align 4
    178   ret void
    179 }
    180 
    181