Home | History | Annotate | Download | only in ARM
      1 ; RUN: llc -mtriple=arm-eabi -mattr=+neon -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NEON
      2 ; RUN: llc -mtriple=arm-eabi -mattr=-neon -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NONEON
      3 
      4 ; NEON-LABEL: load_factor2:
      5 ; NEON: vld2.8 {d16, d17}, [r0]
      6 ; NONEON-LABEL: load_factor2:
      7 ; NONEON-NOT: vld2
      8 define <8 x i8> @load_factor2(<16 x i8>* %ptr) { ; Factor-2 load: returns the element-wise sum of the even and odd lanes of one 16-byte vector.
      9   %wide.vec = load <16 x i8>, <16 x i8>* %ptr, align 4 ; single wide load that feeds both shuffles
     10   %strided.v0 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> ; even lanes (stride 2, offset 0)
     11   %strided.v1 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> ; odd lanes (stride 2, offset 1)
     12   %add = add nsw <8 x i8> %strided.v0, %strided.v1 ; combines both strided vectors so each shuffle has a use
     13   ret <8 x i8> %add
     14 }
     15 
     16 ; NEON-LABEL: load_factor3:
     17 ; NEON: vld3.32 {d16, d17, d18}, [r0]
     18 ; NONEON-LABEL: load_factor3:
     19 ; NONEON-NOT: vld3
     20 define <2 x i32> @load_factor3(i32* %ptr) { ; Factor-3 load: returns the sum of the stride-3 lanes at offsets 2 and 1 of six i32 values.
     21   %base = bitcast i32* %ptr to <6 x i32>* ; reinterpret the scalar pointer as a pointer to the wide vector
     22   %wide.vec = load <6 x i32>, <6 x i32>* %base, align 4 ; single wide load that feeds both shuffles
     23   %strided.v2 = shufflevector <6 x i32> %wide.vec, <6 x i32> undef, <2 x i32> <i32 2, i32 5> ; lanes 2 and 5 (stride 3, offset 2)
     24   %strided.v1 = shufflevector <6 x i32> %wide.vec, <6 x i32> undef, <2 x i32> <i32 1, i32 4> ; lanes 1 and 4 (stride 3, offset 1); offset 0 is never extracted
     25   %add = add nsw <2 x i32> %strided.v2, %strided.v1 ; combines both strided vectors so each shuffle has a use
     26   ret <2 x i32> %add
     27 }
     28 
     29 ; NEON-LABEL: load_factor4:
     30 ; NEON: vld4.32 {d16, d18, d20, d22}, [r0]!
     31 ; NEON: vld4.32 {d17, d19, d21, d23}, [r0]
     32 ; NONEON-LABEL: load_factor4:
     33 ; NONEON-NOT: vld4
     34 define <4 x i32> @load_factor4(i32* %ptr) { ; Factor-4 load: returns the sum of the stride-4 lanes at offsets 0 and 2 of sixteen i32 values.
     35   %base = bitcast i32* %ptr to <16 x i32>* ; reinterpret the scalar pointer as a pointer to the wide vector
     36   %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4 ; single wide load that feeds both shuffles
     37   %strided.v0 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12> ; stride 4, offset 0
     38   %strided.v2 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14> ; stride 4, offset 2; offsets 1 and 3 are never extracted
     39   %add = add nsw <4 x i32> %strided.v0, %strided.v2 ; combines both strided vectors so each shuffle has a use
     40   ret <4 x i32> %add
     41 }
     42 
     43 ; NEON-LABEL: store_factor2:
     44 ; NEON: vst2.8 {d16, d17}, [r0]
     45 ; NONEON-LABEL: store_factor2:
     46 ; NONEON-NOT: vst2
     47 define void @store_factor2(<16 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1) { ; Factor-2 store: interleaves %v0 and %v1 element by element and stores the 16-byte result to %ptr.
     48   %interleaved.vec = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> ; mask alternates %v0[i] (index i) and %v1[i] (index 8+i)
     49   store <16 x i8> %interleaved.vec, <16 x i8>* %ptr, align 4 ; one wide store of the interleaved vector
     50   ret void
     51 }
     52 
     53 ; NEON-LABEL: store_factor3:
     54 ; NEON: vst3.32 {d16, d18, d20}, [r0]!
     55 ; NEON: vst3.32 {d17, d19, d21}, [r0]
     56 ; NONEON-LABEL: store_factor3:
     57 ; NONEON-NOT: vst3.32
     58 define void @store_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) { ; Factor-3 store: interleaves %v0, %v1 and %v2 and stores the twelve i32 values to %ptr.
     59   %base = bitcast i32* %ptr to <12 x i32>* ; reinterpret the scalar pointer as a pointer to the wide vector
     60   %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; concatenation: lanes 0-3 = %v0, lanes 4-7 = %v1
     61   %v2_u = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> ; %v2 widened to 8 lanes; the upper four lanes are undef padding
     62   %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_u, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11> ; each group of three picks %v0[i], %v1[i], %v2[i] (indices i, 4+i, 8+i)
     63   store <12 x i32> %interleaved.vec, <12 x i32>* %base, align 4 ; one wide store of the interleaved vector
     64   ret void
     65 }
     66 
     67 ; NEON-LABEL: store_factor4:
     68 ; NEON: vst4.32 {d16, d18, d20, d22}, [r0]!
     69 ; NEON: vst4.32 {d17, d19, d21, d23}, [r0]
     70 ; NONEON-LABEL: store_factor4:
     71 ; NONEON-NOT: vst4
     72 define void @store_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { ; Factor-4 store: interleaves %v0..%v3 and stores the sixteen i32 values to %ptr.
     73   %base = bitcast i32* %ptr to <16 x i32>* ; reinterpret the scalar pointer as a pointer to the wide vector
     74   %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; concatenation: lanes 0-3 = %v0, lanes 4-7 = %v1
     75   %v2_v3 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; concatenation: lanes 0-3 = %v2, lanes 4-7 = %v3
     76   %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_v3, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15> ; each group of four picks %v0[i], %v1[i], %v2[i], %v3[i] (indices i, 4+i, 8+i, 12+i)
     77   store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4 ; one wide store of the interleaved vector
     78   ret void
     79 }
     80 
     81 ; The following cases test that interleaved accesses of pointer vectors can be
     82 ; matched to ldN/stN instructions.
     83 
     84 ; NEON-LABEL: load_ptrvec_factor2:
     85 ; NEON: vld2.32 {d16, d17}, [r0]
     86 ; NONEON-LABEL: load_ptrvec_factor2:
     87 ; NONEON-NOT: vld2
     88 define <2 x i32*> @load_ptrvec_factor2(i32** %ptr) { ; Factor-2 load of a pointer vector: returns lanes 0 and 2 of four loaded i32* values.
     89   %base = bitcast i32** %ptr to <4 x i32*>* ; reinterpret the scalar pointer as a pointer to the wide pointer vector
     90   %wide.vec = load <4 x i32*>, <4 x i32*>* %base, align 4 ; single wide load
     91   %strided.v0 = shufflevector <4 x i32*> %wide.vec, <4 x i32*> undef, <2 x i32> <i32 0, i32 2> ; stride 2, offset 0; the odd lanes are never extracted
     92   ret <2 x i32*> %strided.v0
     93 }
     94 
     95 ; NEON-LABEL: load_ptrvec_factor3:
     96 ; NEON: vld3.32 {d16, d17, d18}, [r0]
     97 ; NONEON-LABEL: load_ptrvec_factor3:
     98 ; NONEON-NOT: vld3
     99 define void @load_ptrvec_factor3(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) { ; Factor-3 load of a pointer vector: writes the offset-2 lanes to %ptr1 and the offset-1 lanes to %ptr2.
    100   %base = bitcast i32** %ptr to <6 x i32*>* ; reinterpret the scalar pointer as a pointer to the wide pointer vector
    101   %wide.vec = load <6 x i32*>, <6 x i32*>* %base, align 4 ; single wide load that feeds both shuffles
    102   %strided.v2 = shufflevector <6 x i32*> %wide.vec, <6 x i32*> undef, <2 x i32> <i32 2, i32 5> ; lanes 2 and 5 (stride 3, offset 2)
    103   store <2 x i32*> %strided.v2, <2 x i32*>* %ptr1 ; this store sits between the two shuffles
    104   %strided.v1 = shufflevector <6 x i32*> %wide.vec, <6 x i32*> undef, <2 x i32> <i32 1, i32 4> ; lanes 1 and 4 (stride 3, offset 1); offset 0 is never extracted
    105   store <2 x i32*> %strided.v1, <2 x i32*>* %ptr2
    106   ret void
    107 }
    108 
    109 ; NEON-LABEL: load_ptrvec_factor4:
    110 ; NEON: vld4.32 {d16, d17, d18, d19}, [r0]
    111 ; NONEON-LABEL: load_ptrvec_factor4:
    112 ; NONEON-NOT: vld4
    113 define void @load_ptrvec_factor4(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) { ; Factor-4 load of a pointer vector: writes the offset-1 lanes to %ptr1 and the offset-3 lanes to %ptr2.
    114   %base = bitcast i32** %ptr to <8 x i32*>* ; reinterpret the scalar pointer as a pointer to the wide pointer vector
    115   %wide.vec = load <8 x i32*>, <8 x i32*>* %base, align 4 ; single wide load that feeds both shuffles
    116   %strided.v1 = shufflevector <8 x i32*> %wide.vec, <8 x i32*> undef, <2 x i32> <i32 1, i32 5> ; lanes 1 and 5 (stride 4, offset 1)
    117   %strided.v3 = shufflevector <8 x i32*> %wide.vec, <8 x i32*> undef, <2 x i32> <i32 3, i32 7> ; lanes 3 and 7 (stride 4, offset 3); offsets 0 and 2 are never extracted
    118   store <2 x i32*> %strided.v1, <2 x i32*>* %ptr1
    119   store <2 x i32*> %strided.v3, <2 x i32*>* %ptr2
    120   ret void
    121 }
    122 
    123 ; NEON-LABEL: store_ptrvec_factor2:
    124 ; NEON: vst2.32 {d16, d17}, [r0]
    125 ; NONEON-LABEL: store_ptrvec_factor2:
    126 ; NONEON-NOT: vst2
    127 define void @store_ptrvec_factor2(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1) { ; Factor-2 store of pointer vectors: interleaves %v0 and %v1 and stores four i32* values to %ptr.
    128   %base = bitcast i32** %ptr to <4 x i32*>* ; reinterpret the scalar pointer as a pointer to the wide pointer vector
    129   %interleaved.vec = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 2, i32 1, i32 3> ; result order: %v0[0], %v1[0], %v0[1], %v1[1]
    130   store <4 x i32*> %interleaved.vec, <4 x i32*>* %base, align 4 ; one wide store of the interleaved vector
    131   ret void
    132 }
    133 
    134 ; NEON-LABEL: store_ptrvec_factor3:
    135 ; NEON: vst3.32 {d16, d17, d18}, [r0]
    136 ; NONEON-LABEL: store_ptrvec_factor3:
    137 ; NONEON-NOT: vst3
    138 define void @store_ptrvec_factor3(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2) { ; Factor-3 store of pointer vectors: interleaves %v0, %v1 and %v2 and stores six i32* values to %ptr.
    139   %base = bitcast i32** %ptr to <6 x i32*>* ; reinterpret the scalar pointer as a pointer to the wide pointer vector
    140   %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; concatenation: lanes 0-1 = %v0, lanes 2-3 = %v1
    141   %v2_u = shufflevector <2 x i32*> %v2, <2 x i32*> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> ; %v2 widened to 4 lanes; the upper two lanes are undef padding
    142   %interleaved.vec = shufflevector <4 x i32*> %v0_v1, <4 x i32*> %v2_u, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5> ; each group of three picks %v0[i], %v1[i], %v2[i] (indices i, 2+i, 4+i)
    143   store <6 x i32*> %interleaved.vec, <6 x i32*>* %base, align 4 ; one wide store of the interleaved vector
    144   ret void
    145 }
    146 
    147 ; NEON-LABEL: store_ptrvec_factor4:
    148 ; NEON: vst4.32 {d16, d17, d18, d19}, [r0]
    149 ; NONEON-LABEL: store_ptrvec_factor4:
    150 ; NONEON-NOT: vst4
    151 define void @store_ptrvec_factor4(i32* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2, <2 x i32*> %v3) { ; Factor-4 store of pointer vectors: interleaves %v0..%v3 and stores eight i32* values to %ptr. NOTE(review): %ptr is i32* here while the sibling ptrvec tests take i32** - confirm this is intentional.
    152   %base = bitcast i32* %ptr to <8 x i32*>* ; reinterpret the scalar pointer as a pointer to the wide pointer vector
    153   %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; concatenation: lanes 0-1 = %v0, lanes 2-3 = %v1
    154   %v2_v3 = shufflevector <2 x i32*> %v2, <2 x i32*> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; concatenation: lanes 0-1 = %v2, lanes 2-3 = %v3
    155   %interleaved.vec = shufflevector <4 x i32*> %v0_v1, <4 x i32*> %v2_v3, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7> ; each group of four picks %v0[i], %v1[i], %v2[i], %v3[i] (indices i, 2+i, 4+i, 6+i)
    156   store <8 x i32*> %interleaved.vec, <8 x i32*>* %base, align 4 ; one wide store of the interleaved vector
    157   ret void
    158 }
    159 
    160 ; The following cases check that shuffle masks with undef indices can be matched
    161 ; into ldN/stN instructions.
    162 
    163 ; NEON-LABEL: load_undef_mask_factor2:
    164 ; NEON: vld2.32 {d16, d17, d18, d19}, [r0]
    165 ; NONEON-LABEL: load_undef_mask_factor2:
    166 ; NONEON-NOT: vld2
    167 define <4 x i32> @load_undef_mask_factor2(i32* %ptr) { ; Factor-2 load whose shuffle masks have undef in result positions 0 and 2.
    168   %base = bitcast i32* %ptr to <8 x i32>* ; reinterpret the scalar pointer as a pointer to the wide vector
    169   %wide.vec = load <8 x i32>, <8 x i32>* %base, align 4 ; single wide load that feeds both shuffles
    170   %strided.v0 = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 6> ; defined indices 2 and 6 fit the stride-2, offset-0 sequence 0,2,4,6
    171   %strided.v1 = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 7> ; defined indices 3 and 7 fit the stride-2, offset-1 sequence 1,3,5,7
    172   %add = add nsw <4 x i32> %strided.v0, %strided.v1 ; combines both strided vectors so each shuffle has a use
    173   ret <4 x i32> %add
    174 }
    175 
    176 ; NEON-LABEL: load_undef_mask_factor3:
    177 ; NEON: vld3.32 {d16, d18, d20}, [r0]!
    178 ; NEON: vld3.32 {d17, d19, d21}, [r0]
    179 ; NONEON-LABEL: load_undef_mask_factor3:
    180 ; NONEON-NOT: vld3
    181 define <4 x i32> @load_undef_mask_factor3(i32* %ptr) { ; Factor-3 load where one shuffle mask defines only its first index.
    182   %base = bitcast i32* %ptr to <12 x i32>* ; reinterpret the scalar pointer as a pointer to the wide vector
    183   %wide.vec = load <12 x i32>, <12 x i32>* %base, align 4 ; single wide load that feeds both shuffles
    184   %strided.v2 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef> ; only index 2 is defined; it fits the stride-3, offset-2 sequence 2,5,8,11
    185   %strided.v1 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10> ; fully defined stride-3, offset-1 mask
    186   %add = add nsw <4 x i32> %strided.v2, %strided.v1 ; combines both strided vectors so each shuffle has a use
    187   ret <4 x i32> %add
    188 }
    189 
    190 ; NEON-LABEL: load_undef_mask_factor4:
    191 ; NEON: vld4.32 {d16, d18, d20, d22}, [r0]!
    192 ; NEON: vld4.32 {d17, d19, d21, d23}, [r0]
    193 ; NONEON-LABEL: load_undef_mask_factor4:
    194 ; NONEON-NOT: vld4
    195 define <4 x i32> @load_undef_mask_factor4(i32* %ptr) { ; Factor-4 load whose shuffle masks have undef in their last two positions.
    196   %base = bitcast i32* %ptr to <16 x i32>* ; reinterpret the scalar pointer as a pointer to the wide vector
    197   %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4 ; single wide load that feeds both shuffles
    198   %strided.v0 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef> ; defined indices 0 and 4 fit the stride-4, offset-0 sequence 0,4,8,12
    199   %strided.v2 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 undef, i32 undef> ; defined indices 2 and 6 fit the stride-4, offset-2 sequence 2,6,10,14
    200   %add = add nsw <4 x i32> %strided.v0, %strided.v2 ; combines both strided vectors so each shuffle has a use
    201   ret <4 x i32> %add
    202 }
    203 
    204 ; NEON-LABEL: store_undef_mask_factor2:
    205 ; NEON: vst2.32 {d16, d17, d18, d19}, [r0]
    206 ; NONEON-LABEL: store_undef_mask_factor2:
    207 ; NONEON-NOT: vst2
    208 define void @store_undef_mask_factor2(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1) { ; Factor-2 store whose interleave mask leaves the first four result lanes undef.
    209   %base = bitcast i32* %ptr to <8 x i32>* ; reinterpret the scalar pointer as a pointer to the wide vector
    210   %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7> ; defined tail is %v0[2], %v1[2], %v0[3], %v1[3]; a full interleave mask would start 0,4,1,5
    211   store <8 x i32> %interleaved.vec, <8 x i32>* %base, align 4 ; one wide store of the interleaved vector
    212   ret void
    213 }
    214 
    215 ; NEON-LABEL: store_undef_mask_factor3:
    216 ; NEON: vst3.32 {d16, d18, d20}, [r0]!
    217 ; NEON: vst3.32 {d17, d19, d21}, [r0]
    218 ; NONEON-LABEL: store_undef_mask_factor3:
    219 ; NONEON-NOT: vst3
    220 define void @store_undef_mask_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) { ; Factor-3 store whose interleave mask has two undef indices.
    221   %base = bitcast i32* %ptr to <12 x i32>* ; reinterpret the scalar pointer as a pointer to the wide vector
    222   %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; concatenation: lanes 0-3 = %v0, lanes 4-7 = %v1
    223   %v2_u = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> ; %v2 widened to 8 lanes; the upper four lanes are undef padding
    224   %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_u, <12 x i32> <i32 0, i32 4, i32 undef, i32 1, i32 undef, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11> ; same pattern as the full 0,4,8,1,5,9,... mask, with positions 2 and 4 (indices 8 and 5) left undef
    225   store <12 x i32> %interleaved.vec, <12 x i32>* %base, align 4 ; one wide store of the interleaved vector
    226   ret void
    227 }
    228 
    229 ; NEON-LABEL: store_undef_mask_factor4:
    230 ; NEON: vst4.32 {d16, d18, d20, d22}, [r0]!
    231 ; NEON: vst4.32 {d17, d19, d21, d23}, [r0]
    232 ; NONEON-LABEL: store_undef_mask_factor4:
    233 ; NONEON-NOT: vst4
    234 define void @store_undef_mask_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { ; Factor-4 store whose interleave mask has two undef indices.
    235   %base = bitcast i32* %ptr to <16 x i32>* ; reinterpret the scalar pointer as a pointer to the wide vector
    236   %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; concatenation: lanes 0-3 = %v0, lanes 4-7 = %v1
    237   %v2_v3 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; concatenation: lanes 0-3 = %v2, lanes 4-7 = %v3
    238   %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_v3, <16 x i32> <i32 0, i32 4, i32 8, i32 undef, i32 undef, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15> ; same pattern as the full 0,4,8,12,1,5,9,13,... mask, with positions 3 and 4 (indices 12 and 1) left undef
    239   store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4 ; one wide store of the interleaved vector
    240   ret void
    241 }
    242 
    243 ; The following test cases check that address spaces are properly handled.
    244 
    245 ; NEON-LABEL: load_address_space
    246 ; NEON: vld3.32
    247 ; NONEON-LABEL: load_address_space
    248 ; NONEON-NOT: vld3
    249 define void @load_address_space(<4 x i32> addrspace(1)* %A, <2 x i32>* %B) { ; Strided load from an addrspace(1) pointer: extracts lanes 0 and 3 and stores them to %B.
    250  %tmp = load <4 x i32>, <4 x i32> addrspace(1)* %A ; the wide load reads from address space 1
    251  %interleaved = shufflevector <4 x i32> %tmp, <4 x i32> undef, <2 x i32> <i32 0, i32 3> ; indices 0 and 3 are three lanes apart (stride 3, offset 0)
    252  store <2 x i32> %interleaved, <2 x i32>* %B ; the result goes to a pointer with no addrspace qualifier
    253  ret void
    254 }
    255 
    256 ; NEON-LABEL: store_address_space
    257 ; NEON: vst2.32
    258 ; NONEON-LABEL: store_address_space
    259 ; NONEON-NOT: vst2
    260 define void @store_address_space(<2 x i32>* %A, <2 x i32>* %B, <4 x i32> addrspace(1)* %C) { ; Interleaved store to an addrspace(1) pointer: loads two <2 x i32> vectors, interleaves them, stores to %C.
    261  %tmp0 = load <2 x i32>, <2 x i32>* %A ; first member vector
    262  %tmp1 = load <2 x i32>, <2 x i32>* %B ; second member vector
    263  %interleaved = shufflevector <2 x i32> %tmp0, <2 x i32> %tmp1, <4 x i32> <i32 0, i32 2, i32 1, i32 3> ; result order: %tmp0[0], %tmp1[0], %tmp0[1], %tmp1[1]
    264  store <4 x i32> %interleaved, <4 x i32> addrspace(1)* %C ; the wide store writes to address space 1
    265  ret void
    266 }
    267 
    268 ; Check that we do something sane with illegal types.
    269 
    270 ; NEON-LABEL: load_illegal_factor2:
    271 ; NEON: BB#0:
    272 ; NEON-NEXT: vld1.64 {d16, d17}, [r0:128]
    273 ; NEON-NEXT: vuzp.32 q8, {{.*}}
    274 ; NEON-NEXT: vmov r0, r1, d16
    275 ; NEON-NEXT: vmov r2, r3, {{.*}}
    276 ; NEON-NEXT: mov pc, lr
    277 ; NONEON-LABEL: load_illegal_factor2:
    278 ; NONEON: BB#0:
    279 ; NONEON-NEXT: ldr [[ELT0:r[0-9]+]], [r0]
    280 ; NONEON-NEXT: ldr r1, [r0, #8]
    281 ; NONEON-NEXT: mov r0, [[ELT0]]
    282 ; NONEON-NEXT: mov pc, lr
    283 define <3 x float> @load_illegal_factor2(<3 x float>* %p) nounwind { ; Loads a <3 x float> and returns lanes 0 and 2 in result positions 0 and 1; presumably <3 x float> is the illegal type the section comment above refers to.
    284   %tmp1 = load <3 x float>, <3 x float>* %p, align 16 ; 16-byte-aligned load of the three-element vector
    285   %tmp2 = shufflevector <3 x float> %tmp1, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef> ; stride-2 selection (indices 0 and 2); the last result lane is undef
    286   ret <3 x float> %tmp2
    287 }
    288 
    289 ; This lowering isn't great, but it's at least correct.
    290 
    291 ; NEON-LABEL: store_illegal_factor2:
    292 ; NEON: BB#0:
    293 ; NEON-NEXT: vldr d17, [sp]
    294 ; NEON-NEXT: vmov d16, r2, r3
    295 ; NEON-NEXT: vuzp.32 q8, {{.*}}
    296 ; NEON-NEXT: vstr d16, [r0]
    297 ; NEON-NEXT: mov pc, lr
    298 ; NONEON-LABEL: store_illegal_factor2:
    299 ; NONEON: BB#0:
    300 ; NONEON-NEXT: stm r0, {r1, r3}
    301 ; NONEON-NEXT: mov pc, lr
    302 define void @store_illegal_factor2(<3 x float>* %p, <3 x float> %v) nounwind { ; Shuffles lanes 0 and 2 of a <3 x float> argument into result positions 0 and 1 and stores the result to %p.
    303   %tmp1 = shufflevector <3 x float> %v, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef> ; stride-2 selection (indices 0 and 2); the last result lane is undef
    304   store <3 x float> %tmp1, <3 x float>* %p, align 16 ; 16-byte-aligned store of the three-element vector
    305   ret void
    306 }
    307