Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc -mtriple=aarch64 -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NEON
      2 ; RUN: llc -mtriple=aarch64 -lower-interleaved-accesses=true -mattr=-neon < %s | FileCheck %s -check-prefix=NONEON
      3 
      4 ; NEON-LABEL: load_factor2:
      5 ; NEON: ld2 { v0.8b, v1.8b }, [x0]
      6 ; NONEON-LABEL: load_factor2:
      7 ; NONEON-NOT: ld2
        ; Factor-2 interleaved load: one <16 x i8> load is split by two shuffles
        ; into the even lanes (0,2,..,14) and the odd lanes (1,3,..,15), and the
        ; two halves are added. The directives above expect a single ld2 when NEON
        ; is available and no ld2 in the run that passes -mattr=-neon.
      8 define <8 x i8> @load_factor2(<16 x i8>* %ptr) {
      9   %wide.vec = load <16 x i8>, <16 x i8>* %ptr, align 4
     10   %strided.v0 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
     11   %strided.v1 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
     12   %add = add nsw <8 x i8> %strided.v0, %strided.v1
     13   ret <8 x i8> %add
     14 }
     15 
     16 ; NEON-LABEL: load_factor3:
     17 ; NEON: ld3 { v0.4s, v1.4s, v2.4s }, [x0]
     18 ; NONEON-LABEL: load_factor3:
     19 ; NONEON-NOT: ld3
        ; Factor-3 interleaved load: a <12 x i32> load through the bitcast pointer
        ; is de-interleaved with stride 3. Only lanes {2,5,8,11} and {1,4,7,10}
        ; are extracted (and added); lanes {0,3,6,9} are never used. The directives
        ; above expect one ld3 with NEON and no ld3 without it.
     20 define <4 x i32> @load_factor3(i32* %ptr) {
     21   %base = bitcast i32* %ptr to <12 x i32>*
     22   %wide.vec = load <12 x i32>, <12 x i32>* %base, align 4
     23   %strided.v2 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
     24   %strided.v1 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
     25   %add = add nsw <4 x i32> %strided.v2, %strided.v1
     26   ret <4 x i32> %add
     27 }
     28 
     29 ; NEON-LABEL: load_factor4:
     30 ; NEON: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
     31 ; NONEON-LABEL: load_factor4:
     32 ; NONEON-NOT: ld4
        ; Factor-4 interleaved load: a <16 x i32> load is de-interleaved with
        ; stride 4. Only lanes {0,4,8,12} and {2,6,10,14} are extracted and added;
        ; the other two stride-4 groups are unused. The directives above expect one
        ; ld4 with NEON and no ld4 without it.
     33 define <4 x i32> @load_factor4(i32* %ptr) {
     34   %base = bitcast i32* %ptr to <16 x i32>*
     35   %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4
     36   %strided.v0 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
     37   %strided.v2 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
     38   %add = add nsw <4 x i32> %strided.v0, %strided.v2
     39   ret <4 x i32> %add
     40 }
     41 
     42 ; NEON-LABEL: store_factor2:
     43 ; NEON: st2 { v0.8b, v1.8b }, [x0]
     44 ; NONEON-LABEL: store_factor2:
     45 ; NONEON-NOT: st2
        ; Factor-2 interleaved store: %v0 and %v1 are shuffled into alternating
        ; order (v0[0], v1[0], v0[1], v1[1], ...) and the <16 x i8> result is
        ; stored to %ptr. The directives above expect one st2 with NEON and no st2
        ; without it.
     46 define void @store_factor2(<16 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1) {
     47   %interleaved.vec = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
     48   store <16 x i8> %interleaved.vec, <16 x i8>* %ptr, align 4
     49   ret void
     50 }
     51 
     52 ; NEON-LABEL: store_factor3:
     53 ; NEON: st3 { v0.4s, v1.4s, v2.4s }, [x0]
     54 ; NONEON-LABEL: store_factor3:
     55 ; NONEON-NOT: st3
        ; Factor-3 interleaved store: %v0 and %v1 are concatenated into %v0_v1,
        ; %v2 is widened with undef lanes into %v2_u, and the final shuffle picks
        ; v0[i], v1[i], v2[i] for i = 0..3 before the <12 x i32> store. The
        ; directives above expect one st3 with NEON and no st3 without it.
     56 define void @store_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
     57   %base = bitcast i32* %ptr to <12 x i32>*
     58   %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
     59   %v2_u = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
     60   %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_u, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
     61   store <12 x i32> %interleaved.vec, <12 x i32>* %base, align 4
     62   ret void
     63 }
     64 
     65 ; NEON-LABEL: store_factor4:
     66 ; NEON: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
     67 ; NONEON-LABEL: store_factor4:
     68 ; NONEON-NOT: st4
        ; Factor-4 interleaved store: the inputs are concatenated pairwise
        ; (%v0|%v1 and %v2|%v3) and the final shuffle picks v0[i], v1[i], v2[i],
        ; v3[i] for i = 0..3 before the <16 x i32> store. The directives above
        ; expect one st4 with NEON and no st4 without it.
     69 define void @store_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
     70   %base = bitcast i32* %ptr to <16 x i32>*
     71   %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
     72   %v2_v3 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
     73   %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_v3, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
     74   store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4
     75   ret void
     76 }
     77 
     78 ; The following cases test that interleaved accesses of pointer vectors can
     79 ; be matched to ldN/stN instructions.
     80 
     81 ; NEON-LABEL: load_ptrvec_factor2:
     82 ; NEON: ld2 { v0.2d, v1.2d }, [x0]
     83 ; NONEON-LABEL: load_ptrvec_factor2:
     84 ; NONEON-NOT: ld2
        ; Factor-2 interleaved load whose elements are pointers (i32*): a
        ; <4 x i32*> load is followed by a single shuffle extracting lanes {0,2},
        ; which is returned. The directives above expect an ld2 on .2d registers
        ; with NEON and no ld2 without it.
     85 define <2 x i32*> @load_ptrvec_factor2(i32** %ptr) {
     86   %base = bitcast i32** %ptr to <4 x i32*>*
     87   %wide.vec = load <4 x i32*>, <4 x i32*>* %base, align 4
     88   %strided.v0 = shufflevector <4 x i32*> %wide.vec, <4 x i32*> undef, <2 x i32> <i32 0, i32 2>
     89   ret <2 x i32*> %strided.v0
     90 }
     91 
     92 ; NEON-LABEL: load_ptrvec_factor3:
     93 ; NEON: ld3 { v0.2d, v1.2d, v2.2d }, [x0]
     94 ; NONEON-LABEL: load_ptrvec_factor3:
     95 ; NONEON-NOT: ld3
        ; Factor-3 interleaved load of pointer elements: from a <6 x i32*> load,
        ; lanes {2,5} are stored to %ptr1 and lanes {1,4} are stored to %ptr2;
        ; lanes {0,3} are unused. The directives above expect an ld3 on .2d
        ; registers with NEON and no ld3 without it.
     96 define void @load_ptrvec_factor3(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) {
     97   %base = bitcast i32** %ptr to <6 x i32*>*
     98   %wide.vec = load <6 x i32*>, <6 x i32*>* %base, align 4
     99   %strided.v2 = shufflevector <6 x i32*> %wide.vec, <6 x i32*> undef, <2 x i32> <i32 2, i32 5>
    100   store <2 x i32*> %strided.v2, <2 x i32*>* %ptr1
    101   %strided.v1 = shufflevector <6 x i32*> %wide.vec, <6 x i32*> undef, <2 x i32> <i32 1, i32 4>
    102   store <2 x i32*> %strided.v1, <2 x i32*>* %ptr2
    103   ret void
    104 }
    105 
    106 ; NEON-LABEL: load_ptrvec_factor4:
    107 ; NEON: ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
    108 ; NONEON-LABEL: load_ptrvec_factor4:
    109 ; NONEON-NOT: ld4
        ; Factor-4 interleaved load of pointer elements: from an <8 x i32*> load,
        ; lanes {1,5} are stored to %ptr1 and lanes {3,7} are stored to %ptr2; the
        ; even-indexed lanes are unused. The directives above expect an ld4 on .2d
        ; registers with NEON and no ld4 without it.
    110 define void @load_ptrvec_factor4(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) {
    111   %base = bitcast i32** %ptr to <8 x i32*>*
    112   %wide.vec = load <8 x i32*>, <8 x i32*>* %base, align 4
    113   %strided.v1 = shufflevector <8 x i32*> %wide.vec, <8 x i32*> undef, <2 x i32> <i32 1, i32 5>
    114   %strided.v3 = shufflevector <8 x i32*> %wide.vec, <8 x i32*> undef, <2 x i32> <i32 3, i32 7>
    115   store <2 x i32*> %strided.v1, <2 x i32*>* %ptr1
    116   store <2 x i32*> %strided.v3, <2 x i32*>* %ptr2
    117   ret void
    118 }
    119 
    120 ; NEON-LABEL: store_ptrvec_factor2:
    121 ; NEON: st2 { v0.2d, v1.2d }, [x0]
    122 ; NONEON-LABEL: store_ptrvec_factor2:
    123 ; NONEON-NOT: st2
        ; Factor-2 interleaved store of pointer elements: %v0 and %v1 are shuffled
        ; into the order v0[0], v1[0], v0[1], v1[1] and stored as <4 x i32*>. The
        ; directives above expect an st2 on .2d registers with NEON and no st2
        ; without it.
    124 define void @store_ptrvec_factor2(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1) {
    125   %base = bitcast i32** %ptr to <4 x i32*>*
    126   %interleaved.vec = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
    127   store <4 x i32*> %interleaved.vec, <4 x i32*>* %base, align 4
    128   ret void
    129 }
    130 
    131 ; NEON-LABEL: store_ptrvec_factor3:
    132 ; NEON: st3 { v0.2d, v1.2d, v2.2d }, [x0]
    133 ; NONEON-LABEL: store_ptrvec_factor3:
    134 ; NONEON-NOT: st3
        ; Factor-3 interleaved store of pointer elements: %v0|%v1 are concatenated,
        ; %v2 is widened with undef lanes, and the final shuffle picks v0[i],
        ; v1[i], v2[i] for i = 0..1 before the <6 x i32*> store. The directives
        ; above expect an st3 on .2d registers with NEON and no st3 without it.
    135 define void @store_ptrvec_factor3(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2) {
    136   %base = bitcast i32** %ptr to <6 x i32*>*
    137   %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    138   %v2_u = shufflevector <2 x i32*> %v2, <2 x i32*> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
    139   %interleaved.vec = shufflevector <4 x i32*> %v0_v1, <4 x i32*> %v2_u, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
    140   store <6 x i32*> %interleaved.vec, <6 x i32*>* %base, align 4
    141   ret void
    142 }
    143 
    144 ; NEON-LABEL: store_ptrvec_factor4:
    145 ; NEON: st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
    146 ; NONEON-LABEL: store_ptrvec_factor4:
    147 ; NONEON-NOT: st4
        ; Factor-4 interleaved store of pointer elements: %v0|%v1 and %v2|%v3 are
        ; concatenated and the final shuffle picks v0[i], v1[i], v2[i], v3[i] for
        ; i = 0..1 before the <8 x i32*> store. The directives above expect an st4
        ; on .2d registers with NEON and no st4 without it.
        ; NOTE(review): %ptr is declared i32* here, whereas the other ptrvec tests
        ; in this file declare it i32**; it is bitcast to the vector pointer type
        ; either way -- confirm whether the difference is intentional.
    148 define void @store_ptrvec_factor4(i32* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2, <2 x i32*> %v3) {
    149   %base = bitcast i32* %ptr to <8 x i32*>*
    150   %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    151   %v2_v3 = shufflevector <2 x i32*> %v2, <2 x i32*> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    152   %interleaved.vec = shufflevector <4 x i32*> %v0_v1, <4 x i32*> %v2_v3, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
    153   store <8 x i32*> %interleaved.vec, <8 x i32*>* %base, align 4
    154   ret void
    155 }
    156 
    157 ; The following cases check that shuffle masks with undef indices can be
    158 ; matched to ldN/stN instructions.
    159 
    160 ; NEON-LABEL: load_undef_mask_factor2:
    161 ; NEON: ld2 { v0.4s, v1.4s }, [x0]
    162 ; NONEON-LABEL: load_undef_mask_factor2:
    163 ; NONEON-NOT: ld2
        ; Factor-2 load where result lanes 0 and 2 of both shuffle masks are
        ; undef; the defined lanes ({2,6} and {3,7}) still sit at their stride-2
        ; positions. The directives above expect an ld2 with NEON and no ld2
        ; without it.
    164 define <4 x i32> @load_undef_mask_factor2(i32* %ptr) {
    165   %base = bitcast i32* %ptr to <8 x i32>*
    166   %wide.vec = load <8 x i32>, <8 x i32>* %base, align 4
    167   %strided.v0 = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 6>
    168   %strided.v1 = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 7>
    169   %add = add nsw <4 x i32> %strided.v0, %strided.v1
    170   ret <4 x i32> %add
    171 }
    172 
    173 ; NEON-LABEL: load_undef_mask_factor3:
    174 ; NEON: ld3 { v0.4s, v1.4s, v2.4s }, [x0]
    175 ; NONEON-LABEL: load_undef_mask_factor3:
    176 ; NONEON-NOT: ld3
        ; Factor-3 load where the first shuffle mask defines only its leading
        ; lane (index 2) and leaves the rest undef, while the second mask is the
        ; fully defined {1,4,7,10}. The directives above expect an ld3 with NEON
        ; and no ld3 without it.
    177 define <4 x i32> @load_undef_mask_factor3(i32* %ptr) {
    178   %base = bitcast i32* %ptr to <12 x i32>*
    179   %wide.vec = load <12 x i32>, <12 x i32>* %base, align 4
    180   %strided.v2 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
    181   %strided.v1 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
    182   %add = add nsw <4 x i32> %strided.v2, %strided.v1
    183   ret <4 x i32> %add
    184 }
    185 
    186 ; NEON-LABEL: load_undef_mask_factor4:
    187 ; NEON: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
    188 ; NONEON-LABEL: load_undef_mask_factor4:
    189 ; NONEON-NOT: ld4
        ; Factor-4 load where both shuffle masks define only their first two
        ; lanes ({0,4} and {2,6}) and leave the last two undef. The directives
        ; above expect an ld4 with NEON and no ld4 without it.
    190 define <4 x i32> @load_undef_mask_factor4(i32* %ptr) {
    191   %base = bitcast i32* %ptr to <16 x i32>*
    192   %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4
    193   %strided.v0 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef>
    194   %strided.v2 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 undef, i32 undef>
    195   %add = add nsw <4 x i32> %strided.v0, %strided.v2
    196   ret <4 x i32> %add
    197 }
    198 
    199 ; NEON-LABEL: store_undef_mask_factor2:
    200 ; NEON: st2 { v0.4s, v1.4s }, [x0]
    201 ; NONEON-LABEL: store_undef_mask_factor2:
    202 ; NONEON-NOT: st2
        ; Factor-2 store whose interleave mask has its first four lanes undef;
        ; the remaining lanes (2,6,3,7) are v0[2], v1[2], v0[3], v1[3]. The
        ; directives above expect an st2 with NEON and no st2 without it.
    203 define void @store_undef_mask_factor2(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1) {
    204   %base = bitcast i32* %ptr to <8 x i32>*
    205   %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7>
    206   store <8 x i32> %interleaved.vec, <8 x i32>* %base, align 4
    207   ret void
    208 }
    209 
    210 ; NEON-LABEL: store_undef_mask_factor3:
    211 ; NEON: st3 { v0.4s, v1.4s, v2.4s }, [x0]
    212 ; NONEON-LABEL: store_undef_mask_factor3:
    213 ; NONEON-NOT: st3
        ; Factor-3 store built like store_factor3 (concatenate %v0|%v1, widen %v2
        ; with undef lanes), except that result lanes 2 and 4 of the interleave
        ; mask are undef. The directives above expect an st3 with NEON and no st3
        ; without it.
    214 define void @store_undef_mask_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
    215   %base = bitcast i32* %ptr to <12 x i32>*
    216   %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    217   %v2_u = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
    218   %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_u, <12 x i32> <i32 0, i32 4, i32 undef, i32 1, i32 undef, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
    219   store <12 x i32> %interleaved.vec, <12 x i32>* %base, align 4
    220   ret void
    221 }
    222 
    223 ; NEON-LABEL: store_undef_mask_factor4:
    224 ; NEON: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
    225 ; NONEON-LABEL: store_undef_mask_factor4:
    226 ; NONEON-NOT: st4
        ; Factor-4 store built like store_factor4 (concatenate %v0|%v1 and
        ; %v2|%v3), except that result lanes 3 and 4 of the interleave mask are
        ; undef. The directives above expect an st4 with NEON and no st4 without
        ; it.
    227 define void @store_undef_mask_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
    228   %base = bitcast i32* %ptr to <16 x i32>*
    229   %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    230   %v2_v3 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    231   %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_v3, <16 x i32> <i32 0, i32 4, i32 8, i32 undef, i32 undef, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
    232   store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4
    233   ret void
    234 }
    235 
    236 ; Check that we do something sane with illegal types.
    237 
    238 ; NEON-LABEL: load_illegal_factor2:
    239 ; NEON: BB#0:
    240 ; NEON-NEXT: ldr q[[V:[0-9]+]], [x0]
    241 ; NEON-NEXT: uzp1 v0.4s, v[[V]].4s, v{{.*}}.4s
    242 ; NEON-NEXT: ret
    243 ; NONEON-LABEL: load_illegal_factor2:
    244 ; NONEON: BB#0:
    245 ; NONEON-NEXT: ldr s0, [x0]
    246 ; NONEON-NEXT: ldr s1, [x0, #8]
    247 ; NONEON-NEXT: ret
        ; Loads a <3 x float> and applies a stride-2-looking shuffle <0, 2, undef>.
        ; The directives above pin the whole function body: with NEON a plain
        ; 128-bit ldr followed by uzp1, and without NEON two scalar ldr
        ; instructions (offsets 0 and 8), so no ld2 appears in either sequence.
    248 define <3 x float> @load_illegal_factor2(<3 x float>* %p) nounwind {
    249   %tmp1 = load <3 x float>, <3 x float>* %p, align 16
    250   %tmp2 = shufflevector <3 x float> %tmp1, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
    251   ret <3 x float> %tmp2
    252 }
    253 
    254 ; NEON-LABEL: store_illegal_factor2:
    255 ; NEON: BB#0:
    256 ; NEON-NEXT: uzp1 v0.4s, v0.4s, v{{.*}}.4s
    257 ; NEON-NEXT: st1 { v0.d }[0], [x0]
    258 ; NEON-NEXT: ret
    259 ; NONEON-LABEL: store_illegal_factor2:
    260 ; NONEON: BB#0:
    261 ; NONEON-NEXT: fmov w[[ELT2:[0-9]+]], s2
    262 ; NONEON-NEXT: fmov w[[RES:[0-9]+]], s0
    263 ; NONEON-NEXT: bfi x[[RES]], x[[ELT2]], #32, #32
    264 ; NONEON-NEXT: str x[[RES]], [x0]
    265 ; NONEON-NEXT: ret
        ; Applies the shuffle <0, 2, undef> to a <3 x float> argument and stores
        ; the result to %p. The directives above pin the whole function body:
        ; with NEON a uzp1 followed by a 64-bit lane st1, and without NEON an
        ; fmov/fmov/bfi/str sequence, so no st2 appears in either sequence.
    266 define void @store_illegal_factor2(<3 x float>* %p, <3 x float> %v) nounwind {
    267   %tmp1 = shufflevector <3 x float> %v, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
    268   store <3 x float> %tmp1, <3 x float>* %p, align 16
    269   ret void
    270 }
    271 
    272 ; NEON-LABEL: load_factor2_with_extract_user:
    273 ; NEON: ld2 { v0.4s, v1.4s }, [x0]
    274 ; NEON: mov w0, v0.s[1]
    275 ; NONEON-LABEL: load_factor2_with_extract_user:
    276 ; NONEON-NOT: ld2
        ; The wide load %1 has two users: the stride-2 shuffle %2 (lanes 0,2,4,6),
        ; whose result is otherwise unused, and an extractelement of wide lane 2,
        ; which is returned. The directives above expect an ld2 with NEON, with
        ; the returned value read from v0.s[1] (wide lane 2 is element 1 of the
        ; even-lane vector), and no ld2 without NEON.
    277 define i32 @load_factor2_with_extract_user(<8 x i32>* %a) {
    278   %1 = load <8 x i32>, <8 x i32>* %a, align 8
    279   %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
    280   %3 = extractelement <8 x i32> %1, i32 2
    281   ret i32 %3
    282 }
    283