Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
      2 
      3 ; rdar://9428579
      4 
      5 %type1 = type { <16 x i8> } ; 128-bit vector wrapper, written through by @t1
      6 %type2 = type { <8 x i8> } ; 64-bit vector wrapper, written through by @t2
      7 %type3 = type { <4 x i16> } ; NOTE(review): not referenced anywhere in the visible part of this file
      8 
      9 
     10 define hidden fastcc void @t1(%type1** %argtable) nounwind {
        ; Zero the <16 x i8> member of the %type1 object whose address is stored in
        ; %argtable. Expected code: pointer fetched with ldr, then a Q-register str.
     11 entry:
     12 ; CHECK-LABEL: t1:
     13 ; CHECK: ldr x[[REG:[0-9]+]], [x0]
     14 ; CHECK: str q0, [x[[REG]]]
     15   %obj = load %type1*, %type1** %argtable, align 8
     16   %member = getelementptr inbounds %type1, %type1* %obj, i64 0, i32 0
     17   store <16 x i8> zeroinitializer, <16 x i8>* %member, align 16
     18   ret void
     19 }
     20 
     21 define hidden fastcc void @t2(%type2** %argtable) nounwind {
        ; Zero the <8 x i8> member of the %type2 object whose address is stored in
        ; %argtable. Expected code: pointer fetched with ldr, then a D-register str.
     22 entry:
     23 ; CHECK-LABEL: t2:
     24 ; CHECK: ldr x[[REG:[0-9]+]], [x0]
     25 ; CHECK: str d0, [x[[REG]]]
     26   %obj = load %type2*, %type2** %argtable, align 8
     27   %member = getelementptr inbounds %type2, %type2* %obj, i64 0, i32 0
     28   store <8 x i8> zeroinitializer, <8 x i8>* %member, align 8
     29   ret void
     30 }
     31 
     32 ; Add a bunch of tests for rdar://11246289.
     33 
        ; Pointer-valued globals, all initialised to null. The integer-vector ones
        ; provide the destination base pointer for the fct1_*/fct2_* tests below.
        ; The float/double ones are not referenced in the visible part of this file.
     34 @globalArray64x2 = common global <2 x i64>* null, align 8
     35 @globalArray32x4 = common global <4 x i32>* null, align 8
     36 @globalArray16x8 = common global <8 x i16>* null, align 8
     37 @globalArray8x16 = common global <16 x i8>* null, align 8
     38 @globalArray64x1 = common global <1 x i64>* null, align 8
     39 @globalArray32x2 = common global <2 x i32>* null, align 8
     40 @globalArray16x4 = common global <4 x i16>* null, align 8
     41 @globalArray8x8 = common global <8 x i8>* null, align 8
     42 @floatglobalArray64x2 = common global <2 x double>* null, align 8
     43 @floatglobalArray32x4 = common global <4 x float>* null, align 8
     44 @floatglobalArray64x1 = common global <1 x double>* null, align 8
     45 @floatglobalArray32x2 = common global <2 x float>* null, align 8
     46 
     47 define void @fct1_64x2(<2 x i64>* nocapture %array, i64 %offset) nounwind ssp {
        ; Copy element %offset of %array into element %offset of the array that
        ; @globalArray64x2 points to. Both accesses are expected to use the
        ; register-offset form with one shared index, shifted left by 4 up front.
        ; The load pattern now closes the address operand with "]" like the store
        ; pattern and the sibling tests do, so the whole operand is matched.
     48 entry:
     49 ; CHECK-LABEL: fct1_64x2:
     50 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
     51 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
     52 ; CHECK: ldr [[BASE:x[0-9]+]],
     53 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
     54   %arrayidx = getelementptr inbounds <2 x i64>, <2 x i64>* %array, i64 %offset
     55   %tmp = load <2 x i64>, <2 x i64>* %arrayidx, align 16
     56   %tmp1 = load <2 x i64>*, <2 x i64>** @globalArray64x2, align 8
     57   %arrayidx1 = getelementptr inbounds <2 x i64>, <2 x i64>* %tmp1, i64 %offset
     58   store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
     59   ret void
     60 }
     61 
     62 define void @fct2_64x2(<2 x i64>* nocapture %array) nounwind ssp {
        ; Copy element 3 of %array into element 5 of the array that @globalArray64x2
        ; points to; the expected immediate offsets are #48 and #80.
     63 entry:
     64 ; CHECK-LABEL: fct2_64x2:
     65 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
     66 ; CHECK: ldr [[BASE:x[0-9]+]],
     67 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
     68   %src.slot = getelementptr inbounds <2 x i64>, <2 x i64>* %array, i64 3
     69   %data = load <2 x i64>, <2 x i64>* %src.slot, align 16
     70   %dst.base = load <2 x i64>*, <2 x i64>** @globalArray64x2, align 8
     71   %dst.slot = getelementptr inbounds <2 x i64>, <2 x i64>* %dst.base, i64 5
     72   store <2 x i64> %data, <2 x i64>* %dst.slot, align 16
     73   ret void
     74 }
     75 
     76 define void @fct1_32x4(<4 x i32>* nocapture %array, i64 %offset) nounwind ssp {
        ; Copy element %offset of %array into element %offset of the array that
        ; @globalArray32x4 points to, reusing one index shifted left by 4.
     77 entry:
     78 ; CHECK-LABEL: fct1_32x4:
     79 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
     80 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
     81 ; CHECK: ldr [[BASE:x[0-9]+]],
     82 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
     83   %src.slot = getelementptr inbounds <4 x i32>, <4 x i32>* %array, i64 %offset
     84   %data = load <4 x i32>, <4 x i32>* %src.slot, align 16
     85   %dst.base = load <4 x i32>*, <4 x i32>** @globalArray32x4, align 8
     86   %dst.slot = getelementptr inbounds <4 x i32>, <4 x i32>* %dst.base, i64 %offset
     87   store <4 x i32> %data, <4 x i32>* %dst.slot, align 16
     88   ret void
     89 }
     90 
     91 define void @fct2_32x4(<4 x i32>* nocapture %array) nounwind ssp {
        ; Copy element 3 of %array into element 5 of the array that @globalArray32x4
        ; points to; the expected immediate offsets are #48 and #80.
     92 entry:
     93 ; CHECK-LABEL: fct2_32x4:
     94 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
     95 ; CHECK: ldr [[BASE:x[0-9]+]],
     96 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
     97   %src.slot = getelementptr inbounds <4 x i32>, <4 x i32>* %array, i64 3
     98   %data = load <4 x i32>, <4 x i32>* %src.slot, align 16
     99   %dst.base = load <4 x i32>*, <4 x i32>** @globalArray32x4, align 8
    100   %dst.slot = getelementptr inbounds <4 x i32>, <4 x i32>* %dst.base, i64 5
    101   store <4 x i32> %data, <4 x i32>* %dst.slot, align 16
    102   ret void
    103 }
    104 
    105 define void @fct1_16x8(<8 x i16>* nocapture %array, i64 %offset) nounwind ssp {
        ; Copy element %offset of %array into element %offset of the array that
        ; @globalArray16x8 points to, reusing one index shifted left by 4.
    106 entry:
    107 ; CHECK-LABEL: fct1_16x8:
    108 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
    109 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
    110 ; CHECK: ldr [[BASE:x[0-9]+]],
    111 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
    112   %src.slot = getelementptr inbounds <8 x i16>, <8 x i16>* %array, i64 %offset
    113   %data = load <8 x i16>, <8 x i16>* %src.slot, align 16
    114   %dst.base = load <8 x i16>*, <8 x i16>** @globalArray16x8, align 8
    115   %dst.slot = getelementptr inbounds <8 x i16>, <8 x i16>* %dst.base, i64 %offset
    116   store <8 x i16> %data, <8 x i16>* %dst.slot, align 16
    117   ret void
    118 }
    119 
    120 define void @fct2_16x8(<8 x i16>* nocapture %array) nounwind ssp {
        ; Copy element 3 of %array into element 5 of the array that @globalArray16x8
        ; points to; the expected immediate offsets are #48 and #80.
    121 entry:
    122 ; CHECK-LABEL: fct2_16x8:
    123 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
    124 ; CHECK: ldr [[BASE:x[0-9]+]],
    125 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
    126   %src.slot = getelementptr inbounds <8 x i16>, <8 x i16>* %array, i64 3
    127   %data = load <8 x i16>, <8 x i16>* %src.slot, align 16
    128   %dst.base = load <8 x i16>*, <8 x i16>** @globalArray16x8, align 8
    129   %dst.slot = getelementptr inbounds <8 x i16>, <8 x i16>* %dst.base, i64 5
    130   store <8 x i16> %data, <8 x i16>* %dst.slot, align 16
    131   ret void
    132 }
    133 
    134 define void @fct1_8x16(<16 x i8>* nocapture %array, i64 %offset) nounwind ssp {
        ; Copy element %offset of %array into element %offset of the array that
        ; @globalArray8x16 points to, reusing one index shifted left by 4.
    135 entry:
    136 ; CHECK-LABEL: fct1_8x16:
    137 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
    138 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
    139 ; CHECK: ldr [[BASE:x[0-9]+]],
    140 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
    141   %src.slot = getelementptr inbounds <16 x i8>, <16 x i8>* %array, i64 %offset
    142   %data = load <16 x i8>, <16 x i8>* %src.slot, align 16
    143   %dst.base = load <16 x i8>*, <16 x i8>** @globalArray8x16, align 8
    144   %dst.slot = getelementptr inbounds <16 x i8>, <16 x i8>* %dst.base, i64 %offset
    145   store <16 x i8> %data, <16 x i8>* %dst.slot, align 16
    146   ret void
    147 }
    148 
    149 define void @fct2_8x16(<16 x i8>* nocapture %array) nounwind ssp {
        ; Copy element 3 of %array into element 5 of the array that @globalArray8x16
        ; points to; the expected immediate offsets are #48 and #80.
    150 entry:
    151 ; CHECK-LABEL: fct2_8x16:
    152 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
    153 ; CHECK: ldr [[BASE:x[0-9]+]],
    154 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
    155   %src.slot = getelementptr inbounds <16 x i8>, <16 x i8>* %array, i64 3
    156   %data = load <16 x i8>, <16 x i8>* %src.slot, align 16
    157   %dst.base = load <16 x i8>*, <16 x i8>** @globalArray8x16, align 8
    158   %dst.slot = getelementptr inbounds <16 x i8>, <16 x i8>* %dst.base, i64 5
    159   store <16 x i8> %data, <16 x i8>* %dst.slot, align 16
    160   ret void
    161 }
    162 
    163 define void @fct1_64x1(<1 x i64>* nocapture %array, i64 %offset) nounwind ssp {
        ; Copy element %offset of %array into element %offset of the array that
        ; @globalArray64x1 points to, reusing one index shifted left by 3 (D register).
    164 entry:
    165 ; CHECK-LABEL: fct1_64x1:
    166 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
    167 ; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
    168 ; CHECK: ldr [[BASE:x[0-9]+]],
    169 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
    170   %src.slot = getelementptr inbounds <1 x i64>, <1 x i64>* %array, i64 %offset
    171   %data = load <1 x i64>, <1 x i64>* %src.slot, align 8
    172   %dst.base = load <1 x i64>*, <1 x i64>** @globalArray64x1, align 8
    173   %dst.slot = getelementptr inbounds <1 x i64>, <1 x i64>* %dst.base, i64 %offset
    174   store <1 x i64> %data, <1 x i64>* %dst.slot, align 8
    175   ret void
    176 }
    177 
    178 define void @fct2_64x1(<1 x i64>* nocapture %array) nounwind ssp {
        ; Copy element 3 of %array into element 5 of the array that @globalArray64x1
        ; points to; the expected immediate offsets are #24 and #40.
    179 entry:
    180 ; CHECK-LABEL: fct2_64x1:
    181 ; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
    182 ; CHECK: ldr [[BASE:x[0-9]+]],
    183 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
    184   %src.slot = getelementptr inbounds <1 x i64>, <1 x i64>* %array, i64 3
    185   %data = load <1 x i64>, <1 x i64>* %src.slot, align 8
    186   %dst.base = load <1 x i64>*, <1 x i64>** @globalArray64x1, align 8
    187   %dst.slot = getelementptr inbounds <1 x i64>, <1 x i64>* %dst.base, i64 5
    188   store <1 x i64> %data, <1 x i64>* %dst.slot, align 8
    189   ret void
    190 }
    191 
    192 define void @fct1_32x2(<2 x i32>* nocapture %array, i64 %offset) nounwind ssp {
        ; Copy element %offset of %array into element %offset of the array that
        ; @globalArray32x2 points to, reusing one index shifted left by 3 (D register).
    193 entry:
    194 ; CHECK-LABEL: fct1_32x2:
    195 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
    196 ; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
    197 ; CHECK: ldr [[BASE:x[0-9]+]],
    198 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
    199   %src.slot = getelementptr inbounds <2 x i32>, <2 x i32>* %array, i64 %offset
    200   %data = load <2 x i32>, <2 x i32>* %src.slot, align 8
    201   %dst.base = load <2 x i32>*, <2 x i32>** @globalArray32x2, align 8
    202   %dst.slot = getelementptr inbounds <2 x i32>, <2 x i32>* %dst.base, i64 %offset
    203   store <2 x i32> %data, <2 x i32>* %dst.slot, align 8
    204   ret void
    205 }
    206 
    207 define void @fct2_32x2(<2 x i32>* nocapture %array) nounwind ssp {
        ; Copy element 3 of %array into element 5 of the array that @globalArray32x2
        ; points to; the expected immediate offsets are #24 and #40.
    208 entry:
    209 ; CHECK-LABEL: fct2_32x2:
    210 ; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
    211 ; CHECK: ldr [[BASE:x[0-9]+]],
    212 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
    213   %src.slot = getelementptr inbounds <2 x i32>, <2 x i32>* %array, i64 3
    214   %data = load <2 x i32>, <2 x i32>* %src.slot, align 8
    215   %dst.base = load <2 x i32>*, <2 x i32>** @globalArray32x2, align 8
    216   %dst.slot = getelementptr inbounds <2 x i32>, <2 x i32>* %dst.base, i64 5
    217   store <2 x i32> %data, <2 x i32>* %dst.slot, align 8
    218   ret void
    219 }
    220 
    221 define void @fct1_16x4(<4 x i16>* nocapture %array, i64 %offset) nounwind ssp {
        ; Copy element %offset of %array into element %offset of the array that
        ; @globalArray16x4 points to, reusing one index shifted left by 3 (D register).
    222 entry:
    223 ; CHECK-LABEL: fct1_16x4:
    224 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
    225 ; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
    226 ; CHECK: ldr [[BASE:x[0-9]+]],
    227 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
    228   %src.slot = getelementptr inbounds <4 x i16>, <4 x i16>* %array, i64 %offset
    229   %data = load <4 x i16>, <4 x i16>* %src.slot, align 8
    230   %dst.base = load <4 x i16>*, <4 x i16>** @globalArray16x4, align 8
    231   %dst.slot = getelementptr inbounds <4 x i16>, <4 x i16>* %dst.base, i64 %offset
    232   store <4 x i16> %data, <4 x i16>* %dst.slot, align 8
    233   ret void
    234 }
    235 
    236 define void @fct2_16x4(<4 x i16>* nocapture %array) nounwind ssp {
        ; Copy element 3 of %array into element 5 of the array that @globalArray16x4
        ; points to; the expected immediate offsets are #24 and #40.
    237 entry:
    238 ; CHECK-LABEL: fct2_16x4:
    239 ; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
    240 ; CHECK: ldr [[BASE:x[0-9]+]],
    241 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
    242   %src.slot = getelementptr inbounds <4 x i16>, <4 x i16>* %array, i64 3
    243   %data = load <4 x i16>, <4 x i16>* %src.slot, align 8
    244   %dst.base = load <4 x i16>*, <4 x i16>** @globalArray16x4, align 8
    245   %dst.slot = getelementptr inbounds <4 x i16>, <4 x i16>* %dst.base, i64 5
    246   store <4 x i16> %data, <4 x i16>* %dst.slot, align 8
    247   ret void
    248 }
    249 
    250 define void @fct1_8x8(<8 x i8>* nocapture %array, i64 %offset) nounwind ssp {
        ; Copy element %offset of %array into element %offset of the array that
        ; @globalArray8x8 points to, reusing one index shifted left by 3 (D register).
    251 entry:
    252 ; CHECK-LABEL: fct1_8x8:
    253 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
    254 ; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
    255 ; CHECK: ldr [[BASE:x[0-9]+]],
    256 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
    257   %src.slot = getelementptr inbounds <8 x i8>, <8 x i8>* %array, i64 %offset
    258   %data = load <8 x i8>, <8 x i8>* %src.slot, align 8
    259   %dst.base = load <8 x i8>*, <8 x i8>** @globalArray8x8, align 8
    260   %dst.slot = getelementptr inbounds <8 x i8>, <8 x i8>* %dst.base, i64 %offset
    261   store <8 x i8> %data, <8 x i8>* %dst.slot, align 8
    262   ret void
    263 }
    264 
    265 ; Add a bunch of tests for rdar://13258794: Match LDUR/STUR for D and Q
    266 ; registers for unscaled vector accesses
    267 
    268 define <1 x i64> @fct0(i8* %str) nounwind readonly ssp {
        ; Return the <1 x i64> found 3 bytes past %str; an unscaled ldur into a
        ; D register with offset #3 is expected.
    269 entry:
    270 ; CHECK-LABEL: fct0:
    271 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
    272   %byte.addr = getelementptr inbounds i8, i8* %str, i64 3
    273   %vec.addr = bitcast i8* %byte.addr to <1 x i64>*
    274   %val = load <1 x i64>, <1 x i64>* %vec.addr, align 8
    275   ret <1 x i64> %val
    276 }
    277 
    278 define <2 x i32> @fct1(i8* %str) nounwind readonly ssp {
        ; Return the <2 x i32> found 3 bytes past %str; an unscaled ldur into a
        ; D register with offset #3 is expected.
    279 entry:
    280 ; CHECK-LABEL: fct1:
    281 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
    282   %byte.addr = getelementptr inbounds i8, i8* %str, i64 3
    283   %vec.addr = bitcast i8* %byte.addr to <2 x i32>*
    284   %val = load <2 x i32>, <2 x i32>* %vec.addr, align 8
    285   ret <2 x i32> %val
    286 }
    287 
    288 define <4 x i16> @fct2(i8* %str) nounwind readonly ssp {
        ; Return the <4 x i16> found 3 bytes past %str; an unscaled ldur into a
        ; D register with offset #3 is expected.
    289 entry:
    290 ; CHECK-LABEL: fct2:
    291 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
    292   %byte.addr = getelementptr inbounds i8, i8* %str, i64 3
    293   %vec.addr = bitcast i8* %byte.addr to <4 x i16>*
    294   %val = load <4 x i16>, <4 x i16>* %vec.addr, align 8
    295   ret <4 x i16> %val
    296 }
    297 
    298 define <8 x i8> @fct3(i8* %str) nounwind readonly ssp {
        ; Return the <8 x i8> found 3 bytes past %str; an unscaled ldur into a
        ; D register with offset #3 is expected.
    299 entry:
    300 ; CHECK-LABEL: fct3:
    301 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
    302   %byte.addr = getelementptr inbounds i8, i8* %str, i64 3
    303   %vec.addr = bitcast i8* %byte.addr to <8 x i8>*
    304   %val = load <8 x i8>, <8 x i8>* %vec.addr, align 8
    305   ret <8 x i8> %val
    306 }
    307 
    308 define <2 x i64> @fct4(i8* %str) nounwind readonly ssp {
        ; Return the <2 x i64> found 3 bytes past %str; an unscaled ldur into a
        ; Q register with offset #3 is expected.
    309 entry:
    310 ; CHECK-LABEL: fct4:
    311 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
    312   %byte.addr = getelementptr inbounds i8, i8* %str, i64 3
    313   %vec.addr = bitcast i8* %byte.addr to <2 x i64>*
    314   %val = load <2 x i64>, <2 x i64>* %vec.addr, align 16
    315   ret <2 x i64> %val
    316 }
    317 
    318 define <4 x i32> @fct5(i8* %str) nounwind readonly ssp {
        ; Return the <4 x i32> found 3 bytes past %str; an unscaled ldur into a
        ; Q register with offset #3 is expected.
    319 entry:
    320 ; CHECK-LABEL: fct5:
    321 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
    322   %byte.addr = getelementptr inbounds i8, i8* %str, i64 3
    323   %vec.addr = bitcast i8* %byte.addr to <4 x i32>*
    324   %val = load <4 x i32>, <4 x i32>* %vec.addr, align 16
    325   ret <4 x i32> %val
    326 }
    327 
    328 define <8 x i16> @fct6(i8* %str) nounwind readonly ssp {
        ; Return the <8 x i16> found 3 bytes past %str; an unscaled ldur into a
        ; Q register with offset #3 is expected.
    329 entry:
    330 ; CHECK-LABEL: fct6:
    331 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
    332   %byte.addr = getelementptr inbounds i8, i8* %str, i64 3
    333   %vec.addr = bitcast i8* %byte.addr to <8 x i16>*
    334   %val = load <8 x i16>, <8 x i16>* %vec.addr, align 16
    335   ret <8 x i16> %val
    336 }
    337 
    338 define <16 x i8> @fct7(i8* %str) nounwind readonly ssp {
        ; Return the <16 x i8> found 3 bytes past %str; an unscaled ldur into a
        ; Q register with offset #3 is expected.
    339 entry:
    340 ; CHECK-LABEL: fct7:
    341 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
    342   %byte.addr = getelementptr inbounds i8, i8* %str, i64 3
    343   %vec.addr = bitcast i8* %byte.addr to <16 x i8>*
    344   %val = load <16 x i8>, <16 x i8>* %vec.addr, align 16
    345   ret <16 x i8> %val
    346 }
    347 
    348 define void @fct8(i8* %str) nounwind ssp {
        ; Read a <1 x i64> at %str + 3 and write it back at %str + 4. Expected: a
        ; D-register ldur/stur pair (#3 then #4) off the same base register.
    349 entry:
    350 ; CHECK-LABEL: fct8:
    351 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
    352 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
    353   %src.byte = getelementptr inbounds i8, i8* %str, i64 3
    354   %src.vec = bitcast i8* %src.byte to <1 x i64>*
    355   %val = load <1 x i64>, <1 x i64>* %src.vec, align 8
    356   %dst.byte = getelementptr inbounds i8, i8* %str, i64 4
    357   %dst.vec = bitcast i8* %dst.byte to <1 x i64>*
    358   store <1 x i64> %val, <1 x i64>* %dst.vec, align 8
    359   ret void
    360 }
    361 
    362 define void @fct9(i8* %str) nounwind ssp {
        ; Read a <2 x i32> at %str + 3 and write it back at %str + 4. Expected: a
        ; D-register ldur/stur pair (#3 then #4) off the same base register.
    363 entry:
    364 ; CHECK-LABEL: fct9:
    365 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
    366 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
    367   %src.byte = getelementptr inbounds i8, i8* %str, i64 3
    368   %src.vec = bitcast i8* %src.byte to <2 x i32>*
    369   %val = load <2 x i32>, <2 x i32>* %src.vec, align 8
    370   %dst.byte = getelementptr inbounds i8, i8* %str, i64 4
    371   %dst.vec = bitcast i8* %dst.byte to <2 x i32>*
    372   store <2 x i32> %val, <2 x i32>* %dst.vec, align 8
    373   ret void
    374 }
    375 
    376 define void @fct10(i8* %str) nounwind ssp {
        ; Read a <4 x i16> at %str + 3 and write it back at %str + 4. Expected: a
        ; D-register ldur/stur pair (#3 then #4) off the same base register.
    377 entry:
    378 ; CHECK-LABEL: fct10:
    379 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
    380 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
    381   %src.byte = getelementptr inbounds i8, i8* %str, i64 3
    382   %src.vec = bitcast i8* %src.byte to <4 x i16>*
    383   %val = load <4 x i16>, <4 x i16>* %src.vec, align 8
    384   %dst.byte = getelementptr inbounds i8, i8* %str, i64 4
    385   %dst.vec = bitcast i8* %dst.byte to <4 x i16>*
    386   store <4 x i16> %val, <4 x i16>* %dst.vec, align 8
    387   ret void
    388 }
    389 
    390 define void @fct11(i8* %str) nounwind ssp {
        ; Read a <8 x i8> at %str + 3 and write it back at %str + 4. Expected: a
        ; D-register ldur/stur pair (#3 then #4) off the same base register.
    391 entry:
    392 ; CHECK-LABEL: fct11:
    393 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
    394 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
    395   %src.byte = getelementptr inbounds i8, i8* %str, i64 3
    396   %src.vec = bitcast i8* %src.byte to <8 x i8>*
    397   %val = load <8 x i8>, <8 x i8>* %src.vec, align 8
    398   %dst.byte = getelementptr inbounds i8, i8* %str, i64 4
    399   %dst.vec = bitcast i8* %dst.byte to <8 x i8>*
    400   store <8 x i8> %val, <8 x i8>* %dst.vec, align 8
    401   ret void
    402 }
    403 
    404 define void @fct12(i8* %str) nounwind ssp {
        ; Read a <2 x i64> at %str + 3 and write it back at %str + 4. Expected: a
        ; Q-register ldur/stur pair (#3 then #4) off the same base register.
    405 entry:
    406 ; CHECK-LABEL: fct12:
    407 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
    408 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
    409   %src.byte = getelementptr inbounds i8, i8* %str, i64 3
    410   %src.vec = bitcast i8* %src.byte to <2 x i64>*
    411   %val = load <2 x i64>, <2 x i64>* %src.vec, align 16
    412   %dst.byte = getelementptr inbounds i8, i8* %str, i64 4
    413   %dst.vec = bitcast i8* %dst.byte to <2 x i64>*
    414   store <2 x i64> %val, <2 x i64>* %dst.vec, align 16
    415   ret void
    416 }
    417 
    418 define void @fct13(i8* %str) nounwind ssp {
        ; Read a <4 x i32> at %str + 3 and write it back at %str + 4. Expected: a
        ; Q-register ldur/stur pair (#3 then #4) off the same base register.
    419 entry:
    420 ; CHECK-LABEL: fct13:
    421 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
    422 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
    423   %src.byte = getelementptr inbounds i8, i8* %str, i64 3
    424   %src.vec = bitcast i8* %src.byte to <4 x i32>*
    425   %val = load <4 x i32>, <4 x i32>* %src.vec, align 16
    426   %dst.byte = getelementptr inbounds i8, i8* %str, i64 4
    427   %dst.vec = bitcast i8* %dst.byte to <4 x i32>*
    428   store <4 x i32> %val, <4 x i32>* %dst.vec, align 16
    429   ret void
    430 }
    431 
    432 define void @fct14(i8* %str) nounwind ssp {
        ; Read a <8 x i16> at %str + 3 and write it back at %str + 4. Expected: a
        ; Q-register ldur/stur pair (#3 then #4) off the same base register.
    433 entry:
    434 ; CHECK-LABEL: fct14:
    435 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
    436 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
    437   %src.byte = getelementptr inbounds i8, i8* %str, i64 3
    438   %src.vec = bitcast i8* %src.byte to <8 x i16>*
    439   %val = load <8 x i16>, <8 x i16>* %src.vec, align 16
    440   %dst.byte = getelementptr inbounds i8, i8* %str, i64 4
    441   %dst.vec = bitcast i8* %dst.byte to <8 x i16>*
    442   store <8 x i16> %val, <8 x i16>* %dst.vec, align 16
    443   ret void
    444 }
    445 
    446 define void @fct15(i8* %str) nounwind ssp {
        ; Read a <16 x i8> at %str + 3 and write it back at %str + 4. Expected: a
        ; Q-register ldur/stur pair (#3 then #4) off the same base register.
    447 entry:
    448 ; CHECK-LABEL: fct15:
    449 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
    450 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
    451   %src.byte = getelementptr inbounds i8, i8* %str, i64 3
    452   %src.vec = bitcast i8* %src.byte to <16 x i8>*
    453   %val = load <16 x i8>, <16 x i8>* %src.vec, align 16
    454   %dst.byte = getelementptr inbounds i8, i8* %str, i64 4
    455   %dst.vec = bitcast i8* %dst.byte to <16 x i8>*
    456   store <16 x i8> %val, <16 x i8>* %dst.vec, align 16
    457   ret void
    458 }
    459 
    460 ; Check the building of a vector from a single loaded value.
    461 ; Part of <rdar://problem/14170854>
    462 ;
    463 ; Single loads with immediate offset.
    464 define <8 x i8> @fct16(i8* nocapture %sp0) {
        ; Load the i8 at index 1 of %sp0 into lane 0 of an otherwise undef <8 x i8>
        ; and multiply that vector by itself. Expected: ldr straight into a B
        ; register at offset #1, followed immediately by mul.8b.
    465 ; CHECK-LABEL: fct16:
    466 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
    467 ; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
    468 entry:
    469   %elt.addr = getelementptr i8, i8* %sp0, i64 1
    470   %scalar = load i8, i8* %elt.addr, align 1
    471   %lane0 = insertelement <8 x i8> undef, i8 %scalar, i32 0
    472   %squared = mul <8 x i8> %lane0, %lane0
    473   ret <8 x i8> %squared
    474 }
    475 
    476 define <16 x i8> @fct17(i8* nocapture %sp0) {
        ; Load the i8 at index 1 of %sp0 into lane 0 of an otherwise undef <16 x i8>
        ; and multiply that vector by itself. Expected: ldr straight into a B
        ; register at offset #1, followed immediately by mul.16b.
    477 ; CHECK-LABEL: fct17:
    478 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
    479 ; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
    480 entry:
    481   %elt.addr = getelementptr i8, i8* %sp0, i64 1
    482   %scalar = load i8, i8* %elt.addr, align 1
    483   %lane0 = insertelement <16 x i8> undef, i8 %scalar, i32 0
    484   %squared = mul <16 x i8> %lane0, %lane0
    485   ret <16 x i8> %squared
    486 }
    487 
    488 define <4 x i16> @fct18(i16* nocapture %sp0) {
        ; Load the i16 at index 1 of %sp0 into lane 0 of an otherwise undef <4 x i16>
        ; and multiply that vector by itself. Expected: ldr straight into an H
        ; register at offset #2, followed immediately by mul.4h.
    489 ; CHECK-LABEL: fct18:
    490 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
    491 ; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
    492 entry:
    493   %elt.addr = getelementptr i16, i16* %sp0, i64 1
    494   %scalar = load i16, i16* %elt.addr, align 1
    495   %lane0 = insertelement <4 x i16> undef, i16 %scalar, i32 0
    496   %squared = mul <4 x i16> %lane0, %lane0
    497   ret <4 x i16> %squared
    498 }
    499 
    500 define <8 x i16> @fct19(i16* nocapture %sp0) {
        ; Load the i16 at index 1 of %sp0 into lane 0 of an otherwise undef <8 x i16>
        ; and multiply that vector by itself. Expected: ldr straight into an H
        ; register at offset #2, followed immediately by mul.8h.
    501 ; CHECK-LABEL: fct19:
    502 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
    503 ; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
    504 entry:
    505   %elt.addr = getelementptr i16, i16* %sp0, i64 1
    506   %scalar = load i16, i16* %elt.addr, align 1
    507   %lane0 = insertelement <8 x i16> undef, i16 %scalar, i32 0
    508   %squared = mul <8 x i16> %lane0, %lane0
    509   ret <8 x i16> %squared
    510 }
    511 
    512 define <2 x i32> @fct20(i32* nocapture %sp0) {
        ; Load the i32 at index 1 of %sp0 into lane 0 of an otherwise undef <2 x i32>
        ; and multiply that vector by itself. Expected: ldr straight into an S
        ; register at offset #4, followed immediately by mul.2s.
    513 ; CHECK-LABEL: fct20:
    514 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
    515 ; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
    516 entry:
    517   %elt.addr = getelementptr i32, i32* %sp0, i64 1
    518   %scalar = load i32, i32* %elt.addr, align 1
    519   %lane0 = insertelement <2 x i32> undef, i32 %scalar, i32 0
    520   %squared = mul <2 x i32> %lane0, %lane0
    521   ret <2 x i32> %squared
    522 }
    523 
    524 define <4 x i32> @fct21(i32* nocapture %sp0) {
        ; Load the i32 at index 1 of %sp0 into lane 0 of an otherwise undef <4 x i32>
        ; and multiply that vector by itself. Expected: ldr straight into an S
        ; register at offset #4, followed immediately by mul.4s.
    525 ; CHECK-LABEL: fct21:
    526 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
    527 ; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
    528 entry:
    529   %elt.addr = getelementptr i32, i32* %sp0, i64 1
    530   %scalar = load i32, i32* %elt.addr, align 1
    531   %lane0 = insertelement <4 x i32> undef, i32 %scalar, i32 0
    532   %squared = mul <4 x i32> %lane0, %lane0
    533   ret <4 x i32> %squared
    534 }
    535 
    536 define <1 x i64> @fct22(i64* nocapture %sp0) {
        ; Return the i64 at index 1 of %sp0 as a <1 x i64>. Expected: a single ldr
        ; directly into d0 at offset #8.
    537 ; CHECK-LABEL: fct22:
    538 ; CHECK: ldr d0, [x0, #8]
    539 entry:
    540   %elt.addr = getelementptr i64, i64* %sp0, i64 1
    541   %scalar = load i64, i64* %elt.addr, align 1
    542   %lane0 = insertelement <1 x i64> undef, i64 %scalar, i32 0
    543   ret <1 x i64> %lane0
    544 }
    545 
    546 define <2 x i64> @fct23(i64* nocapture %sp0) {
        ; Return a <2 x i64> whose lane 0 is the i64 at index 1 of %sp0 (lane 1 is
        ; undef). Expected: ldr into some D register at offset #8. The register
        ; number is captured as REGNUM but not referenced again in this function.
    547 ; CHECK-LABEL: fct23:
    548 ; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
    549 entry:
    550   %elt.addr = getelementptr i64, i64* %sp0, i64 1
    551   %scalar = load i64, i64* %elt.addr, align 1
    552   %lane0 = insertelement <2 x i64> undef, i64 %scalar, i32 0
    553   ret <2 x i64> %lane0
    554 }
    555 
    556 ;
    557 ; Single loads with register offset.
    558 define <8 x i8> @fct24(i8* nocapture %sp0, i64 %offset) {
        ; Load the i8 at index %offset of %sp0 into lane 0 of an otherwise undef
        ; <8 x i8> and multiply that vector by itself. Expected: register-offset ldr
        ; into a B register, followed immediately by mul.8b.
    559 ; CHECK-LABEL: fct24:
    560 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
    561 ; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
    562 entry:
    563   %elt.addr = getelementptr i8, i8* %sp0, i64 %offset
    564   %scalar = load i8, i8* %elt.addr, align 1
    565   %lane0 = insertelement <8 x i8> undef, i8 %scalar, i32 0
    566   %squared = mul <8 x i8> %lane0, %lane0
    567   ret <8 x i8> %squared
    568 }
    569 
    570 define <16 x i8> @fct25(i8* nocapture %sp0, i64 %offset) {
        ; Load the i8 at index %offset of %sp0 into lane 0 of an otherwise undef
        ; <16 x i8> and multiply that vector by itself. Expected: register-offset ldr
        ; into a B register, followed immediately by mul.16b.
    571 ; CHECK-LABEL: fct25:
    572 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
    573 ; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
    574 entry:
    575   %elt.addr = getelementptr i8, i8* %sp0, i64 %offset
    576   %scalar = load i8, i8* %elt.addr, align 1
    577   %lane0 = insertelement <16 x i8> undef, i8 %scalar, i32 0
    578   %squared = mul <16 x i8> %lane0, %lane0
    579   ret <16 x i8> %squared
    580 }
    581 
    582 define <4 x i16> @fct26(i16* nocapture %sp0, i64 %offset) {
        ; Load the i16 at index %offset of %sp0 into lane 0 of an otherwise undef
        ; <4 x i16> and multiply that vector by itself. Expected: register-offset ldr
        ; (index scaled with lsl #1) into an H register, then mul.4h right after.
    583 ; CHECK-LABEL: fct26:
    584 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
    585 ; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
    586 entry:
    587   %elt.addr = getelementptr i16, i16* %sp0, i64 %offset
    588   %scalar = load i16, i16* %elt.addr, align 1
    589   %lane0 = insertelement <4 x i16> undef, i16 %scalar, i32 0
    590   %squared = mul <4 x i16> %lane0, %lane0
    591   ret <4 x i16> %squared
    592 }
    593 
    594 define <8 x i16> @fct27(i16* nocapture %sp0, i64 %offset) {
        ; Load the i16 at index %offset of %sp0 into lane 0 of an otherwise undef
        ; <8 x i16> and multiply that vector by itself. Expected: register-offset ldr
        ; (index scaled with lsl #1) into an H register, then mul.8h right after.
    595 ; CHECK-LABEL: fct27:
    596 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
    597 ; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
    598 entry:
    599   %elt.addr = getelementptr i16, i16* %sp0, i64 %offset
    600   %scalar = load i16, i16* %elt.addr, align 1
    601   %lane0 = insertelement <8 x i16> undef, i16 %scalar, i32 0
    602   %squared = mul <8 x i16> %lane0, %lane0
    603   ret <8 x i16> %squared
    604 }
    605 
    606 define <2 x i32> @fct28(i32* nocapture %sp0, i64 %offset) {
        ; Load the i32 at index %offset of %sp0 into lane 0 of an otherwise undef
        ; <2 x i32> and multiply that vector by itself. Expected: register-offset ldr
        ; (index scaled with lsl #2) into an S register, then mul.2s right after.
    607 ; CHECK-LABEL: fct28:
    608 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
    609 ; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
    610 entry:
    611   %elt.addr = getelementptr i32, i32* %sp0, i64 %offset
    612   %scalar = load i32, i32* %elt.addr, align 1
    613   %lane0 = insertelement <2 x i32> undef, i32 %scalar, i32 0
    614   %squared = mul <2 x i32> %lane0, %lane0
    615   ret <2 x i32> %squared
    616 }
    617 
    618 define <4 x i32> @fct29(i32* nocapture %sp0, i64 %offset) {
        ; Load the i32 at index %offset of %sp0 into lane 0 of an otherwise undef
        ; <4 x i32> and multiply that vector by itself. Expected: register-offset ldr
        ; (index scaled with lsl #2) into an S register, then mul.4s right after.
    619 ; CHECK-LABEL: fct29:
    620 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
    621 ; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
    622 entry:
    623   %elt.addr = getelementptr i32, i32* %sp0, i64 %offset
    624   %scalar = load i32, i32* %elt.addr, align 1
    625   %lane0 = insertelement <4 x i32> undef, i32 %scalar, i32 0
    626   %squared = mul <4 x i32> %lane0, %lane0
    627   ret <4 x i32> %squared
    628 }
    629 
    630 define <1 x i64> @fct30(i64* nocapture %sp0, i64 %offset) {
        ; Return the i64 at index %offset of %sp0 as a <1 x i64>. Expected: a single
        ; register-offset ldr (index scaled with lsl #3) directly into d0.
    631 ; CHECK-LABEL: fct30:
    632 ; CHECK: ldr d0, [x0, x1, lsl #3]
    633 entry:
    634   %elt.addr = getelementptr i64, i64* %sp0, i64 %offset
    635   %scalar = load i64, i64* %elt.addr, align 1
    636   %lane0 = insertelement <1 x i64> undef, i64 %scalar, i32 0
    637   ret <1 x i64> %lane0
    638 }
    639 
    640 define <2 x i64> @fct31(i64* nocapture %sp0, i64 %offset) {
        ; Return a <2 x i64> whose lane 0 is the i64 at index %offset of %sp0 (lane 1
        ; is undef). Expected: a single register-offset ldr (lsl #3) directly into d0.
    641 ; CHECK-LABEL: fct31:
    642 ; CHECK: ldr d0, [x0, x1, lsl #3]
    643 entry:
    644   %elt.addr = getelementptr i64, i64* %sp0, i64 %offset
    645   %scalar = load i64, i64* %elt.addr, align 1
    646   %lane0 = insertelement <2 x i64> undef, i64 %scalar, i32 0
    647   ret <2 x i64> %lane0
    648 }
    649