Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
      2 
      3 ; rdar://9428579
      4 
      5 %type1 = type { <16 x i8> }   ; single 16-byte vector member; pointee type stored through in @t1
      6 %type2 = type { <8 x i8> }    ; single 8-byte vector member; pointee type stored through in @t2
      7 %type3 = type { <4 x i16> }   ; not referenced by any function visible in this file
      8 
      9 
      10 define hidden fastcc void @t1(%type1** %argtable) nounwind {
         ; Loads a %type1* from %argtable and stores an all-zero <16 x i8> through it.
         ; Expects the pointer to be loaded from [x0] and q0 to be stored to that pointer.
      11 entry:
      12 ; CHECK-LABEL: t1:
      13 ; CHECK: ldr x[[REG:[0-9]+]], [x0]
      14 ; CHECK: str q0, [x[[REG]]]
      15   %tmp1 = load %type1** %argtable, align 8
      16   %tmp2 = getelementptr inbounds %type1* %tmp1, i64 0, i32 0
      17   store <16 x i8> zeroinitializer, <16 x i8>* %tmp2, align 16
      18   ret void
      19 }
     20 
      21 define hidden fastcc void @t2(%type2** %argtable) nounwind {
         ; Loads a %type2* from %argtable and stores an all-zero <8 x i8> through it.
         ; Expects the pointer to be loaded from [x0] and d0 to be stored to that pointer.
      22 entry:
      23 ; CHECK-LABEL: t2:
      24 ; CHECK: ldr x[[REG:[0-9]+]], [x0]
      25 ; CHECK: str d0, [x[[REG]]]
      26   %tmp1 = load %type2** %argtable, align 8
      27   %tmp2 = getelementptr inbounds %type2* %tmp1, i64 0, i32 0
      28   store <8 x i8> zeroinitializer, <8 x i8>* %tmp2, align 8
      29   ret void
      30 }
     31 
      32 ; Add a bunch of tests for rdar://11246289: vector loads/stores using register-offset (fct1_*) and immediate-offset (fct2_*) addressing.
     33 
      34 @globalArray64x2 = common global <2 x i64>* null, align 8
      35 @globalArray32x4 = common global <4 x i32>* null, align 8
      36 @globalArray16x8 = common global <8 x i16>* null, align 8
      37 @globalArray8x16 = common global <16 x i8>* null, align 8
      38 @globalArray64x1 = common global <1 x i64>* null, align 8
      39 @globalArray32x2 = common global <2 x i32>* null, align 8
      40 @globalArray16x4 = common global <4 x i16>* null, align 8
      41 @globalArray8x8 = common global <8 x i8>* null, align 8
         ; The float/double pointer globals below are not referenced by any function visible in this file.
      42 @floatglobalArray64x2 = common global <2 x double>* null, align 8
      43 @floatglobalArray32x4 = common global <4 x float>* null, align 8
      44 @floatglobalArray64x1 = common global <1 x double>* null, align 8
      45 @floatglobalArray32x2 = common global <2 x float>* null, align 8
     46 
     47 define void @fct1_64x2(<2 x i64>* nocapture %array, i64 %offset) nounwind ssp {
     48 entry:
     49 ; CHECK-LABEL: fct1_64x2:
     50 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
     51 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]
     52 ; CHECK: ldr [[BASE:x[0-9]+]],
     53 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
     54   %arrayidx = getelementptr inbounds <2 x i64>* %array, i64 %offset
     55   %tmp = load <2 x i64>* %arrayidx, align 16
     56   %tmp1 = load <2 x i64>** @globalArray64x2, align 8
     57   %arrayidx1 = getelementptr inbounds <2 x i64>* %tmp1, i64 %offset
     58   store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
     59   ret void
     60 }
     61 
      62 define void @fct2_64x2(<2 x i64>* nocapture %array) nounwind ssp {
         ; Copies element 3 of %array to element 5 of the array that @globalArray64x2 points to.
         ; Expects immediate offsets #48 (3 * 16 bytes) on the Q load and #80 (5 * 16 bytes) on the Q store.
      63 entry:
      64 ; CHECK-LABEL: fct2_64x2:
      65 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
      66 ; CHECK: ldr [[BASE:x[0-9]+]],
      67 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
      68   %arrayidx = getelementptr inbounds <2 x i64>* %array, i64 3
      69   %tmp = load <2 x i64>* %arrayidx, align 16
      70   %tmp1 = load <2 x i64>** @globalArray64x2, align 8
      71   %arrayidx1 = getelementptr inbounds <2 x i64>* %tmp1, i64 5
      72   store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
      73   ret void
      74 }
     75 
      76 define void @fct1_32x4(<4 x i32>* nocapture %array, i64 %offset) nounwind ssp {
         ; Copies element %offset of %array to element %offset of the array that
         ; @globalArray32x4 points to. Expects one lsl #4 of the index, reused as the
         ; register offset of both the Q load and the Q store.
      77 entry:
      78 ; CHECK-LABEL: fct1_32x4:
      79 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
      80 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
      81 ; CHECK: ldr [[BASE:x[0-9]+]],
      82 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
      83   %arrayidx = getelementptr inbounds <4 x i32>* %array, i64 %offset
      84   %tmp = load <4 x i32>* %arrayidx, align 16
      85   %tmp1 = load <4 x i32>** @globalArray32x4, align 8
      86   %arrayidx1 = getelementptr inbounds <4 x i32>* %tmp1, i64 %offset
      87   store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
      88   ret void
      89 }
     90 
      91 define void @fct2_32x4(<4 x i32>* nocapture %array) nounwind ssp {
         ; Copies element 3 of %array to element 5 of the array that @globalArray32x4 points to.
         ; Expects immediate offsets #48 (3 * 16 bytes) on the Q load and #80 (5 * 16 bytes) on the Q store.
      92 entry:
      93 ; CHECK-LABEL: fct2_32x4:
      94 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
      95 ; CHECK: ldr [[BASE:x[0-9]+]],
      96 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
      97   %arrayidx = getelementptr inbounds <4 x i32>* %array, i64 3
      98   %tmp = load <4 x i32>* %arrayidx, align 16
      99   %tmp1 = load <4 x i32>** @globalArray32x4, align 8
     100   %arrayidx1 = getelementptr inbounds <4 x i32>* %tmp1, i64 5
     101   store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
     102   ret void
     103 }
    104 
     105 define void @fct1_16x8(<8 x i16>* nocapture %array, i64 %offset) nounwind ssp {
         ; Copies element %offset of %array to element %offset of the array that
         ; @globalArray16x8 points to. Expects one lsl #4 of the index, reused as the
         ; register offset of both the Q load and the Q store.
     106 entry:
     107 ; CHECK-LABEL: fct1_16x8:
     108 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
     109 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
     110 ; CHECK: ldr [[BASE:x[0-9]+]],
     111 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
     112   %arrayidx = getelementptr inbounds <8 x i16>* %array, i64 %offset
     113   %tmp = load <8 x i16>* %arrayidx, align 16
     114   %tmp1 = load <8 x i16>** @globalArray16x8, align 8
     115   %arrayidx1 = getelementptr inbounds <8 x i16>* %tmp1, i64 %offset
     116   store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
     117   ret void
     118 }
    119 
     120 define void @fct2_16x8(<8 x i16>* nocapture %array) nounwind ssp {
         ; Copies element 3 of %array to element 5 of the array that @globalArray16x8 points to.
         ; Expects immediate offsets #48 (3 * 16 bytes) on the Q load and #80 (5 * 16 bytes) on the Q store.
     121 entry:
     122 ; CHECK-LABEL: fct2_16x8:
     123 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
     124 ; CHECK: ldr [[BASE:x[0-9]+]],
     125 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
     126   %arrayidx = getelementptr inbounds <8 x i16>* %array, i64 3
     127   %tmp = load <8 x i16>* %arrayidx, align 16
     128   %tmp1 = load <8 x i16>** @globalArray16x8, align 8
     129   %arrayidx1 = getelementptr inbounds <8 x i16>* %tmp1, i64 5
     130   store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
     131   ret void
     132 }
    133 
     134 define void @fct1_8x16(<16 x i8>* nocapture %array, i64 %offset) nounwind ssp {
         ; Copies element %offset of %array to element %offset of the array that
         ; @globalArray8x16 points to. Expects one lsl #4 of the index, reused as the
         ; register offset of both the Q load and the Q store.
     135 entry:
     136 ; CHECK-LABEL: fct1_8x16:
     137 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
     138 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
     139 ; CHECK: ldr [[BASE:x[0-9]+]],
     140 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
     141   %arrayidx = getelementptr inbounds <16 x i8>* %array, i64 %offset
     142   %tmp = load <16 x i8>* %arrayidx, align 16
     143   %tmp1 = load <16 x i8>** @globalArray8x16, align 8
     144   %arrayidx1 = getelementptr inbounds <16 x i8>* %tmp1, i64 %offset
     145   store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
     146   ret void
     147 }
    148 
     149 define void @fct2_8x16(<16 x i8>* nocapture %array) nounwind ssp {
         ; Copies element 3 of %array to element 5 of the array that @globalArray8x16 points to.
         ; Expects immediate offsets #48 (3 * 16 bytes) on the Q load and #80 (5 * 16 bytes) on the Q store.
     150 entry:
     151 ; CHECK-LABEL: fct2_8x16:
     152 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
     153 ; CHECK: ldr [[BASE:x[0-9]+]],
     154 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
     155   %arrayidx = getelementptr inbounds <16 x i8>* %array, i64 3
     156   %tmp = load <16 x i8>* %arrayidx, align 16
     157   %tmp1 = load <16 x i8>** @globalArray8x16, align 8
     158   %arrayidx1 = getelementptr inbounds <16 x i8>* %tmp1, i64 5
     159   store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
     160   ret void
     161 }
    162 
     163 define void @fct1_64x1(<1 x i64>* nocapture %array, i64 %offset) nounwind ssp {
         ; Copies element %offset of %array to element %offset of the array that
         ; @globalArray64x1 points to. Expects one lsl #3 of the index, reused as the
         ; register offset of both the D load and the D store.
     164 entry:
     165 ; CHECK-LABEL: fct1_64x1:
     166 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
     167 ; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
     168 ; CHECK: ldr [[BASE:x[0-9]+]],
     169 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
     170   %arrayidx = getelementptr inbounds <1 x i64>* %array, i64 %offset
     171   %tmp = load <1 x i64>* %arrayidx, align 8
     172   %tmp1 = load <1 x i64>** @globalArray64x1, align 8
     173   %arrayidx1 = getelementptr inbounds <1 x i64>* %tmp1, i64 %offset
     174   store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
     175   ret void
     176 }
    177 
     178 define void @fct2_64x1(<1 x i64>* nocapture %array) nounwind ssp {
         ; Copies element 3 of %array to element 5 of the array that @globalArray64x1 points to.
         ; Expects immediate offsets #24 (3 * 8 bytes) on the D load and #40 (5 * 8 bytes) on the D store.
     179 entry:
     180 ; CHECK-LABEL: fct2_64x1:
     181 ; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
     182 ; CHECK: ldr [[BASE:x[0-9]+]],
     183 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
     184   %arrayidx = getelementptr inbounds <1 x i64>* %array, i64 3
     185   %tmp = load <1 x i64>* %arrayidx, align 8
     186   %tmp1 = load <1 x i64>** @globalArray64x1, align 8
     187   %arrayidx1 = getelementptr inbounds <1 x i64>* %tmp1, i64 5
     188   store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
     189   ret void
     190 }
    191 
     192 define void @fct1_32x2(<2 x i32>* nocapture %array, i64 %offset) nounwind ssp {
         ; Copies element %offset of %array to element %offset of the array that
         ; @globalArray32x2 points to. Expects one lsl #3 of the index, reused as the
         ; register offset of both the D load and the D store.
     193 entry:
     194 ; CHECK-LABEL: fct1_32x2:
     195 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
     196 ; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
     197 ; CHECK: ldr [[BASE:x[0-9]+]],
     198 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
     199   %arrayidx = getelementptr inbounds <2 x i32>* %array, i64 %offset
     200   %tmp = load <2 x i32>* %arrayidx, align 8
     201   %tmp1 = load <2 x i32>** @globalArray32x2, align 8
     202   %arrayidx1 = getelementptr inbounds <2 x i32>* %tmp1, i64 %offset
     203   store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
     204   ret void
     205 }
    206 
     207 define void @fct2_32x2(<2 x i32>* nocapture %array) nounwind ssp {
         ; Copies element 3 of %array to element 5 of the array that @globalArray32x2 points to.
         ; Expects immediate offsets #24 (3 * 8 bytes) on the D load and #40 (5 * 8 bytes) on the D store.
     208 entry:
     209 ; CHECK-LABEL: fct2_32x2:
     210 ; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
     211 ; CHECK: ldr [[BASE:x[0-9]+]],
     212 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
     213   %arrayidx = getelementptr inbounds <2 x i32>* %array, i64 3
     214   %tmp = load <2 x i32>* %arrayidx, align 8
     215   %tmp1 = load <2 x i32>** @globalArray32x2, align 8
     216   %arrayidx1 = getelementptr inbounds <2 x i32>* %tmp1, i64 5
     217   store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
     218   ret void
     219 }
    220 
     221 define void @fct1_16x4(<4 x i16>* nocapture %array, i64 %offset) nounwind ssp {
         ; Copies element %offset of %array to element %offset of the array that
         ; @globalArray16x4 points to. Expects one lsl #3 of the index, reused as the
         ; register offset of both the D load and the D store.
     222 entry:
     223 ; CHECK-LABEL: fct1_16x4:
     224 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
     225 ; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
     226 ; CHECK: ldr [[BASE:x[0-9]+]],
     227 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
     228   %arrayidx = getelementptr inbounds <4 x i16>* %array, i64 %offset
     229   %tmp = load <4 x i16>* %arrayidx, align 8
     230   %tmp1 = load <4 x i16>** @globalArray16x4, align 8
     231   %arrayidx1 = getelementptr inbounds <4 x i16>* %tmp1, i64 %offset
     232   store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
     233   ret void
     234 }
    235 
     236 define void @fct2_16x4(<4 x i16>* nocapture %array) nounwind ssp {
         ; Copies element 3 of %array to element 5 of the array that @globalArray16x4 points to.
         ; Expects immediate offsets #24 (3 * 8 bytes) on the D load and #40 (5 * 8 bytes) on the D store.
     237 entry:
     238 ; CHECK-LABEL: fct2_16x4:
     239 ; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
     240 ; CHECK: ldr [[BASE:x[0-9]+]],
     241 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
     242   %arrayidx = getelementptr inbounds <4 x i16>* %array, i64 3
     243   %tmp = load <4 x i16>* %arrayidx, align 8
     244   %tmp1 = load <4 x i16>** @globalArray16x4, align 8
     245   %arrayidx1 = getelementptr inbounds <4 x i16>* %tmp1, i64 5
     246   store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
     247   ret void
     248 }
    249 
     250 define void @fct1_8x8(<8 x i8>* nocapture %array, i64 %offset) nounwind ssp {
         ; Copies element %offset of %array to element %offset of the array that
         ; @globalArray8x8 points to. Expects one lsl #3 of the index, reused as the
         ; register offset of both the D load and the D store.
     251 entry:
     252 ; CHECK-LABEL: fct1_8x8:
     253 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
     254 ; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
     255 ; CHECK: ldr [[BASE:x[0-9]+]],
     256 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
     257   %arrayidx = getelementptr inbounds <8 x i8>* %array, i64 %offset
     258   %tmp = load <8 x i8>* %arrayidx, align 8
     259   %tmp1 = load <8 x i8>** @globalArray8x8, align 8
     260   %arrayidx1 = getelementptr inbounds <8 x i8>* %tmp1, i64 %offset
     261   store <8 x i8> %tmp, <8 x i8>* %arrayidx1, align 8
     262   ret void
     263 }
    264 
     265 ; Add a bunch of tests for rdar://13258794: Match LDUR/STUR for D and Q
     266 ; registers for unscaled vector accesses
         ; @str is a 63-byte, align-1 buffer. The functions that follow access it at byte
         ; offsets 3 and 4, which are not multiples of the 8- or 16-byte access size.
         ; NOTE(review): those accesses are annotated align 8 / align 16 even though @str
         ; is align 1 -- presumably deliberate for these tests; confirm before changing.
     267 @str = global [63 x i8] c"Test case for rdar://13258794: LDUR/STUR for D and Q registers\00", align 1
    268 
     269 define <1 x i64> @fct0() nounwind readonly ssp {
         ; Returns the <1 x i64> read from byte offset 3 of @str.
         ; Expects an ldur into a D register with immediate #3.
     270 entry:
     271 ; CHECK-LABEL: fct0:
     272 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
     273   %0 = load <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
     274   ret <1 x i64> %0
     275 }
    276 
     277 define <2 x i32> @fct1() nounwind readonly ssp {
         ; Returns the <2 x i32> read from byte offset 3 of @str.
         ; Expects an ldur into a D register with immediate #3.
     278 entry:
     279 ; CHECK-LABEL: fct1:
     280 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
     281   %0 = load <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
     282   ret <2 x i32> %0
     283 }
    284 
     285 define <4 x i16> @fct2() nounwind readonly ssp {
         ; Returns the <4 x i16> read from byte offset 3 of @str.
         ; Expects an ldur into a D register with immediate #3.
     286 entry:
     287 ; CHECK-LABEL: fct2:
     288 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
     289   %0 = load <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
     290   ret <4 x i16> %0
     291 }
    292 
     293 define <8 x i8> @fct3() nounwind readonly ssp {
         ; Returns the <8 x i8> read from byte offset 3 of @str.
         ; Expects an ldur into a D register with immediate #3.
     294 entry:
     295 ; CHECK-LABEL: fct3:
     296 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
     297   %0 = load <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
     298   ret <8 x i8> %0
     299 }
    300 
     301 define <2 x i64> @fct4() nounwind readonly ssp {
         ; Returns the <2 x i64> read from byte offset 3 of @str.
         ; Expects an ldur into a Q register with immediate #3.
     302 entry:
     303 ; CHECK-LABEL: fct4:
     304 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
     305   %0 = load <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
     306   ret <2 x i64> %0
     307 }
    308 
     309 define <4 x i32> @fct5() nounwind readonly ssp {
         ; Returns the <4 x i32> read from byte offset 3 of @str.
         ; Expects an ldur into a Q register with immediate #3.
     310 entry:
     311 ; CHECK-LABEL: fct5:
     312 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
     313   %0 = load <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
     314   ret <4 x i32> %0
     315 }
    316 
     317 define <8 x i16> @fct6() nounwind readonly ssp {
         ; Returns the <8 x i16> read from byte offset 3 of @str.
         ; Expects an ldur into a Q register with immediate #3.
     318 entry:
     319 ; CHECK-LABEL: fct6:
     320 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
     321   %0 = load <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
     322   ret <8 x i16> %0
     323 }
    324 
     325 define <16 x i8> @fct7() nounwind readonly ssp {
         ; Returns the <16 x i8> read from byte offset 3 of @str.
         ; Expects an ldur into a Q register with immediate #3.
     326 entry:
     327 ; CHECK-LABEL: fct7:
     328 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
     329   %0 = load <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
     330   ret <16 x i8> %0
     331 }
    332 
     333 define void @fct8() nounwind ssp {
         ; Copies a <1 x i64> from byte offset 3 of @str to byte offset 4 of @str.
         ; Expects an ldur/stur pair on one D register, both addressed off the same base register.
     334 entry:
     335 ; CHECK-LABEL: fct8:
     336 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
     337 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
     338   %0 = load <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
     339   store <1 x i64> %0, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <1 x i64>*), align 8
     340   ret void
     341 }
    342 
     343 define void @fct9() nounwind ssp {
         ; Copies a <2 x i32> from byte offset 3 of @str to byte offset 4 of @str.
         ; Expects an ldur/stur pair on one D register, both addressed off the same base register.
     344 entry:
     345 ; CHECK-LABEL: fct9:
     346 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
     347 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
     348   %0 = load <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
     349   store <2 x i32> %0, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <2 x i32>*), align 8
     350   ret void
     351 }
    352 
     353 define void @fct10() nounwind ssp {
         ; Copies a <4 x i16> from byte offset 3 of @str to byte offset 4 of @str.
         ; Expects an ldur/stur pair on one D register, both addressed off the same base register.
     354 entry:
     355 ; CHECK-LABEL: fct10:
     356 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
     357 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
     358   %0 = load <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
     359   store <4 x i16> %0, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <4 x i16>*), align 8
     360   ret void
     361 }
    362 
     363 define void @fct11() nounwind ssp {
         ; Copies an <8 x i8> from byte offset 3 of @str to byte offset 4 of @str.
         ; Expects an ldur/stur pair on one D register, both addressed off the same base register.
     364 entry:
     365 ; CHECK-LABEL: fct11:
     366 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
     367 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
     368   %0 = load <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
     369   store <8 x i8> %0, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <8 x i8>*), align 8
     370   ret void
     371 }
    372 
     373 define void @fct12() nounwind ssp {
         ; Copies a <2 x i64> from byte offset 3 of @str to byte offset 4 of @str.
         ; Expects an ldur/stur pair on one Q register, both addressed off the same base register.
     374 entry:
     375 ; CHECK-LABEL: fct12:
     376 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
     377 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
     378   %0 = load <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
     379   store <2 x i64> %0, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <2 x i64>*), align 16
     380   ret void
     381 }
    382 
     383 define void @fct13() nounwind ssp {
         ; Copies a <4 x i32> from byte offset 3 of @str to byte offset 4 of @str.
         ; Expects an ldur/stur pair on one Q register, both addressed off the same base register.
     384 entry:
     385 ; CHECK-LABEL: fct13:
     386 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
     387 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
     388   %0 = load <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
     389   store <4 x i32> %0, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <4 x i32>*), align 16
     390   ret void
     391 }
    392 
     393 define void @fct14() nounwind ssp {
         ; Copies an <8 x i16> from byte offset 3 of @str to byte offset 4 of @str.
         ; Expects an ldur/stur pair on one Q register, both addressed off the same base register.
     394 entry:
     395 ; CHECK-LABEL: fct14:
     396 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
     397 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
     398   %0 = load <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
     399   store <8 x i16> %0, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <8 x i16>*), align 16
     400   ret void
     401 }
    402 
     403 define void @fct15() nounwind ssp {
         ; Copies a <16 x i8> from byte offset 3 of @str to byte offset 4 of @str.
         ; Expects an ldur/stur pair on one Q register, both addressed off the same base register.
     404 entry:
     405 ; CHECK-LABEL: fct15:
     406 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
     407 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
     408   %0 = load <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
     409   store <16 x i8> %0, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <16 x i8>*), align 16
     410   ret void
     411 }
    412 
    413 ; Check the building of vector from a single loaded value.
    414 ; Part of <rdar://problem/14170854>
    415 ;
    416 ; Single loads with immediate offset.
     417 define <8 x i8> @fct16(i8* nocapture %sp0) {
         ; Loads the i8 at %sp0[1], inserts it into lane 0 of an undef <8 x i8>, and
         ; multiplies that vector by itself. Expects a B-register load at [x0, #1]
         ; immediately followed by mul.8b on the loaded register.
     418 ; CHECK-LABEL: fct16:
     419 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
     420 ; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
     421 entry:
     422   %addr = getelementptr i8* %sp0, i64 1
     423   %pix_sp0.0.copyload = load i8* %addr, align 1
     424   %vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
     425   %vmull.i = mul <8 x i8> %vec, %vec
     426   ret <8 x i8> %vmull.i
     427 }
    428 
     429 define <16 x i8> @fct17(i8* nocapture %sp0) {
         ; Loads the i8 at %sp0[1], inserts it into lane 0 of an undef <16 x i8>, and
         ; multiplies that vector by itself. Expects a B-register load at [x0, #1]
         ; immediately followed by mul.16b on the loaded register.
     430 ; CHECK-LABEL: fct17:
     431 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
     432 ; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
     433 entry:
     434   %addr = getelementptr i8* %sp0, i64 1
     435   %pix_sp0.0.copyload = load i8* %addr, align 1
     436   %vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
     437   %vmull.i = mul <16 x i8> %vec, %vec
     438   ret <16 x i8> %vmull.i
     439 }
    440 
     441 define <4 x i16> @fct18(i16* nocapture %sp0) {
         ; Loads the i16 at %sp0[1], inserts it into lane 0 of an undef <4 x i16>, and
         ; multiplies that vector by itself. Expects an H-register load at [x0, #2]
         ; immediately followed by mul.4h on the loaded register.
     442 ; CHECK-LABEL: fct18:
     443 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
     444 ; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
     445 entry:
     446   %addr = getelementptr i16* %sp0, i64 1
     447   %pix_sp0.0.copyload = load i16* %addr, align 1
     448   %vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
     449   %vmull.i = mul <4 x i16> %vec, %vec
     450   ret <4 x i16> %vmull.i
     451 }
    452 
     453 define <8 x i16> @fct19(i16* nocapture %sp0) {
         ; Loads the i16 at %sp0[1], inserts it into lane 0 of an undef <8 x i16>, and
         ; multiplies that vector by itself. Expects an H-register load at [x0, #2]
         ; immediately followed by mul.8h on the loaded register.
     454 ; CHECK-LABEL: fct19:
     455 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
     456 ; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
     457 entry:
     458   %addr = getelementptr i16* %sp0, i64 1
     459   %pix_sp0.0.copyload = load i16* %addr, align 1
     460   %vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
     461   %vmull.i = mul <8 x i16> %vec, %vec
     462   ret <8 x i16> %vmull.i
     463 }
    464 
     465 define <2 x i32> @fct20(i32* nocapture %sp0) {
         ; Loads the i32 at %sp0[1], inserts it into lane 0 of an undef <2 x i32>, and
         ; multiplies that vector by itself. Expects an S-register load at [x0, #4]
         ; immediately followed by mul.2s on the loaded register.
     466 ; CHECK-LABEL: fct20:
     467 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
     468 ; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
     469 entry:
     470   %addr = getelementptr i32* %sp0, i64 1
     471   %pix_sp0.0.copyload = load i32* %addr, align 1
     472   %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
     473   %vmull.i = mul <2 x i32> %vec, %vec
     474   ret <2 x i32> %vmull.i
     475 }
    476 
     477 define <4 x i32> @fct21(i32* nocapture %sp0) {
         ; Loads the i32 at %sp0[1], inserts it into lane 0 of an undef <4 x i32>, and
         ; multiplies that vector by itself. Expects an S-register load at [x0, #4]
         ; immediately followed by mul.4s on the loaded register.
     478 ; CHECK-LABEL: fct21:
     479 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
     480 ; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
     481 entry:
     482   %addr = getelementptr i32* %sp0, i64 1
     483   %pix_sp0.0.copyload = load i32* %addr, align 1
     484   %vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
     485   %vmull.i = mul <4 x i32> %vec, %vec
     486   ret <4 x i32> %vmull.i
     487 }
    488 
     489 define <1 x i64> @fct22(i64* nocapture %sp0) {
         ; Loads the i64 at %sp0[1] and returns it as the single lane of a <1 x i64>.
         ; Expects the value to be loaded directly into d0 from [x0, #8].
     490 ; CHECK-LABEL: fct22:
     491 ; CHECK: ldr d0, [x0, #8]
     492 entry:
     493   %addr = getelementptr i64* %sp0, i64 1
     494   %pix_sp0.0.copyload = load i64* %addr, align 1
     495   %vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
     496    ret <1 x i64> %vec
     497 }
    498 
     499 define <2 x i64> @fct23(i64* nocapture %sp0) {
         ; Loads the i64 at %sp0[1] and returns it in lane 0 of an otherwise undef <2 x i64>.
         ; Expects a D-register load from [x0, #8]. The REGNUM capture is not used by
         ; any later directive in this function.
     500 ; CHECK-LABEL: fct23:
     501 ; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
     502 entry:
     503   %addr = getelementptr i64* %sp0, i64 1
     504   %pix_sp0.0.copyload = load i64* %addr, align 1
     505   %vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
     506   ret <2 x i64> %vec
     507 }
    508 
    509 ;
    510 ; Single loads with register offset.
     511 define <8 x i8> @fct24(i8* nocapture %sp0, i64 %offset) {
         ; Loads the i8 at %sp0[%offset], inserts it into lane 0 of an undef <8 x i8>, and
         ; multiplies that vector by itself. Expects a B-register load at [x0, x1]
         ; immediately followed by mul.8b on the loaded register.
     512 ; CHECK-LABEL: fct24:
     513 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
     514 ; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
     515 entry:
     516   %addr = getelementptr i8* %sp0, i64 %offset
     517   %pix_sp0.0.copyload = load i8* %addr, align 1
     518   %vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
     519   %vmull.i = mul <8 x i8> %vec, %vec
     520   ret <8 x i8> %vmull.i
     521 }
    522 
     523 define <16 x i8> @fct25(i8* nocapture %sp0, i64 %offset) {
         ; Loads the i8 at %sp0[%offset], inserts it into lane 0 of an undef <16 x i8>, and
         ; multiplies that vector by itself. Expects a B-register load at [x0, x1]
         ; immediately followed by mul.16b on the loaded register.
     524 ; CHECK-LABEL: fct25:
     525 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
     526 ; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
     527 entry:
     528   %addr = getelementptr i8* %sp0, i64 %offset
     529   %pix_sp0.0.copyload = load i8* %addr, align 1
     530   %vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
     531   %vmull.i = mul <16 x i8> %vec, %vec
     532   ret <16 x i8> %vmull.i
     533 }
    534 
     535 define <4 x i16> @fct26(i16* nocapture %sp0, i64 %offset) {
         ; Loads the i16 at %sp0[%offset], inserts it into lane 0 of an undef <4 x i16>, and
         ; multiplies that vector by itself. Expects an H-register load at [x0, x1, lsl #1]
         ; immediately followed by mul.4h on the loaded register.
     536 ; CHECK-LABEL: fct26:
     537 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
     538 ; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
     539 entry:
     540   %addr = getelementptr i16* %sp0, i64 %offset
     541   %pix_sp0.0.copyload = load i16* %addr, align 1
     542   %vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
     543   %vmull.i = mul <4 x i16> %vec, %vec
     544   ret <4 x i16> %vmull.i
     545 }
    546 
     547 define <8 x i16> @fct27(i16* nocapture %sp0, i64 %offset) {
         ; Loads the i16 at %sp0[%offset], inserts it into lane 0 of an undef <8 x i16>, and
         ; multiplies that vector by itself. Expects an H-register load at [x0, x1, lsl #1]
         ; immediately followed by mul.8h on the loaded register.
     548 ; CHECK-LABEL: fct27:
     549 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
     550 ; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
     551 entry:
     552   %addr = getelementptr i16* %sp0, i64 %offset
     553   %pix_sp0.0.copyload = load i16* %addr, align 1
     554   %vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
     555   %vmull.i = mul <8 x i16> %vec, %vec
     556   ret <8 x i16> %vmull.i
     557 }
    558 
     559 define <2 x i32> @fct28(i32* nocapture %sp0, i64 %offset) {
         ; Loads the i32 at %sp0[%offset], inserts it into lane 0 of an undef <2 x i32>, and
         ; multiplies that vector by itself. Expects an S-register load at [x0, x1, lsl #2]
         ; immediately followed by mul.2s on the loaded register.
     560 ; CHECK-LABEL: fct28:
     561 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
     562 ; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
     563 entry:
     564   %addr = getelementptr i32* %sp0, i64 %offset
     565   %pix_sp0.0.copyload = load i32* %addr, align 1
     566   %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
     567   %vmull.i = mul <2 x i32> %vec, %vec
     568   ret <2 x i32> %vmull.i
     569 }
    570 
     571 define <4 x i32> @fct29(i32* nocapture %sp0, i64 %offset) {
         ; Loads the i32 at %sp0[%offset], inserts it into lane 0 of an undef <4 x i32>, and
         ; multiplies that vector by itself. Expects an S-register load at [x0, x1, lsl #2]
         ; immediately followed by mul.4s on the loaded register.
     572 ; CHECK-LABEL: fct29:
     573 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
     574 ; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
     575 entry:
     576   %addr = getelementptr i32* %sp0, i64 %offset
     577   %pix_sp0.0.copyload = load i32* %addr, align 1
     578   %vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
     579   %vmull.i = mul <4 x i32> %vec, %vec
     580   ret <4 x i32> %vmull.i
     581 }
    582 
     583 define <1 x i64> @fct30(i64* nocapture %sp0, i64 %offset) {
         ; Loads the i64 at %sp0[%offset] and returns it as the single lane of a <1 x i64>.
         ; Expects the value to be loaded directly into d0 from [x0, x1, lsl #3].
     584 ; CHECK-LABEL: fct30:
     585 ; CHECK: ldr d0, [x0, x1, lsl #3]
     586 entry:
     587   %addr = getelementptr i64* %sp0, i64 %offset
     588   %pix_sp0.0.copyload = load i64* %addr, align 1
     589   %vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
     590    ret <1 x i64> %vec
     591 }
    592 
     593 define <2 x i64> @fct31(i64* nocapture %sp0, i64 %offset) {
         ; Loads the i64 at %sp0[%offset] and returns it in lane 0 of an otherwise undef <2 x i64>.
         ; Expects the value to be loaded directly into d0 from [x0, x1, lsl #3].
     594 ; CHECK-LABEL: fct31:
     595 ; CHECK: ldr d0, [x0, x1, lsl #3]
     596 entry:
     597   %addr = getelementptr i64* %sp0, i64 %offset
     598   %pix_sp0.0.copyload = load i64* %addr, align 1
     599   %vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
     600   ret <2 x i64> %vec
     601 }
    602