Home | History | Annotate | Download | only in ARM
      1 ; RUN: opt -codegenprepare -mtriple=thumbv7-apple-ios %s -o - -mattr=+neon -S | FileCheck --check-prefix=IR-BOTH --check-prefix=IR-NORMAL %s
      2 ; RUN: opt -codegenprepare -mtriple=thumbv7-apple-ios %s -o - -mattr=+neon -S -stress-cgp-store-extract | FileCheck --check-prefix=IR-BOTH --check-prefix=IR-STRESS %s
      3 ; RUN: llc -mtriple=thumbv7-apple-ios %s -o - -mattr=+neon | FileCheck --check-prefix=ASM %s
      4 
      5 ; IR-BOTH-LABEL: @simpleOneInstructionPromotion
      6 ; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
      7 ; IR-BOTH-NEXT: [[VECTOR_OR:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[LOAD]], <i32 undef, i32 1>
      8 ; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[VECTOR_OR]], i32 1
      9 ; IR-BOTH-NEXT: store i32 [[EXTRACT]], i32* %dest
     10 ; IR-BOTH-NEXT: ret
     11 ;
     12 ; Make sure we got rid of any expensive vmov.32 instructions.
     13 ; ASM-LABEL: simpleOneInstructionPromotion:
     14 ; ASM: vldr [[LOAD:d[0-9]+]], [r0]
     15 ; ASM-NEXT: vorr.i32 [[LOAD]], #0x1
     16 ; ASM-NEXT: vst1.32 {[[LOAD]][1]}, [r1:32]
     17 ; ASM-NEXT: bx
        ; Baseline case: a single scalar `or` sits between a constant-index
        ; extract (lane 1) and the store. Both opt runs expect the `or` to be done
        ; on the whole vector ahead of the extract, with undef in the unused lane.
     18 define void @simpleOneInstructionPromotion(<2 x i32>* %addr1, i32* %dest) {
     19   %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
     20   %extract = extractelement <2 x i32> %in1, i32 1
     21   %out = or i32 %extract, 1
     22   store i32 %out, i32* %dest, align 4
     23   ret void
     24 }
     25 
     26 ; IR-BOTH-LABEL: @unsupportedInstructionForPromotion
     27 ; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
     28 ; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 0
     29 ; IR-BOTH-NEXT: [[CMP:%[a-zA-Z_0-9-]+]] = icmp eq i32 [[EXTRACT]], %in2
     30 ; IR-BOTH-NEXT: store i1 [[CMP]], i1* %dest
     31 ; IR-BOTH-NEXT: ret
     32 ;
     33 ; ASM-LABEL: unsupportedInstructionForPromotion:
     34 ; ASM: vldr [[LOAD:d[0-9]+]], [r0]
     35 ; ASM: vmov.32 {{r[0-9]+}}, [[LOAD]]
     36 ; ASM: bx
        ; Negative case: the extracted lane feeds an `icmp` against a scalar
        ; argument. Both opt runs expect the extract to stay ahead of the scalar
        ; compare, and the assembly is expected to move the lane to a core
        ; register with vmov.32.
     37 define void @unsupportedInstructionForPromotion(<2 x i32>* %addr1, i32 %in2, i1* %dest) {
     38   %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
     39   %extract = extractelement <2 x i32> %in1, i32 0
     40   %out = icmp eq i32 %extract, %in2
     41   store i1 %out, i1* %dest, align 4
     42   ret void
     43 }
     44 
     45 
     46 ; IR-BOTH-LABEL: @unsupportedChainInDifferentBBs
     47 ; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
     48 ; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 0
     49 ; IR-BOTH-NEXT: br i1 %bool, label %bb2, label %end
     50 ; BB2
     51 ; IR-BOTH: [[OR:%[a-zA-Z_0-9-]+]] = or i32 [[EXTRACT]], 1
     52 ; IR-BOTH-NEXT: store i32 [[OR]], i32* %dest, align 4
     53 ; IR-BOTH: ret
     54 ;
     55 ; ASM-LABEL: unsupportedChainInDifferentBBs:
     56 ; ASM: vldrne [[LOAD:d[0-9]+]], [r0]
     57 ; ASM: vmovne.32 {{r[0-9]+}}, [[LOAD]]
     58 ; ASM: bx
        ; Negative case: the extract lives in bb1 while the `or` that uses it and
        ; the store live in bb2. Both opt runs expect the scalar sequence to be
        ; left as written, split across the two blocks.
     59 define void @unsupportedChainInDifferentBBs(<2 x i32>* %addr1, i32* %dest, i1 %bool) {
     60 bb1:
     61   %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
     62   %extract = extractelement <2 x i32> %in1, i32 0
     63   br i1 %bool, label %bb2, label %end
     64 bb2: 
     65   %out = or i32 %extract, 1
     66   store i32 %out, i32* %dest, align 4
     67   br label %end
     68 end:
     69   ret void
     70 }
     71 
     72 ; IR-BOTH-LABEL: @chainOfInstructionsToPromote
     73 ; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
     74 ; IR-BOTH-NEXT: [[VECTOR_OR1:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[LOAD]], <i32 1, i32 undef>
     75 ; IR-BOTH-NEXT: [[VECTOR_OR2:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[VECTOR_OR1]], <i32 1, i32 undef>
     76 ; IR-BOTH-NEXT: [[VECTOR_OR3:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[VECTOR_OR2]], <i32 1, i32 undef>
     77 ; IR-BOTH-NEXT: [[VECTOR_OR4:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[VECTOR_OR3]], <i32 1, i32 undef>
     78 ; IR-BOTH-NEXT: [[VECTOR_OR5:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[VECTOR_OR4]], <i32 1, i32 undef>
     79 ; IR-BOTH-NEXT: [[VECTOR_OR6:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[VECTOR_OR5]], <i32 1, i32 undef>
     80 ; IR-BOTH-NEXT: [[VECTOR_OR7:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[VECTOR_OR6]], <i32 1, i32 undef>
     81 ; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[VECTOR_OR7]], i32 0
     82 ; IR-BOTH-NEXT: store i32 [[EXTRACT]], i32* %dest
     83 ; IR-BOTH-NEXT: ret
     84 ;
     85 ; ASM-LABEL: chainOfInstructionsToPromote:
     86 ; ASM: vldr [[LOAD:d[0-9]+]], [r0]
     87 ; ASM-NOT: vmov.32 {{r[0-9]+}}, [[LOAD]]
     88 ; ASM: bx
        ; Seven chained scalar `or`s sit between the lane-0 extract and the store.
        ; Both opt runs expect every one of them to be rewritten as a vector `or`
        ; (undef in the unused lane) ahead of a single extract, and the assembly
        ; must not move the loaded D register to a core register.
     89 define void @chainOfInstructionsToPromote(<2 x i32>* %addr1, i32* %dest) {
     90   %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
     91   %extract = extractelement <2 x i32> %in1, i32 0
     92   %out1 = or i32 %extract, 1
     93   %out2 = or i32 %out1, 1
     94   %out3 = or i32 %out2, 1
     95   %out4 = or i32 %out3, 1
     96   %out5 = or i32 %out4, 1
     97   %out6 = or i32 %out5, 1
     98   %out7 = or i32 %out6, 1
     99   store i32 %out7, i32* %dest, align 4
    100   ret void
    101 }
    102 
    103 ; IR-BOTH-LABEL: @unsupportedMultiUses
    104 ; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
    105 ; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
    106 ; IR-BOTH-NEXT: [[OR:%[a-zA-Z_0-9-]+]] = or i32 [[EXTRACT]], 1
    107 ; IR-BOTH-NEXT: store i32 [[OR]], i32* %dest
    108 ; IR-BOTH-NEXT: ret i32 [[OR]]
    109 ;
    110 ; ASM-LABEL: unsupportedMultiUses:
    111 ; ASM: vldr [[LOAD:d[0-9]+]], [r0]
    112 ; ASM: vmov.32 {{r[0-9]+}}, [[LOAD]]
    113 ; ASM: bx
        ; Negative case: the result of the scalar `or` is both stored and
        ; returned, so it has two users. Both opt runs expect the scalar `or` to
        ; be kept after the extract.
    114 define i32 @unsupportedMultiUses(<2 x i32>* %addr1, i32* %dest) {
    115   %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
    116   %extract = extractelement <2 x i32> %in1, i32 1
    117   %out = or i32 %extract, 1
    118   store i32 %out, i32* %dest, align 4
    119   ret i32 %out
    120 }
    121 
    122 ; Check that we promote with a splat constant when this is a division.
    123 ; The NORMAL mode does not promote anything as divisions are not legal.
    124 ; IR-BOTH-LABEL: @udivCase
    125 ; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
    126 ; Scalar version:
    127 ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
    128 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = udiv i32 [[EXTRACT]], 7
    129 ; Vector version:
    130 ; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = udiv <2 x i32> [[LOAD]], <i32 7, i32 7>
    131 ; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[DIV]], i32 1
    132 ;
    133 ; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
    134 ; IR-BOTH-NEXT: ret
        ; The extracted lane is the dividend and the constant 7 is the divisor;
        ; the stress run expects the divisor in both lanes of the vector constant.
    135 define void @udivCase(<2 x i32>* %addr1, i32* %dest) {
    136   %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
    137   %extract = extractelement <2 x i32> %in1, i32 1
    138   %out = udiv i32 %extract, 7
    139   store i32 %out, i32* %dest, align 4
    140   ret void
    141 }
    142 
    143 ; IR-BOTH-LABEL: @uremCase
    144 ; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
    145 ; Scalar version:
    146 ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
    147 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = urem i32 [[EXTRACT]], 7
    148 ; Vector version:
    149 ; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = urem <2 x i32> [[LOAD]], <i32 7, i32 7>
    150 ; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[DIV]], i32 1
    151 ;
    152 ; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
    153 ; IR-BOTH-NEXT: ret 
        ; Unsigned remainder of lane 1 by the constant 7. The normal run expects
        ; the scalar form to be kept; the stress run expects a vector urem with
        ; the divisor in both lanes, followed by the extract.
    154 define void @uremCase(<2 x i32>* %addr1, i32* %dest) {
    155   %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
    156   %extract = extractelement <2 x i32> %in1, i32 1
    157   %out = urem i32 %extract, 7
    158   store i32 %out, i32* %dest, align 4
    159   ret void
    160 }
    161 
    162 ; IR-BOTH-LABEL: @sdivCase
    163 ; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
    164 ; Scalar version:
    165 ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
    166 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = sdiv i32 [[EXTRACT]], 7
    167 ; Vector version:
    168 ; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = sdiv <2 x i32> [[LOAD]], <i32 7, i32 7>
    169 ; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[DIV]], i32 1
    170 ;
    171 ; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
    172 ; IR-BOTH-NEXT: ret 
        ; Signed division of lane 1 by the constant 7. The normal run expects the
        ; scalar form to be kept; the stress run expects a vector sdiv with the
        ; divisor in both lanes, followed by the extract.
    173 define void @sdivCase(<2 x i32>* %addr1, i32* %dest) {
    174   %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
    175   %extract = extractelement <2 x i32> %in1, i32 1
    176   %out = sdiv i32 %extract, 7
    177   store i32 %out, i32* %dest, align 4
    178   ret void
    179 }
    180 
    181 ; IR-BOTH-LABEL: @sremCase
    182 ; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
    183 ; Scalar version:
    184 ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
    185 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = srem i32 [[EXTRACT]], 7
    186 ; Vector version:
    187 ; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = srem <2 x i32> [[LOAD]], <i32 7, i32 7>
    188 ; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[DIV]], i32 1
    189 ;
    190 ; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
    191 ; IR-BOTH-NEXT: ret 
        ; Signed remainder of lane 1 by the constant 7. The normal run expects the
        ; scalar form to be kept; the stress run expects a vector srem with the
        ; divisor in both lanes, followed by the extract.
    192 define void @sremCase(<2 x i32>* %addr1, i32* %dest) {
    193   %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
    194   %extract = extractelement <2 x i32> %in1, i32 1
    195   %out = srem i32 %extract, 7
    196   store i32 %out, i32* %dest, align 4
    197   ret void
    198 }
    199 
    200 ; IR-BOTH-LABEL: @fdivCase
    201 ; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>, <2 x float>* %addr1
    202 ; Scalar version:  
    203 ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1
    204 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = fdiv float [[EXTRACT]], 7.0
    205 ; Vector version:
    206 ; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = fdiv <2 x float> [[LOAD]], <float 7.000000e+00, float 7.000000e+00>
    207 ; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[DIV]], i32 1
    208 ;
    209 ; IR-BOTH-NEXT: store float [[RES]], float* %dest
    210 ; IR-BOTH-NEXT: ret
        ; Floating-point division of lane 1 by the constant 7.0, with no
        ; fast-math flags. The normal run expects the scalar form to be kept; the
        ; stress run expects a vector fdiv with the divisor in both lanes.
    211 define void @fdivCase(<2 x float>* %addr1, float* %dest) {
    212   %in1 = load <2 x float>, <2 x float>* %addr1, align 8   
    213   %extract = extractelement <2 x float> %in1, i32 1
    214   %out = fdiv float %extract, 7.0
    215   store float %out, float* %dest, align 4
    216   ret void
    217 }
    218 
    219 ; IR-BOTH-LABEL: @fremCase
    220 ; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>, <2 x float>* %addr1
    221 ; Scalar version:  
    222 ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1
    223 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = frem float [[EXTRACT]], 7.0
    224 ; Vector version:
    225 ; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = frem <2 x float> [[LOAD]], <float 7.000000e+00, float 7.000000e+00>
    226 ; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[DIV]], i32 1
    227 ;
    228 ; IR-BOTH-NEXT: store float [[RES]], float* %dest
    229 ; IR-BOTH-NEXT: ret
        ; Floating-point remainder of lane 1 by the constant 7.0, with no
        ; fast-math flags. The normal run expects the scalar form to be kept; the
        ; stress run expects a vector frem with the divisor in both lanes.
    230 define void @fremCase(<2 x float>* %addr1, float* %dest) {
    231   %in1 = load <2 x float>, <2 x float>* %addr1, align 8   
    232   %extract = extractelement <2 x float> %in1, i32 1
    233   %out = frem float %extract, 7.0
    234   store float %out, float* %dest, align 4
    235   ret void
    236 }
    237 
    238 ; Check that we do not promote when we may introduce undefined behavior
    239 ; like division by zero.
    240 ; IR-BOTH-LABEL: @undefDivCase
    241 ; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
    242 ; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
    243 ; IR-BOTH-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = udiv i32 7, [[EXTRACT]]
    244 ; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
    245 ; IR-BOTH-NEXT: ret
        ; Here the extracted lane is the divisor (7 / lane 1), not the dividend.
        ; Both opt runs, including the stress run, expect the scalar udiv to be
        ; kept after the extract.
    246 define void @undefDivCase(<2 x i32>* %addr1, i32* %dest) {
    247   %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
    248   %extract = extractelement <2 x i32> %in1, i32 1
    249   %out = udiv i32 7, %extract
    250   store i32 %out, i32* %dest, align 4
    251   ret void
    252 }
    253 
    254 
    255 ; Check that we do not promote when we may introduce undefined behavior
    256 ; like division by zero.
    257 ; IR-BOTH-LABEL: @undefRemCase
    258 ; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
    259 ; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
    260 ; IR-BOTH-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = srem i32 7, [[EXTRACT]]
    261 ; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
    262 ; IR-BOTH-NEXT: ret
        ; Here the extracted lane is the divisor of the remainder (7 % lane 1).
        ; Both opt runs, including the stress run, expect the scalar srem to be
        ; kept after the extract.
    263 define void @undefRemCase(<2 x i32>* %addr1, i32* %dest) {
    264   %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
    265   %extract = extractelement <2 x i32> %in1, i32 1
    266   %out = srem i32 7, %extract
    267   store i32 %out, i32* %dest, align 4
    268   ret void
    269 }
    270 
    271 ; Check that we use an undef mask for undefined behavior if the fast-math
    272 ; flag is set.
    273 ; IR-BOTH-LABEL: @undefConstantFRemCaseWithFastMath
    274 ; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>, <2 x float>* %addr1
    275 ; Scalar version:  
    276 ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1
    277 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = frem nnan float [[EXTRACT]], 7.0
    278 ; Vector version:
    279 ; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = frem nnan <2 x float> [[LOAD]], <float undef, float 7.000000e+00>
    280 ; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[DIV]], i32 1
    281 ;
    282 ; IR-BOTH-NEXT: store float [[RES]], float* %dest
    283 ; IR-BOTH-NEXT: ret
        ; The fast-math flag used here is nnan. The extracted lane is the dividend
        ; and the constant 7.0 is the divisor; the stress run expects the unused
        ; lane of the vector constant to be undef rather than 7.0.
    284 define void @undefConstantFRemCaseWithFastMath(<2 x float>* %addr1, float* %dest) {
    285   %in1 = load <2 x float>, <2 x float>* %addr1, align 8   
    286   %extract = extractelement <2 x float> %in1, i32 1
    287   %out = frem nnan float %extract, 7.0
    288   store float %out, float* %dest, align 4
    289   ret void
    290 }
    291 
    292 ; Check that we use an undef mask for undefined behavior if the fast-math
    293 ; flag is set.
    294 ; IR-BOTH-LABEL: @undefVectorFRemCaseWithFastMath
    295 ; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>, <2 x float>* %addr1
    296 ; Scalar version:  
    297 ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1
    298 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = frem nnan float 7.000000e+00, [[EXTRACT]]
    299 ; Vector version:
    300 ; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = frem nnan <2 x float> <float undef, float 7.000000e+00>, [[LOAD]]
    301 ; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[DIV]], i32 1
    302 ;
    303 ; IR-BOTH-NEXT: store float [[RES]], float* %dest
    304 ; IR-BOTH-NEXT: ret
        ; Operands are swapped relative to the constant-divisor case: the constant
        ; 7.0 is the dividend and the extracted lane is the divisor. With nnan
        ; set, the stress run still expects a vector frem whose constant operand
        ; has undef in the unused lane.
    305 define void @undefVectorFRemCaseWithFastMath(<2 x float>* %addr1, float* %dest) {
    306   %in1 = load <2 x float>, <2 x float>* %addr1, align 8   
    307   %extract = extractelement <2 x float> %in1, i32 1
    308   %out = frem nnan float 7.0, %extract
    309   store float %out, float* %dest, align 4
    310   ret void
    311 }
    312 
    313 ; Check that we are able to promote floating point values.
    314 ; This requires the STRESS mode, as floating point values are
    315 ; not promoted on armv7.
    316 ; IR-BOTH-LABEL: @simpleOneInstructionPromotionFloat
    317 ; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>, <2 x float>* %addr1
    318 ; Scalar version: 
    319 ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1
    320 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = fadd float [[EXTRACT]], 1.0
    321 ; Vector version:
    322 ; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = fadd <2 x float> [[LOAD]], <float undef, float 1.000000e+00>
    323 ; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[DIV]], i32 1
    324 ;
    325 ; IR-BOTH-NEXT: store float [[RES]], float* %dest
    326 ; IR-BOTH-NEXT: ret
        ; A single fadd of 1.0 on lane 1; the stress run expects a vector fadd
        ; whose constant has undef in the unused lane.
    327 define void @simpleOneInstructionPromotionFloat(<2 x float>* %addr1, float* %dest) {
    328   %in1 = load <2 x float>, <2 x float>* %addr1, align 8
    329   %extract = extractelement <2 x float> %in1, i32 1
    330   %out = fadd float %extract, 1.0
    331   store float %out, float* %dest, align 4
    332   ret void
    333 }
    334 
    335 ; Check that we correctly use a splat constant when we cannot
    336 ; determine at compile time the index of the extract.
    337 ; This requires the STRESS mode, as variable indices are expensive
    338 ; to lower.
    339 ; IR-BOTH-LABEL: @simpleOneInstructionPromotionVariableIdx
    340 ; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
    341 ; Scalar version:
    342 ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 %idx
    343 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = or i32 [[EXTRACT]], 1
    344 ; Vector version:
    345 ; IR-STRESS-NEXT: [[OR:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[LOAD]], <i32 1, i32 1>
    346 ; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[OR]], i32 %idx
    347 ;
    348 ; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
    349 ; IR-BOTH-NEXT: ret
        ; The lane index comes from the %idx argument, so any lane may be read;
        ; the stress run expects the constant 1 in every lane of the vector `or`.
    350 define void @simpleOneInstructionPromotionVariableIdx(<2 x i32>* %addr1, i32* %dest, i32 %idx) {
    351   %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
    352   %extract = extractelement <2 x i32> %in1, i32 %idx
    353   %out = or i32 %extract, 1
    354   store i32 %out, i32* %dest, align 4
    355   ret void
    356 }
    357 
    358 ; Check a vector with more than 2 elements.
    359 ; This requires the STRESS mode because currently 'or v8i8' is not marked
    360 ; as legal or custom, although the actual assembly would be better if we
    361 ; were promoting it.
    362 ; IR-BOTH-LABEL: @simpleOneInstructionPromotion8x8
    363 ; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <8 x i8>, <8 x i8>* %addr1
    364 ; Scalar version:  
    365 ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <8 x i8> [[LOAD]], i32 1
    366 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = or i8 [[EXTRACT]], 1
    367 ; Vector version:  
    368 ; IR-STRESS-NEXT: [[OR:%[a-zA-Z_0-9-]+]] = or <8 x i8> [[LOAD]], <i8 undef, i8 1, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>
    369 ; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <8 x i8> [[OR]], i32 1
    370 ;
    371 ; IR-BOTH-NEXT: store i8 [[RES]], i8* %dest
    372 ; IR-BOTH-NEXT: ret
        ; A single `or` on lane 1 of an 8 x i8 vector; the stress run expects the
        ; constant 1 only in lane 1, with undef in the seven other lanes.
    373 define void @simpleOneInstructionPromotion8x8(<8 x i8>* %addr1, i8* %dest) {
    374   %in1 = load <8 x i8>, <8 x i8>* %addr1, align 8
    375   %extract = extractelement <8 x i8> %in1, i32 1
    376   %out = or i8 %extract, 1
    377   store i8 %out, i8* %dest, align 4
    378   ret void
    379 }
    380 
    381 ; Check that we optimized the sequence correctly when it can be
    382 ; lowered on a Q register.
    383 ; IR-BOTH-LABEL: @simpleOneInstructionPromotion4x32
    384 ; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <4 x i32>, <4 x i32>* %addr1
    385 ; IR-BOTH-NEXT: [[VECTOR_OR:%[a-zA-Z_0-9-]+]] = or <4 x i32> [[LOAD]], <i32 undef, i32 1, i32 undef, i32 undef>
    386 ; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <4 x i32> [[VECTOR_OR]], i32 1
    387 ; IR-BOTH-NEXT: store i32 [[EXTRACT]], i32* %dest
    388 ; IR-BOTH-NEXT: ret
    389 ;
    390 ; Make sure we got rid of any expensive vmov.32 instructions.
    391 ; ASM-LABEL: simpleOneInstructionPromotion4x32:
    392 ; ASM: vld1.64 {[[LOAD:d[0-9]+]], d{{[0-9]+}}}, [r0]
    393 ; The Q register used here must be [[LOAD]] / 2, but we cannot express that.
    394 ; ASM-NEXT: vorr.i32 q{{[0-9]+}}, #0x1
    395 ; ASM-NEXT: vst1.32 {[[LOAD]][1]}, [r1]
    396 ; ASM-NEXT: bx
        ; Same shape as the 2 x i32 baseline, but on a 4 x i32 vector: a single
        ; scalar `or` on lane 1 between the extract and the store. The store is
        ; only 1-byte aligned, and the expected vst1.32 carries no alignment
        ; qualifier on [r1].
    397 define void @simpleOneInstructionPromotion4x32(<4 x i32>* %addr1, i32* %dest) {
    398   %in1 = load <4 x i32>, <4 x i32>* %addr1, align 8
    399   %extract = extractelement <4 x i32> %in1, i32 1
    400   %out = or i32 %extract, 1
    401   store i32 %out, i32* %dest, align 1
    402   ret void
    403 }
    404