Home | History | Annotate | Download | only in ARM
      1 ; RUN: llc -mtriple thumbv7-apple-ios -verify-machineinstrs -o - %s | FileCheck %s
      2 
      3 ; The ARM load/store optimizer was dealing with a sequence like:
      4 ;     s1 = VLDRS [r0, 1], Q0<imp-def>
      5 ;     s3 = VLDRS [r0, 2], Q0<imp-use,kill>, Q0<imp-def>
      6 ;     s0 = VLDRS [r0, 0], Q0<imp-use,kill>, Q0<imp-def>
      7 ;     s2 = VLDRS [r0, 4], Q0<imp-use,kill>, Q0<imp-def>
      8 ;
      9 ; It decided to combine the {s0, s1} loads into a single instruction in the
     10 ; third position. However, this leaves the instruction defining s3 with a stray
     11 ; imp-use of Q0, which is undefined.
     12 ;
     13 ; The verifier catches this, so this test just makes sure that appropriate
     14 ; liveness flags are added.
     15 ;
     16 ; I believe the change will be tested as long as the vldmia is not the first of
     17 ; the loads. Earlier optimisations may perturb the output over time, but
     18 ; fiddling the indices should be sufficient to restore the test.
     19 
     20 define arm_aapcs_vfpcc <4 x float> @foo(float* %ptr) {   ; loads floats at indices 0, 1, 4 and 2 from %ptr and returns them packed in a <4 x float>
     21 ; CHECK-LABEL: foo:
     22 ; CHECK: vldr s3, [r0, #8]
     23 ; CHECK: vldmia r0, {s0, s1}
     24 ; CHECK: vldr s2, [r0, #16]
     25    %off0 = getelementptr float* %ptr, i32 0   ; address of element 0
     26    %val0 = load float* %off0                  ; with %val1, expected to be covered by the vldmia {s0, s1} CHECK
     27    %off1 = getelementptr float* %ptr, i32 1   ; address of element 1, adjacent to element 0
     28    %val1 = load float* %off1
     29    %off4 = getelementptr float* %ptr, i32 4   ; address of element 4 (matches the [r0, #16] CHECK)
     30    %val4 = load float* %off4                  ; NOTE(review): index 4 is loaded before index 2; the header says the indices are tuned to keep the vldmia from being the first load, so keep this order
     31    %off2 = getelementptr float* %ptr, i32 2   ; address of element 2 (matches the [r0, #8] CHECK)
     32    %val2 = load float* %off2
     33    ; Assemble the result vector one lane at a time, starting from undef.
     34    %vec1 = insertelement <4 x float> undef, float %val0, i32 0   ; lane 0 <- element 0
     35    %vec2 = insertelement <4 x float> %vec1, float %val1, i32 1   ; lane 1 <- element 1
     36    %vec3 = insertelement <4 x float> %vec2, float %val4, i32 2   ; lane 2 <- element 4 (the CHECK lines expect this in s2)
     37    %vec4 = insertelement <4 x float> %vec3, float %val2, i32 3   ; lane 3 <- element 2 (the CHECK lines expect this in s3)
     38    ; All four lanes are now defined; return the completed vector.
     39    ret <4 x float> %vec4
     40 }
     41