Home | History | Annotate | Download | only in ARM
      1 ; RUN: llc -mtriple=thumbv7k-apple-watchos2.0 -o - %s | FileCheck %s
      2 
        ; Layout probe: every wider member (i64, double, <2 x float>, <4 x float>) is
        ; preceded by a single i8, so the byte offset of fields 1, 3, 5 and 7 depends
        ; on the alignment the target's data layout assigns to that member's type.
      3 %struct = type { i8, i64, i8, double, i8, <2 x float>, i8, <4 x float> }
      4 
      5 define i32 @test_i64_align() {
        ; Returns the byte offset of %struct's i64 member (field 1) as a constant
        ; expression. With the i8 at offset 0, an expected result of 8 means the i64
        ; is placed on an 8-byte boundary.
      6 ; CHECK-LABEL: test_i64_align:
      7 ; CHECK: movs r0, #8
      8   ret i32 ptrtoint(i64* getelementptr(%struct, %struct* null, i32 0, i32 1) to i32)
      9 }
     10 
     11 define i32 @test_f64_align() {
        ; Returns the byte offset of %struct's double member (field 3). The preceding
        ; i8 follows an i64 expected at offset 8, i.e. it sits at 16, so an expected
        ; result of 24 means the double is placed on an 8-byte boundary.
     12 ; CHECK-LABEL: test_f64_align:
     13 ; CHECK: movs r0, #24
     14   ret i32 ptrtoint(double* getelementptr(%struct, %struct* null, i32 0, i32 3) to i32)
     15 }
     16 
     17 define i32 @test_v2f32_align() {
        ; Returns the byte offset of %struct's <2 x float> member (field 5). With the
        ; double expected at 24 and the following i8 at 32, an expected result of 40
        ; means the 64-bit vector is placed on an 8-byte boundary.
     18 ; CHECK-LABEL: test_v2f32_align:
     19 ; CHECK: movs r0, #40
     20   ret i32 ptrtoint(<2 x float>* getelementptr(%struct, %struct* null, i32 0, i32 5) to i32)
     21 }
     22 
     23 define i32 @test_v4f32_align() {
        ; Returns the byte offset of %struct's <4 x float> member (field 7). With the
        ; <2 x float> expected at 40 and the following i8 at 48, an expected result of
        ; 64 (rather than 56) means the 128-bit vector is placed on a 16-byte boundary.
     24 ; CHECK-LABEL: test_v4f32_align:
     25 ; CHECK: movs r0, #64
     26   ret i32 ptrtoint(<4 x float>* getelementptr(%struct, %struct* null, i32 0, i32 7) to i32)
     27 }
     28 
     29 ; Key point here is that an extra register has to be saved so that the DPRs end
     30 ; up in an aligned location (as prologue/epilogue inserter had calculated).
        ; The inline asm clobbers only r6, d8 and d9, while the expected push list is
        ; {r5, r6, r7, lr}; r5 looks like the extra register (r7/lr are presumably the
        ; frame record -- confirm). The 16-byte GPR push followed by the 16-byte vpush
        ; matches the checked CFI offsets (d9 at -24, d8 at -32), and no explicit
        ; "sub sp"/"add sp" is allowed anywhere in between.
     31 define void @test_dpr_unwind_align() {
     32 ; CHECK-LABEL: test_dpr_unwind_align:
     33 ; CHECK: push {r5, r6, r7, lr}
     34 ; CHECK-NOT: sub sp
     35 ; CHECK: vpush {d8, d9}
     36 ; CHECK: .cfi_offset d9, -24
     37 ; CHECK: .cfi_offset d8, -32
     38 ; [...]
     39 ; CHECK: bl _test_i64_align
     40 ; CHECK-NOT: add sp,
     41 ; CHECK: vpop {d8, d9}
     42 ; CHECK-NOT: add sp,
     43 ; CHECK: pop {r5, r6, r7, pc}
     44 
     45   call void asm sideeffect "", "~{r6},~{d8},~{d9}"()
     46 
     47   ; Arbitrary callee; the checks above only look for the resulting "bl".
     48   call i32 @test_i64_align()
     49   ret void
     50 }
     51 
     52 ; This time, there's no viable way to tack CS-registers onto the list: a real SP
     53 ; adjustment needs to be performed to put d8 and d9 where they should be.
        ; Expected frame arithmetic: 20 bytes for {r4-r7, lr}, 8 bytes for {r8, r11},
        ; then a 4-byte "sub sp" pad brings the total to 32, so the following 16-byte
        ; vpush gives the checked CFI offsets (d9 at -40, d8 at -48). The epilogue is
        ; expected to undo the pad with a matching "add sp, #4" after the vpop.
     54 define void @test_dpr_unwind_align_manually() {
     55 ; CHECK-LABEL: test_dpr_unwind_align_manually:
     56 ; CHECK: push {r4, r5, r6, r7, lr}
     57 ; CHECK-NOT: sub sp
     58 ; CHECK: push.w {r8, r11}
     59 ; CHECK: sub sp, #4
     60 ; CHECK: vpush {d8, d9}
     61 ; CHECK: .cfi_offset d9, -40
     62 ; CHECK: .cfi_offset d8, -48
     63 ; [...]
     64 ; CHECK: bl _test_i64_align
     65 ; CHECK-NOT: add sp,
     66 ; CHECK: vpop {d8, d9}
     67 ; CHECK: add sp, #4
     68 ; CHECK: pop.w {r8, r11}
     69 ; CHECK: pop {r4, r5, r6, r7, pc}
     70 
     71   call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{d8},~{d9}"()
     72 
     73   ; Arbitrary callee; the checks above only look for the resulting "bl".
     74   call i32 @test_i64_align()
     75   ret void
     76 }
     77 
     78 ; If there's only a CS1 area, the sub should be in the right place:
        ; i.e. the 4-byte pad is expected directly after the single GPR push and before
        ; the vpush. 20 bytes for {r4-r7, lr} plus the 4-byte pad gives 24, so the
        ; 16-byte vpush matches the checked CFI offsets (d9 at -32, d8 at -40). A
        ; separate "sub sp, #8" / "add sp, #8" pair is expected around the call.
     79 define void @test_dpr_unwind_align_just_cs1() {
     80 ; CHECK-LABEL: test_dpr_unwind_align_just_cs1:
     81 ; CHECK: push {r4, r5, r6, r7, lr}
     82 ; CHECK: sub sp, #4
     83 ; CHECK: vpush {d8, d9}
     84 ; CHECK: .cfi_offset d9, -32
     85 ; CHECK: .cfi_offset d8, -40
     86 ; CHECK: sub sp, #8
     87 ; [...]
     88 ; CHECK: bl _test_i64_align
     89 ; CHECK: add sp, #8
     90 ; CHECK: vpop {d8, d9}
     91 ; CHECK: add sp, #4
     92 ; CHECK: pop {r4, r5, r6, r7, pc}
     93 
     94   call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{d8},~{d9}"()
     95 
     96   ; Arbitrary callee; the checks above only look for the resulting "bl".
     97   call i32 @test_i64_align()
     98   ret void
     99 }
    100 
    101 ; If there are no DPRs, we shouldn't try to align the stack in stages anyway
        ; The inline asm clobbers only r4-r7, so no vpush is involved: a single
        ; "sub sp, #12" is expected right after the 20-byte GPR push (32 bytes in
        ; total), mirrored by a single "add sp, #12" before the pop.
    102 define void @test_dpr_unwind_align_no_dprs() {
    103 ; CHECK-LABEL: test_dpr_unwind_align_no_dprs:
    104 ; CHECK: push {r4, r5, r6, r7, lr}
    105 ; CHECK: sub sp, #12
    106 ; [...]
    107 ; CHECK: bl _test_i64_align
    108 ; CHECK: add sp, #12
    109 ; CHECK: pop {r4, r5, r6, r7, pc}
    110 
    111   call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7}"()
    112 
    113   ; Arbitrary callee; the checks above only look for the resulting "bl".
    114   call i32 @test_i64_align()
    115   ret void
    116 }
    117 
    118 ; 128-bit vectors should use 128-bit (i.e. correctly aligned) slots on
    119 ; the stack.
        ; The leading [8 x double] and float arguments are unnamed fillers; presumably
        ; they use up the FP argument registers and the first stack slot so that %in
        ; is passed in memory (TODO confirm against the calling convention). The
        ; checks expect %in to be loaded from sp+16 with a :128 alignment qualifier.
    120 define <4 x float> @test_v128_stack_pass([8 x double], float, <4 x float> %in) {
    121 ; CHECK-LABEL: test_v128_stack_pass:
    122 ; CHECK: add r[[ADDR:[0-9]+]], sp, #16
    123 ; CHECK: vld1.64 {d0, d1}, [r[[ADDR]]:128]
    124 
    125   ret <4 x float> %in
    126 }
    127 
        ; External variadic callee used by the varargs stack-passing test below.
    128 declare void @varargs(i32, ...)
    129 
    130 ; When varargs are enabled, we go down a different route. Still want 128-bit
    131 ; alignment though.
        ; The undef i32, [3 x i32] and float arguments are fillers placed ahead of
        ; %in; presumably they consume r0-r3 and the first stack slot (TODO confirm).
        ; The checks expect %in to be stored at sp+16 with a :128 alignment qualifier.
    132 define void @test_v128_stack_pass_varargs(<4 x float> %in) {
    133 ; CHECK-LABEL: test_v128_stack_pass_varargs:
    134 ; CHECK: add r[[ADDR:[0-9]+]], sp, #16
    135 ; CHECK: vst1.64 {d0, d1}, [r[[ADDR]]:128]
    136 
    137   call void(i32, ...) @varargs(i32 undef, [3 x i32] undef, float undef, <4 x float> %in)
    138   ret void
    139 }
    140 
    140 ; To be compatible with AAPCS's va_start model (store r0-r3 at incoming SP, give
    141 ; a single pointer), 64-bit quantities must be passed in an aligned GPR pair.
        ; As the checks below expect: after the unnamed leading i32, the i64 is read
        ; from r2/r3 (not r1/r2), and the trailing i32 is loaded from [sp]. The sum is
        ; expected in r0/r1, with the carry propagated into the high word via adc.
    142 define i64 @test_64bit_gpr_align(i32, i64 %r2_r3, i32 %sp) {
    143 ; CHECK-LABEL: test_64bit_gpr_align:
    144 ; CHECK: ldr [[RHS:r[0-9]+]], [sp]
    145 ; CHECK: adds r0, [[RHS]], r2
    146 ; CHECK: adc r1, r3, #0
    147 
    148   %ext = zext i32 %sp to i64
    149   %sum = add i64 %ext, %r2_r3
    150   ret i64 %sum
    151 }
    153