; RUN: llc < %s -mcpu=cortex-a8 -align-neon-spills=0 | FileCheck %s
; RUN: llc < %s -mcpu=cortex-a8 -align-neon-spills=1 | FileCheck %s --check-prefix=NEON
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
target triple = "thumbv7-apple-ios"

; CHECK: f
; This function is forced to spill a double.
; Verify that the spill slot is properly aligned.
;
; r4 is a callee-saved register, so the prologue pushes it before using it as
; a scratch register for stack realignment.
; CHECK: push {r4, r7, lr}
; CHECK: bic r4, r4, #7
; CHECK: mov sp, r4
define void @f(double* nocapture %p) nounwind ssp {
entry:
  ; The loaded value stays live until the store at the end of the function.
  %0 = load double* %p, align 4
  ; The empty inline asm exists only for its clobber list: it marks d8-d15 as
  ; overwritten, so the function has to save and restore all eight of them.
  tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"() nounwind
  ; A call between the load and the store; together with the clobbers above
  ; this is what is meant to push %0 out to a stack slot.
  tail call void @g() nounwind
  store double %0, double* %p, align 4
  ret void
}

; Expectations for the run with -align-neon-spills=1.
; NEON: f
; NEON: push {r4, r7, lr}
; NEON: sub.w r4, sp, #64
; NEON: bic r4, r4, #15
; Stack pointer must be updated before the spills.
; NEON: mov sp, r4
; NEON: vst1.64 {d8, d9, d10, d11}, [r4:128]!
; NEON: vst1.64 {d12, d13, d14, d15}, [r4:128]
; Stack pointer adjustment for the stack frame contents.
; This could legally happen before the spills.
; Since the spill slot is only 8 bytes, technically it would be fine to only
; subtract #8 here. That would leave sp less aligned than some stack slots,
; and would probably blow MFI's mind.
; NEON: sub sp, #16
; The epilog is free to use a scratch register other than r4.
; NEON: add r[[R4:[0-9]+]], sp, #16
; NEON: vld1.64 {d8, d9, d10, d11}, [r[[R4]]:128]!
; NEON: vld1.64 {d12, d13, d14, d15}, [r[[R4]]:128]
; The stack pointer restore must happen after the reloads.
; NEON: mov sp,
; NEON: pop

declare void @g()

; Spill 7 d-registers.
define void @f7(double* nocapture %p) nounwind ssp {
entry:
  ; Empty asm whose clobber list names d8 through d14 (seven registers).
  tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14}"() nounwind
  ret void
}

; Expected prologue with aligned spills enabled: room for 7 x 8 = 56 bytes is
; carved out below sp and rounded down to a 16-byte boundary.
; NEON: f7
; NEON: push {r4, r7, lr}
; NEON: sub.w r4, sp, #56
; NEON: bic r4, r4, #15
; sp has to be moved down before any register is stored.
; NEON: mov sp, r4
; NEON: vst1.64 {d8, d9, d10, d11}, [r4:128]!
; NEON: vst1.64 {d12, d13}, [r4:128]
; NEON: vstr d14, [r4, #16]
; Epilogue: the reloads mirror the stores above.
; NEON: vld1.64 {d8, d9, d10, d11},
; NEON: vld1.64 {d12, d13},
; NEON: vldr d14,
; sp may only be restored once every reload has been issued.
; NEON: mov sp,
; NEON: pop

; Seven d-registers again, but d11 is missing from the clobber list, so the
; range has a gap in it.
define void @f3plus4(double* nocapture %p) nounwind ssp {
entry:
  tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d12},~{d13},~{d14},~{d15}"() nounwind
  ret void
}

; Only a contiguous run of registers beginning at d8 gets the aligned
; treatment; here that is d8-d10. Everything after the gap (d12-d15) is saved
; with an ordinary vpush instead.
; NEON: f3plus4
; NEON: push {r4, r7, lr}
; NEON: vpush {d12, d13, d14, d15}
; NEON: sub.w r4, sp, #24
; NEON: bic r4, r4, #15
; sp has to be moved down before any register is stored.
; NEON: mov sp, r4
; NEON: vst1.64 {d8, d9}, [r4:128]
; NEON: vstr d10, [r4, #16]
; Epilogue: aligned reloads first, then the matching vpop.
; NEON: vld1.64 {d8, d9},
; NEON: vldr d10, [{{.*}}, #16]
; sp may only be restored once every reload has been issued.
; NEON: mov sp,
; NEON: vpop {d12, d13, d14, d15}
; NEON: pop