; REQUIRES: asserts
; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=exynos-m1 -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck --check-prefix=EXYNOS %s

; Plain ldr clustering: two i32 loads at element offsets 1 and 2 from the same
; base pointer. For both CPUs the scheduler debug output is expected to report
; SU(1) and SU(2), both LDRWui, as a single ld/st cluster.
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: ldr_int:BB#0
; CHECK: Cluster ld/st SU(1) - SU(2)
; CHECK: SU(1):   %vreg{{[0-9]+}}<def> = LDRWui
; CHECK: SU(2):   %vreg{{[0-9]+}}<def> = LDRWui
; EXYNOS: ********** MI Scheduling **********
; EXYNOS-LABEL: ldr_int:BB#0
; EXYNOS: Cluster ld/st SU(1) - SU(2)
; EXYNOS: SU(1):   %vreg{{[0-9]+}}<def> = LDRWui
; EXYNOS: SU(2):   %vreg{{[0-9]+}}<def> = LDRWui
define i32 @ldr_int(i32* %a) nounwind {
  %addr.first = getelementptr inbounds i32, i32* %a, i32 1
  %val.first = load i32, i32* %addr.first, align 2
  %addr.second = getelementptr inbounds i32, i32* %a, i32 2
  %val.second = load i32, i32* %addr.second, align 2
  %sum = add i32 %val.first, %val.second
  ret i32 %sum
}

; ldpsw-style clustering: two adjacent i32 loads (%p and %p + 1 element) whose
; results are both sign-extended to i64. The expected output shows them as
; LDRSWui in SU(1) and SU(2), clustered together.
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: ldp_sext_int:BB#0
; CHECK: Cluster ld/st SU(1) - SU(2)
; CHECK: SU(1):   %vreg{{[0-9]+}}<def> = LDRSWui
; CHECK: SU(2):   %vreg{{[0-9]+}}<def> = LDRSWui
; EXYNOS: ********** MI Scheduling **********
; EXYNOS-LABEL: ldp_sext_int:BB#0
; EXYNOS: Cluster ld/st SU(1) - SU(2)
; EXYNOS: SU(1):   %vreg{{[0-9]+}}<def> = LDRSWui
; EXYNOS: SU(2):   %vreg{{[0-9]+}}<def> = LDRSWui
define i64 @ldp_sext_int(i32* %p) nounwind {
  %lo = load i32, i32* %p, align 4
  %hi.addr = getelementptr inbounds i32, i32* %p, i64 1
  %hi = load i32, i32* %hi.addr, align 4
  %lo.sext = sext i32 %lo to i64
  %hi.sext = sext i32 %hi to i64
  %sum = add nsw i64 %hi.sext, %lo.sext
  ret i64 %sum
}

; Test ldur clustering.
; The loads below use negative element offsets (-1 and -2), and the expected
; output shows them as LDURWi. The cluster is reported as SU(2) - SU(1),
; presumably because the second load has the lower address.
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: ldur_int:BB#0
; CHECK: Cluster ld/st SU(2) - SU(1)
; CHECK: SU(1):   %vreg{{[0-9]+}}<def> = LDURWi
; CHECK: SU(2):   %vreg{{[0-9]+}}<def> = LDURWi
; EXYNOS: ********** MI Scheduling **********
; EXYNOS-LABEL: ldur_int:BB#0
; EXYNOS: Cluster ld/st SU(2) - SU(1)
; EXYNOS: SU(1):   %vreg{{[0-9]+}}<def> = LDURWi
; EXYNOS: SU(2):   %vreg{{[0-9]+}}<def> = LDURWi
define i32 @ldur_int(i32* %a) nounwind {
  %addr.m1 = getelementptr inbounds i32, i32* %a, i32 -1
  %val.m1 = load i32, i32* %addr.m1, align 2
  %addr.m2 = getelementptr inbounds i32, i32* %a, i32 -2
  %val.m2 = load i32, i32* %addr.m2, align 2
  %sum = add i32 %val.m1, %val.m2
  ret i32 %sum
}

; Mixed extension, sext first then zext: after an unrelated i64 load from %q,
; two adjacent i32 loads from %p are sign- and zero-extended respectively.
; The expected output clusters SU(3) (LDRSWui) with SU(4) (LDRWui writing the
; sub_32 subregister).
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: ldp_half_sext_zext_int:BB#0
; CHECK: Cluster ld/st SU(3) - SU(4)
; CHECK: SU(3):   %vreg{{[0-9]+}}<def> = LDRSWui
; CHECK: SU(4):   %vreg{{[0-9]+}}:sub_32<def,read-undef> = LDRWui
; EXYNOS: ********** MI Scheduling **********
; EXYNOS-LABEL: ldp_half_sext_zext_int:BB#0
; EXYNOS: Cluster ld/st SU(3) - SU(4)
; EXYNOS: SU(3):   %vreg{{[0-9]+}}<def> = LDRSWui
; EXYNOS: SU(4):   %vreg{{[0-9]+}}:sub_32<def,read-undef> = LDRWui
define i64 @ldp_half_sext_zext_int(i64* %q, i32* %p) nounwind {
  %wide = load i64, i64* %q, align 4
  %lo = load i32, i32* %p, align 4
  %hi.addr = getelementptr inbounds i32, i32* %p, i64 1
  %hi = load i32, i32* %hi.addr, align 4
  %lo.sext = sext i32 %lo to i64
  %hi.zext = zext i32 %hi to i64
  %pair.sum = add nsw i64 %hi.zext, %lo.sext
  %total = add nsw i64 %pair.sum, %wide
  ret i64 %total
}

; Test zext + sext clustering.
; Here the first of the two adjacent i32 loads is zero-extended and the second
; sign-extended, so the expected cluster is SU(3) (LDRWui writing sub_32)
; followed by SU(4) (LDRSWui).
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: ldp_half_zext_sext_int:BB#0
; CHECK: Cluster ld/st SU(3) - SU(4)
; CHECK: SU(3):   %vreg{{[0-9]+}}:sub_32<def,read-undef> = LDRWui
; CHECK: SU(4):   %vreg{{[0-9]+}}<def> = LDRSWui
; EXYNOS: ********** MI Scheduling **********
; EXYNOS-LABEL: ldp_half_zext_sext_int:BB#0
; EXYNOS: Cluster ld/st SU(3) - SU(4)
; EXYNOS: SU(3):   %vreg{{[0-9]+}}:sub_32<def,read-undef> = LDRWui
; EXYNOS: SU(4):   %vreg{{[0-9]+}}<def> = LDRSWui
define i64 @ldp_half_zext_sext_int(i64* %q, i32* %p) nounwind {
  %wide = load i64, i64* %q, align 4
  %lo = load i32, i32* %p, align 4
  %hi.addr = getelementptr inbounds i32, i32* %p, i64 1
  %hi = load i32, i32* %hi.addr, align 4
  %lo.zext = zext i32 %lo to i64
  %hi.sext = sext i32 %hi to i64
  %pair.sum = add nsw i64 %hi.sext, %lo.zext
  %total = add nsw i64 %pair.sum, %wide
  ret i64 %total
}

; Volatile loads must not be clustered: same addressing as ldr_int (element
; offsets 1 and 2), but both loads are volatile. No "Cluster ld/st" message
; may appear between the function label and the SU(1) line.
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: ldr_int_volatile:BB#0
; CHECK-NOT: Cluster ld/st
; CHECK: SU(1):   %vreg{{[0-9]+}}<def> = LDRWui
; CHECK: SU(2):   %vreg{{[0-9]+}}<def> = LDRWui
; EXYNOS: ********** MI Scheduling **********
; EXYNOS-LABEL: ldr_int_volatile:BB#0
; EXYNOS-NOT: Cluster ld/st
; EXYNOS: SU(1):   %vreg{{[0-9]+}}<def> = LDRWui
; EXYNOS: SU(2):   %vreg{{[0-9]+}}<def> = LDRWui
define i32 @ldr_int_volatile(i32* %a) nounwind {
  %addr.first = getelementptr inbounds i32, i32* %a, i32 1
  %val.first = load volatile i32, i32* %addr.first, align 2
  %addr.second = getelementptr inbounds i32, i32* %a, i32 2
  %val.second = load volatile i32, i32* %addr.second, align 2
  %sum = add i32 %val.first, %val.second
  ret i32 %sum
}

; Test ldq clustering (no clustering for Exynos).
; Two <2 x i64> loads, at %p and at %p + 2 i64 elements, with a vector add
; placed between them in the IR. On cortex-a57 the expected output clusters
; SU(1) and SU(3), both LDRQui; on exynos-m1 no cluster message is expected
; after the function label.
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: ldq_cluster:BB#0
; CHECK: Cluster ld/st SU(1) - SU(3)
; CHECK: SU(1):   %vreg{{[0-9]+}}<def> = LDRQui
; CHECK: SU(3):   %vreg{{[0-9]+}}<def> = LDRQui
; EXYNOS: ********** MI Scheduling **********
; EXYNOS-LABEL: ldq_cluster:BB#0
; EXYNOS-NOT: Cluster ld/st
define <2 x i64> @ldq_cluster(i64* %p) {
  %vec0.addr = bitcast i64* %p to <2 x i64>*
  %vec0 = load <2 x i64>, <2 x i64>* %vec0.addr, align 8
  %elt2.addr = getelementptr inbounds i64, i64* %p, i64 2
  %vec1.addr = bitcast i64* %elt2.addr to <2 x i64>*
  %doubled = add nsw <2 x i64> %vec0, %vec0
  %vec1 = load <2 x i64>, <2 x i64>* %vec1.addr, align 8
  %product = mul nsw <2 x i64> %doubled, %vec1
  ret <2 x i64> %product
}