Home | History | Annotate | Download | only in AArch64
      1 ; REQUIRES: asserts
      2 ; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
      3 ; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=exynos-m1 -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck --check-prefix=EXYNOS %s
      4 
      5 ; Test ldr clustering.
      6 ; CHECK: ********** MI Scheduling **********
      7 ; CHECK-LABEL: ldr_int:BB#0
      8 ; CHECK: Cluster ld/st SU(1) - SU(2)
      9 ; CHECK: SU(1):   %vreg{{[0-9]+}}<def> = LDRWui
     10 ; CHECK: SU(2):   %vreg{{[0-9]+}}<def> = LDRWui
     11 ; EXYNOS: ********** MI Scheduling **********
     12 ; EXYNOS-LABEL: ldr_int:BB#0
     13 ; EXYNOS: Cluster ld/st SU(1) - SU(2)
     14 ; EXYNOS: SU(1):   %vreg{{[0-9]+}}<def> = LDRWui
     15 ; EXYNOS: SU(2):   %vreg{{[0-9]+}}<def> = LDRWui
     16 define i32 @ldr_int(i32* %a) nounwind {
     17   %p1 = getelementptr inbounds i32, i32* %a, i32 1
     18   %tmp1 = load i32, i32* %p1, align 2
     19   %p2 = getelementptr inbounds i32, i32* %a, i32 2
     20   %tmp2 = load i32, i32* %p2, align 2
     21   %tmp3 = add i32 %tmp1, %tmp2
     22   ret i32 %tmp3
     23 }
     24 
     25 ; Test ldpsw clustering
     26 ; CHECK: ********** MI Scheduling **********
     27 ; CHECK-LABEL: ldp_sext_int:BB#0
     28 ; CHECK: Cluster ld/st SU(1) - SU(2)
     29 ; CHECK: SU(1):   %vreg{{[0-9]+}}<def> = LDRSWui
     30 ; CHECK: SU(2):   %vreg{{[0-9]+}}<def> = LDRSWui
     31 ; EXYNOS: ********** MI Scheduling **********
     32 ; EXYNOS-LABEL: ldp_sext_int:BB#0
     33 ; EXYNOS: Cluster ld/st SU(1) - SU(2)
     34 ; EXYNOS: SU(1):   %vreg{{[0-9]+}}<def> = LDRSWui
     35 ; EXYNOS: SU(2):   %vreg{{[0-9]+}}<def> = LDRSWui
     36 define i64 @ldp_sext_int(i32* %p) nounwind {
     37   %tmp = load i32, i32* %p, align 4
     38   %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
     39   %tmp1 = load i32, i32* %add.ptr, align 4
     40   %sexttmp = sext i32 %tmp to i64
     41   %sexttmp1 = sext i32 %tmp1 to i64
     42   %add = add nsw i64 %sexttmp1, %sexttmp
     43   ret i64 %add
     44 }
     45 
     46 ; Test ldur clustering.
     47 ; CHECK: ********** MI Scheduling **********
     48 ; CHECK-LABEL: ldur_int:BB#0
     49 ; CHECK: Cluster ld/st SU(2) - SU(1)
     50 ; CHECK: SU(1):   %vreg{{[0-9]+}}<def> = LDURWi
     51 ; CHECK: SU(2):   %vreg{{[0-9]+}}<def> = LDURWi
     52 ; EXYNOS: ********** MI Scheduling **********
     53 ; EXYNOS-LABEL: ldur_int:BB#0
     54 ; EXYNOS: Cluster ld/st SU(2) - SU(1)
     55 ; EXYNOS: SU(1):   %vreg{{[0-9]+}}<def> = LDURWi
     56 ; EXYNOS: SU(2):   %vreg{{[0-9]+}}<def> = LDURWi
     57 define i32 @ldur_int(i32* %a) nounwind {
     58   %p1 = getelementptr inbounds i32, i32* %a, i32 -1
     59   %tmp1 = load i32, i32* %p1, align 2
     60   %p2 = getelementptr inbounds i32, i32* %a, i32 -2
     61   %tmp2 = load i32, i32* %p2, align 2
     62   %tmp3 = add i32 %tmp1, %tmp2
     63   ret i32 %tmp3
     64 }
     65 
     66 ; Test sext + zext clustering.
     67 ; CHECK: ********** MI Scheduling **********
     68 ; CHECK-LABEL: ldp_half_sext_zext_int:BB#0
     69 ; CHECK: Cluster ld/st SU(3) - SU(4)
     70 ; CHECK: SU(3):   %vreg{{[0-9]+}}<def> = LDRSWui
     71 ; CHECK: SU(4):   %vreg{{[0-9]+}}:sub_32<def,read-undef> = LDRWui
     72 ; EXYNOS: ********** MI Scheduling **********
     73 ; EXYNOS-LABEL: ldp_half_sext_zext_int:BB#0
     74 ; EXYNOS: Cluster ld/st SU(3) - SU(4)
     75 ; EXYNOS: SU(3):   %vreg{{[0-9]+}}<def> = LDRSWui
     76 ; EXYNOS: SU(4):   %vreg{{[0-9]+}}:sub_32<def,read-undef> = LDRWui
     77 define i64 @ldp_half_sext_zext_int(i64* %q, i32* %p) nounwind {
     78   %tmp0 = load i64, i64* %q, align 4
     79   %tmp = load i32, i32* %p, align 4
     80   %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
     81   %tmp1 = load i32, i32* %add.ptr, align 4
     82   %sexttmp = sext i32 %tmp to i64
     83   %sexttmp1 = zext i32 %tmp1 to i64
     84   %add = add nsw i64 %sexttmp1, %sexttmp
     85   %add1 = add nsw i64 %add, %tmp0
     86   ret i64 %add1
     87 }
     88 
     89 ; Test zext + sext clustering.
     90 ; CHECK: ********** MI Scheduling **********
     91 ; CHECK-LABEL: ldp_half_zext_sext_int:BB#0
     92 ; CHECK: Cluster ld/st SU(3) - SU(4)
     93 ; CHECK: SU(3):   %vreg{{[0-9]+}}:sub_32<def,read-undef> = LDRWui
     94 ; CHECK: SU(4):   %vreg{{[0-9]+}}<def> = LDRSWui
     95 ; EXYNOS: ********** MI Scheduling **********
     96 ; EXYNOS-LABEL: ldp_half_zext_sext_int:BB#0
     97 ; EXYNOS: Cluster ld/st SU(3) - SU(4)
     98 ; EXYNOS: SU(3):   %vreg{{[0-9]+}}:sub_32<def,read-undef> = LDRWui
     99 ; EXYNOS: SU(4):   %vreg{{[0-9]+}}<def> = LDRSWui
    100 define i64 @ldp_half_zext_sext_int(i64* %q, i32* %p) nounwind {
    101   %tmp0 = load i64, i64* %q, align 4
    102   %tmp = load i32, i32* %p, align 4
    103   %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
    104   %tmp1 = load i32, i32* %add.ptr, align 4
    105   %sexttmp = zext i32 %tmp to i64
    106   %sexttmp1 = sext i32 %tmp1 to i64
    107   %add = add nsw i64 %sexttmp1, %sexttmp
    108   %add1 = add nsw i64 %add, %tmp0
    109   ret i64 %add1
    110 }
    111 
    112 ; Verify we don't cluster volatile loads.
    113 ; CHECK: ********** MI Scheduling **********
    114 ; CHECK-LABEL: ldr_int_volatile:BB#0
    115 ; CHECK-NOT: Cluster ld/st
    116 ; CHECK: SU(1):   %vreg{{[0-9]+}}<def> = LDRWui
    117 ; CHECK: SU(2):   %vreg{{[0-9]+}}<def> = LDRWui
    118 ; EXYNOS: ********** MI Scheduling **********
    119 ; EXYNOS-LABEL: ldr_int_volatile:BB#0
    120 ; EXYNOS-NOT: Cluster ld/st
    121 ; EXYNOS: SU(1):   %vreg{{[0-9]+}}<def> = LDRWui
    122 ; EXYNOS: SU(2):   %vreg{{[0-9]+}}<def> = LDRWui
    123 define i32 @ldr_int_volatile(i32* %a) nounwind {
    124   %p1 = getelementptr inbounds i32, i32* %a, i32 1
    125   %tmp1 = load volatile i32, i32* %p1, align 2
    126   %p2 = getelementptr inbounds i32, i32* %a, i32 2
    127   %tmp2 = load volatile i32, i32* %p2, align 2
    128   %tmp3 = add i32 %tmp1, %tmp2
    129   ret i32 %tmp3
    130 }
    131 
    132 ; Test ldq clustering (no clustering for Exynos).
    133 ; CHECK: ********** MI Scheduling **********
    134 ; CHECK-LABEL: ldq_cluster:BB#0
    135 ; CHECK: Cluster ld/st SU(1) - SU(3)
    136 ; CHECK: SU(1):   %vreg{{[0-9]+}}<def> = LDRQui
    137 ; CHECK: SU(3):   %vreg{{[0-9]+}}<def> = LDRQui
    138 ; EXYNOS: ********** MI Scheduling **********
    139 ; EXYNOS-LABEL: ldq_cluster:BB#0
    140 ; EXYNOS-NOT: Cluster ld/st
    141 define <2 x i64> @ldq_cluster(i64* %p) {
    142   %a1 = bitcast i64* %p to <2 x i64>*
    143   %tmp1 = load <2 x i64>, < 2 x i64>* %a1, align 8
    144   %add.ptr2 = getelementptr inbounds i64, i64* %p, i64 2
    145   %a2 = bitcast i64* %add.ptr2 to <2 x i64>*
    146   %tmp2 = add nsw <2 x i64> %tmp1, %tmp1
    147   %tmp3 = load <2 x i64>, <2 x i64>* %a2, align 8
    148   %res  = mul nsw <2 x i64> %tmp2, %tmp3
    149   ret <2 x i64> %res
    150 }
    151