Home | History | Annotate | Download | only in AArch64
      1 ; REQUIRES: asserts
      2 ; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
      3 ; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=exynos-m1 -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck --check-prefix=EXYNOSM1 %s
      4 ; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=exynos-m3 -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
      5 
      6 ; Test ldr clustering.
      7 ; CHECK: ********** MI Scheduling **********
      8 ; CHECK-LABEL: ldr_int:%bb.0
      9 ; CHECK: Cluster ld/st SU(1) - SU(2)
     10 ; CHECK: SU(1):   %{{[0-9]+}}:gpr32 = LDRWui
     11 ; CHECK: SU(2):   %{{[0-9]+}}:gpr32 = LDRWui
     12 ; EXYNOSM1: ********** MI Scheduling **********
     13 ; EXYNOSM1-LABEL: ldr_int:%bb.0
     14 ; EXYNOSM1: Cluster ld/st SU(1) - SU(2)
     15 ; EXYNOSM1: SU(1):   %{{[0-9]+}}:gpr32 = LDRWui
     16 ; EXYNOSM1: SU(2):   %{{[0-9]+}}:gpr32 = LDRWui
     17 define i32 @ldr_int(i32* %a) nounwind { ; Returns a[1] + a[2]; the checks above expect the two i32 loads to be reported as a cluster.
     18   %p1 = getelementptr inbounds i32, i32* %a, i32 1 ; &a[1]
     19   %tmp1 = load i32, i32* %p1, align 2
     20   %p2 = getelementptr inbounds i32, i32* %a, i32 2 ; &a[2], the element immediately after %p1
     21   %tmp2 = load i32, i32* %p2, align 2
     22   %tmp3 = add i32 %tmp1, %tmp2 ; combine both loaded values into the result
     23   ret i32 %tmp3
     24 }
     25 
     26 ; Test ldpsw clustering.
     27 ; CHECK: ********** MI Scheduling **********
     28 ; CHECK-LABEL: ldp_sext_int:%bb.0
     29 ; CHECK: Cluster ld/st SU(1) - SU(2)
     30 ; CHECK: SU(1):   %{{[0-9]+}}:gpr64 = LDRSWui
     31 ; CHECK: SU(2):   %{{[0-9]+}}:gpr64 = LDRSWui
     32 ; EXYNOSM1: ********** MI Scheduling **********
     33 ; EXYNOSM1-LABEL: ldp_sext_int:%bb.0
     34 ; EXYNOSM1: Cluster ld/st SU(1) - SU(2)
     35 ; EXYNOSM1: SU(1):   %{{[0-9]+}}:gpr64 = LDRSWui
     36 ; EXYNOSM1: SU(2):   %{{[0-9]+}}:gpr64 = LDRSWui
     37 define i64 @ldp_sext_int(i32* %p) nounwind { ; Returns sext(p[0]) + sext(p[1]) as i64; the checks above expect two clustered LDRSWui loads.
     38   %tmp = load i32, i32* %p, align 4 ; p[0]
     39   %add.ptr = getelementptr inbounds i32, i32* %p, i64 1 ; &p[1]
     40   %tmp1 = load i32, i32* %add.ptr, align 4 ; p[1]
     41   %sexttmp = sext i32 %tmp to i64 ; sign-extend p[0]
     42   %sexttmp1 = sext i32 %tmp1 to i64 ; sign-extend p[1]
     43   %add = add nsw i64 %sexttmp1, %sexttmp
     44   ret i64 %add
     45 }
     46 
     47 ; Test ldur clustering.
     48 ; CHECK: ********** MI Scheduling **********
     49 ; CHECK-LABEL: ldur_int:%bb.0
     50 ; CHECK: Cluster ld/st SU(2) - SU(1)
     51 ; CHECK: SU(1):   %{{[0-9]+}}:gpr32 = LDURWi
     52 ; CHECK: SU(2):   %{{[0-9]+}}:gpr32 = LDURWi
     53 ; EXYNOSM1: ********** MI Scheduling **********
     54 ; EXYNOSM1-LABEL: ldur_int:%bb.0
     55 ; EXYNOSM1: Cluster ld/st SU(2) - SU(1)
     56 ; EXYNOSM1: SU(1):   %{{[0-9]+}}:gpr32 = LDURWi
     57 ; EXYNOSM1: SU(2):   %{{[0-9]+}}:gpr32 = LDURWi
     58 define i32 @ldur_int(i32* %a) nounwind { ; Returns a[-1] + a[-2]; the checks above expect LDURWi loads and a cluster printed as SU(2) - SU(1).
     59   %p1 = getelementptr inbounds i32, i32* %a, i32 -1 ; &a[-1] (negative element index)
     60   %tmp1 = load i32, i32* %p1, align 2
     61   %p2 = getelementptr inbounds i32, i32* %a, i32 -2 ; &a[-2], the element immediately before %p1
     62   %tmp2 = load i32, i32* %p2, align 2
     63   %tmp3 = add i32 %tmp1, %tmp2 ; combine both loaded values into the result
     64   ret i32 %tmp3
     65 }
     66 
     67 ; Test sext + zext clustering.
     68 ; CHECK: ********** MI Scheduling **********
     69 ; CHECK-LABEL: ldp_half_sext_zext_int:%bb.0
     70 ; CHECK: Cluster ld/st SU(3) - SU(4)
     71 ; CHECK: SU(3):   %{{[0-9]+}}:gpr64 = LDRSWui
     72 ; CHECK: SU(4):   undef %{{[0-9]+}}.sub_32:gpr64 = LDRWui
     73 ; EXYNOSM1: ********** MI Scheduling **********
     74 ; EXYNOSM1-LABEL: ldp_half_sext_zext_int:%bb.0
     75 ; EXYNOSM1: Cluster ld/st SU(3) - SU(4)
     76 ; EXYNOSM1: SU(3):   %{{[0-9]+}}:gpr64 = LDRSWui
     77 ; EXYNOSM1: SU(4):   undef %{{[0-9]+}}.sub_32:gpr64 = LDRWui
     78 define i64 @ldp_half_sext_zext_int(i64* %q, i32* %p) nounwind { ; Returns sext(p[0]) + zext(p[1]) + *q; the checks above expect the LDRSWui and LDRWui to be clustered as SU(3) - SU(4).
     79   %tmp0 = load i64, i64* %q, align 4 ; separate i64 load from %q, added to the result at the end
     80   %tmp = load i32, i32* %p, align 4 ; p[0]
     81   %add.ptr = getelementptr inbounds i32, i32* %p, i64 1 ; &p[1]
     82   %tmp1 = load i32, i32* %add.ptr, align 4 ; p[1]
     83   %sexttmp = sext i32 %tmp to i64 ; sign-extend p[0]
     84   %sexttmp1 = zext i32 %tmp1 to i64 ; zero-extend p[1] (despite the "sext" in the value name)
     85   %add = add nsw i64 %sexttmp1, %sexttmp
     86   %add1 = add nsw i64 %add, %tmp0 ; fold in the value loaded from %q
     87   ret i64 %add1
     88 }
     89 
     90 ; Test zext + sext clustering.
     91 ; CHECK: ********** MI Scheduling **********
     92 ; CHECK-LABEL: ldp_half_zext_sext_int:%bb.0
     93 ; CHECK: Cluster ld/st SU(3) - SU(4)
     94 ; CHECK: SU(3):   undef %{{[0-9]+}}.sub_32:gpr64 = LDRWui
     95 ; CHECK: SU(4):   %{{[0-9]+}}:gpr64 = LDRSWui
     96 ; EXYNOSM1: ********** MI Scheduling **********
     97 ; EXYNOSM1-LABEL: ldp_half_zext_sext_int:%bb.0
     98 ; EXYNOSM1: Cluster ld/st SU(3) - SU(4)
     99 ; EXYNOSM1: SU(3):   undef %{{[0-9]+}}.sub_32:gpr64 = LDRWui
    100 ; EXYNOSM1: SU(4):   %{{[0-9]+}}:gpr64 = LDRSWui
    101 define i64 @ldp_half_zext_sext_int(i64* %q, i32* %p) nounwind { ; Returns zext(p[0]) + sext(p[1]) + *q; the checks above expect the LDRWui and LDRSWui to be clustered as SU(3) - SU(4).
    102   %tmp0 = load i64, i64* %q, align 4 ; separate i64 load from %q, added to the result at the end
    103   %tmp = load i32, i32* %p, align 4 ; p[0]
    104   %add.ptr = getelementptr inbounds i32, i32* %p, i64 1 ; &p[1]
    105   %tmp1 = load i32, i32* %add.ptr, align 4 ; p[1]
    106   %sexttmp = zext i32 %tmp to i64 ; zero-extend p[0] (despite the "sext" in the value name)
    107   %sexttmp1 = sext i32 %tmp1 to i64 ; sign-extend p[1]
    108   %add = add nsw i64 %sexttmp1, %sexttmp
    109   %add1 = add nsw i64 %add, %tmp0 ; fold in the value loaded from %q
    110   ret i64 %add1
    111 }
    112 
    113 ; Verify we don't cluster volatile loads.
    114 ; CHECK: ********** MI Scheduling **********
    115 ; CHECK-LABEL: ldr_int_volatile:%bb.0
    116 ; CHECK-NOT: Cluster ld/st
    117 ; CHECK: SU(1):   %{{[0-9]+}}:gpr32 = LDRWui
    118 ; CHECK: SU(2):   %{{[0-9]+}}:gpr32 = LDRWui
    119 ; EXYNOSM1: ********** MI Scheduling **********
    120 ; EXYNOSM1-LABEL: ldr_int_volatile:%bb.0
    121 ; EXYNOSM1-NOT: Cluster ld/st
    122 ; EXYNOSM1: SU(1):   %{{[0-9]+}}:gpr32 = LDRWui
    123 ; EXYNOSM1: SU(2):   %{{[0-9]+}}:gpr32 = LDRWui
    124 define i32 @ldr_int_volatile(i32* %a) nounwind { ; Same shape as @ldr_int but with volatile loads; the checks above expect no "Cluster ld/st" line for this function.
    125   %p1 = getelementptr inbounds i32, i32* %a, i32 1 ; &a[1]
    126   %tmp1 = load volatile i32, i32* %p1, align 2 ; volatile load of a[1]
    127   %p2 = getelementptr inbounds i32, i32* %a, i32 2 ; &a[2], the element immediately after %p1
    128   %tmp2 = load volatile i32, i32* %p2, align 2 ; volatile load of a[2]
    129   %tmp3 = add i32 %tmp1, %tmp2 ; combine both loaded values into the result
    130   ret i32 %tmp3
    131 }
    132 
    133 ; Test ldq clustering (no clustering expected for Exynos M1).
    134 ; CHECK: ********** MI Scheduling **********
    135 ; CHECK-LABEL: ldq_cluster:%bb.0
    136 ; CHECK: Cluster ld/st SU(1) - SU(3)
    137 ; CHECK: SU(1):   %{{[0-9]+}}:fpr128 = LDRQui
    138 ; CHECK: SU(3):   %{{[0-9]+}}:fpr128 = LDRQui
    139 ; EXYNOSM1: ********** MI Scheduling **********
    140 ; EXYNOSM1-LABEL: ldq_cluster:%bb.0
    141 ; EXYNOSM1-NOT: Cluster ld/st
    142 define <2 x i64> @ldq_cluster(i64* %p) { ; Loads two adjacent <2 x i64> vectors from %p and returns (v0 + v0) * v1; the default checks above expect the LDRQui loads clustered as SU(1) - SU(3), the EXYNOSM1 checks expect no cluster.
    143   %a1 = bitcast i64* %p to <2 x i64>* ; view p[0..1] as one <2 x i64>
    144   %tmp1 = load <2 x i64>, < 2 x i64>* %a1, align 8 ; first vector; NOTE(review): "< 2 x i64>" has a stray space, cosmetic only
    145   %add.ptr2 = getelementptr inbounds i64, i64* %p, i64 2 ; &p[2], directly after the first vector's two i64 elements
    146   %a2 = bitcast i64* %add.ptr2 to <2 x i64>* ; view p[2..3] as one <2 x i64>
    147   %tmp2 = add nsw <2 x i64> %tmp1, %tmp1 ; arithmetic on the first vector, placed between the two loads in program order
    148   %tmp3 = load <2 x i64>, <2 x i64>* %a2, align 8 ; second vector
    149   %res  = mul nsw <2 x i64> %tmp2, %tmp3
    150   ret <2 x i64> %res
    151 }
    152