# RUN: llc -march=hexagon -run-pass post-RA-sched %s -o - | FileCheck %s

# Test that the Post RA scheduler does not schedule back-to-back loads
# when there is another instruction to schedule. The scheduler avoids
# the back-to-back loads to reduce potential bank conflicts.

# CHECK: = L2_loadrigp
# CHECK: = A2_tfr
# CHECK: = L2_loadrigp

# CHECK: = L4_loadri_rr
# CHECK: = S2_tstbit_i
# CHECK: = L4_loadri_rr

--- |
  %s.0 = type { [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [3 x i32], [24 x i32], [8 x %s.1], [5 x i32] }
  %s.1 = type { i32, i32 }

  @g0 = global i64 0
  @g1 = global i64 0
  @g2 = global i32 0
  @g3 = global i32 0
  @g4 = global i8 0

  declare i32 @llvm.hexagon.S2.cl0(i32) #0
  declare i32 @llvm.hexagon.S2.setbit.r(i32, i32) #0
  declare i64 @llvm.hexagon.M2.vmpy2s.s0(i32, i32) #0
  declare i64 @llvm.hexagon.M2.vmac2s.s0(i64, i32, i32) #0
  declare i64 @llvm.hexagon.A2.vaddws(i64, i64) #0
  declare i32 @llvm.hexagon.A4.modwrapu(i32, i32) #0

  define void @f0(i32 %a0) {
  b0:
    %v0 = bitcast [10 x %s.0]* inttoptr (i32 -121502345 to [10 x %s.0]*) to [10 x %s.0]*
    br label %b1

  b1:                                               ; preds = %b5, %b0
    %v1 = phi i32 [ 0, %b0 ], [ %v28, %b5 ]
    %v2 = phi i32 [ 0, %b0 ], [ %v27, %b5 ]
    %v3 = load i32, i32* @g2, align 4
    %v4 = load i32, i32* @g3, align 8
    %v5 = and i32 %v4, %v3
    %v6 = getelementptr [10 x %s.0], [10 x %s.0]* %v0, i32 0, i32 %v2
    %v7 = bitcast %s.0* %v6 to %s.0*
    %v8 = getelementptr %s.0, %s.0* %v7, i32 0, i32 12
    %v9 = getelementptr %s.0, %s.0* %v7, i32 0, i32 13
    br label %b2

  b2:                                               ; preds = %b4, %b1
    %v10 = phi i64 [ %v24, %b4 ], [ 0, %b1 ]
    %v11 = phi i32 [ %v13, %b4 ], [ %v5, %b1 ]
    %v12 = tail call i32 @llvm.hexagon.S2.cl0(i32 %v11)
    %v13 = tail call i32 @llvm.hexagon.S2.setbit.r(i32 %v11, i32 %v12)
    %v14 = getelementptr [24 x i32], [24 x i32]* %v8, i32 0, i32 %v12
    %v15 = load i32, i32* %v14, align 4
    %v16 = tail call i64 @llvm.hexagon.M2.vmpy2s.s0(i32 %v15, i32 %v15)
    %v17 = getelementptr [24 x i32], [24 x i32]* %v9, i32 0, i32 %v12
    %v18 = load i32, i32* %v17, align 4
    %v19 = tail call i64 @llvm.hexagon.M2.vmac2s.s0(i64 %v16, i32 %v18, i32 %v18)
    %v20 = load i8, i8* @g4, align 1
    %v21 = and i8 %v20, 1
    %v22 = icmp eq i8 %v21, 0
    br i1 %v22, label %b3, label %b4

  b3:                                               ; preds = %b2
    %v23 = tail call i64 @llvm.hexagon.A2.vaddws(i64 %v10, i64 %v19)
    store i64 %v23, i64* @g0, align 8
    br label %b4

  b4:                                               ; preds = %b3, %b2
    %v24 = phi i64 [ %v23, %b3 ], [ %v10, %b2 ]
    %v25 = icmp eq i32 %v13, 0
    br i1 %v25, label %b5, label %b2

  b5:                                               ; preds = %b4
    %v26 = add i32 %v2, 1
    %v27 = tail call i32 @llvm.hexagon.A4.modwrapu(i32 %v26, i32 10)
    %v28 = add i32 %v1, 1
    %v29 = icmp eq i32 %v28, %a0
    br i1 %v29, label %b6, label %b1

  b6:                                               ; preds = %b5
    store i64 %v19, i64* @g1, align 8
    ret void
  }

  attributes #0 = { nounwind readnone }

...
---
name: f0
alignment: 4
tracksRegLiveness: true
registers:
liveins:
  - { reg: '$r0', virtual-reg: '' }
fixedStack:
stack:
constants:
body: |
  bb.0:
    successors: %bb.1(0x80000000)
    liveins: $r0:0x00000001

    $r3 = A2_tfrsi 0
    $r2 = A2_tfrsi -121502345
    $r4 = A2_tfrsi 10
    J2_loop0r %bb.1, killed $r0, implicit-def $lc0, implicit-def $sa0, implicit-def $usr

  bb.1 (address-taken):
    successors: %bb.2(0x80000000)
    liveins: $lc0:0x00000004, $r2:0x00000001, $r3:0x00000001, $r4:0x00000001, $sa0:0x00000004

    $r5 = M2_mpysip $r3, 1824
    $r7 = L2_loadrigp @g2, implicit $gp :: (dereferenceable load 4 from @g2)
    $r8 = L2_loadrigp @g3, implicit killed $gp :: (dereferenceable load 4 from @g3, align 8)
    $r6 = A2_tfr $r5
    $r7 = A2_and killed $r8, killed $r7
    $r5 = M2_accii killed $r5, $r2, 1248
    $r6 = M2_accii killed $r6, $r2, 1152
    $d0 = A2_tfrpi 0

  bb.2:
    successors: %bb.3(0x04000000), %bb.2(0x7c000000)
    liveins: $lc0:0x00000004, $r0:0x00000001, $r1:0x00000001, $r2:0x00000001, $r3:0x00000001, $r4:0x00000001, $r5:0x00000001, $r6:0x00000001, $r7:0x00000001, $sa0:0x00000004

    $r8 = S2_cl0 $r7
    $r12 = L2_loadrubgp @g4, implicit $gp :: (dereferenceable load 1 from @g4)
    $r7 = S2_setbit_r killed $r7, $r8
    $r9 = L4_loadri_rr $r6, $r8, 2 :: (load 4 from %ir.v14)
    $r13 = L4_loadri_rr $r5, killed $r8, 2 :: (load 4 from %ir.v17)
    $d4 = M2_vmpy2s_s0 killed $r9, $r9, implicit-def dead $usr_ovf
    $p0 = S2_tstbit_i killed $r12, 0
    $d4 = M2_vmac2s_s0 killed $d4, killed $r13, $r13, implicit-def dead $usr_ovf
    $p1 = C2_cmpeqi $r7, 0
    $d6 = A2_vaddws $d0, $d4, implicit-def dead $usr_ovf
    $d0 = A2_tfrpt $p0, killed $d0, implicit $d0
    S4_pstorerdf_abs $p0, @g0, $d6, implicit killed $gp :: (store 8 into @g0)
    $d0 = A2_tfrpf killed $p0, killed $d6, implicit killed $d0
    J2_jumpf killed $p1, %bb.2, implicit-def dead $pc

  bb.3:
    successors: %bb.4(0x04000000), %bb.1(0x7c000000)
    liveins: $lc0:0x00000004, $r2:0x00000001, $r3:0x00000001, $r4:0x00000001, $r8:0x00000001, $r9:0x00000001, $sa0:0x00000004

    $r3 = A2_addi killed $r3, 1
    $r3 = A4_modwrapu killed $r3, $r4
    ENDLOOP0 %bb.1, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0

  bb.4:
    liveins: $r8:0x00000001, $r9:0x00000001

    S2_storerdgp @g1, killed $d4, implicit killed $gp :: (store 8 into @g1)
    PS_jmpret killed $r31, implicit-def dead $pc
...