; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

; Every kernel below runs a counted loop (counter 0 .. 1023) that divides the
; counter by the kernel argument %arg1 (or takes the remainder) and stores the
; result to %arg[counter].  %arg1 is never written inside the loop.  The check
; lines require the reciprocal instruction to be emitted before the loop label
; and forbid any reciprocal between the loop label and the loop back-branch.

; Unsigned 32-bit quotient, loop-invariant denominator.
; GCN-LABEL: {{^}}udiv32_invariant_denom:
; GCN: v_cvt_f32_u32
; GCN: v_rcp_iflag_f32
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x4f800000,
; GCN: v_cvt_u32_f32_e32
; GCN-DAG: v_mul_hi_u32
; GCN-DAG: v_mul_lo_i32
; GCN-DAG: v_sub_i32_e32
; GCN-DAG: v_cmp_eq_u32_e64
; GCN-DAG: v_cndmask_b32_e64
; GCN-DAG: v_mul_hi_u32
; GCN-DAG: v_add_i32_e32
; GCN-DAG: v_subrev_i32_e32
; GCN-DAG: v_cndmask_b32_e64
; GCN: [[LOOP:BB[0-9_]+]]:
; GCN-NOT: v_rcp
; GCN: s_cbranch_scc0 [[LOOP]]
; GCN: s_endpgm
define amdgpu_kernel void @udiv32_invariant_denom(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
entry:
  br label %loop

exit:                                             ; preds = %loop
  ret void

loop:                                             ; preds = %loop, %entry
  ; %iv is the loop counter; it starts at 0 and the loop exits once %iv.next == 1024.
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  %quot = udiv i32 %iv, %arg1
  %idx = zext i32 %iv to i64
  %slot = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %idx
  store i32 %quot, i32 addrspace(1)* %slot, align 4
  %iv.next = add nuw nsw i32 %iv, 1
  %done = icmp eq i32 %iv.next, 1024
  br i1 %done, label %exit, label %loop
}

; Unsigned 32-bit remainder, loop-invariant denominator.
; GCN-LABEL: {{^}}urem32_invariant_denom:
; GCN: v_cvt_f32_u32
; GCN: v_rcp_iflag_f32
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x4f800000,
; GCN: v_cvt_u32_f32_e32
; GCN-DAG: v_mul_hi_u32
; GCN-DAG: v_mul_lo_i32
; GCN-DAG: v_sub_i32_e32
; GCN-DAG: v_cmp_eq_u32_e64
; GCN-DAG: v_cndmask_b32_e64
; GCN-DAG: v_mul_hi_u32
; GCN-DAG: v_add_i32_e32
; GCN-DAG: v_subrev_i32_e32
; GCN-DAG: v_cndmask_b32_e64
; GCN: [[LOOP:BB[0-9_]+]]:
; GCN-NOT: v_rcp
; GCN: s_cbranch_scc0 [[LOOP]]
; GCN: s_endpgm
define amdgpu_kernel void @urem32_invariant_denom(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
entry:
  br label %loop

exit:                                             ; preds = %loop
  ret void

loop:                                             ; preds = %loop, %entry
  ; %iv is the loop counter; it starts at 0 and the loop exits once %iv.next == 1024.
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  %rem = urem i32 %iv, %arg1
  %idx = zext i32 %iv to i64
  %slot = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %idx
  store i32 %rem, i32 addrspace(1)* %slot, align 4
  %iv.next = add nuw nsw i32 %iv, 1
  %done = icmp eq i32 %iv.next, 1024
  br i1 %done, label %exit, label %loop
}

; Signed 32-bit quotient, loop-invariant denominator.
; GCN-LABEL: {{^}}sdiv32_invariant_denom:
; GCN: v_cvt_f32_u32
; GCN: v_rcp_iflag_f32
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x4f800000,
; GCN: v_cvt_u32_f32_e32
; GCN-DAG: v_mul_hi_u32
; GCN-DAG: v_mul_lo_i32
; GCN-DAG: v_sub_i32_e32
; GCN-DAG: v_cmp_eq_u32_e64
; GCN-DAG: v_cndmask_b32_e64
; GCN-DAG: v_mul_hi_u32
; GCN-DAG: v_add_i32_e32
; GCN-DAG: v_subrev_i32_e32
; GCN-DAG: v_cndmask_b32_e64
; GCN: [[LOOP:BB[0-9_]+]]:
; GCN-NOT: v_rcp
; GCN: s_cbranch_scc0 [[LOOP]]
; GCN: s_endpgm
define amdgpu_kernel void @sdiv32_invariant_denom(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
entry:
  br label %loop

exit:                                             ; preds = %loop
  ret void

loop:                                             ; preds = %loop, %entry
  ; %iv is the loop counter; it starts at 0 and the loop exits once %iv.next == 1024.
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  %quot = sdiv i32 %iv, %arg1
  %idx = zext i32 %iv to i64
  %slot = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %idx
  store i32 %quot, i32 addrspace(1)* %slot, align 4
  %iv.next = add nuw nsw i32 %iv, 1
  %done = icmp eq i32 %iv.next, 1024
  br i1 %done, label %exit, label %loop
}

; Signed 32-bit remainder, loop-invariant denominator.
; GCN-LABEL: {{^}}srem32_invariant_denom:
; GCN: v_cvt_f32_u32
; GCN: v_rcp_iflag_f32
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x4f800000,
; GCN: v_cvt_u32_f32_e32
; GCN-DAG: v_mul_hi_u32
; GCN-DAG: v_mul_lo_i32
; GCN-DAG: v_sub_i32_e32
; GCN-DAG: v_cmp_eq_u32_e64
; GCN-DAG: v_cndmask_b32_e64
; GCN-DAG: v_mul_hi_u32
; GCN-DAG: v_add_i32_e32
; GCN-DAG: v_subrev_i32_e32
; GCN-DAG: v_cndmask_b32_e64
; GCN: [[LOOP:BB[0-9_]+]]:
; GCN-NOT: v_rcp
; GCN: s_cbranch_scc0 [[LOOP]]
; GCN: s_endpgm
define amdgpu_kernel void @srem32_invariant_denom(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
entry:
  br label %loop

exit:                                             ; preds = %loop
  ret void

loop:                                             ; preds = %loop, %entry
  ; %iv is the loop counter; it starts at 0 and the loop exits once %iv.next == 1024.
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  %rem = srem i32 %iv, %arg1
  %idx = zext i32 %iv to i64
  %slot = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %idx
  store i32 %rem, i32 addrspace(1)* %slot, align 4
  %iv.next = add nuw nsw i32 %iv, 1
  %done = icmp eq i32 %iv.next, 1024
  br i1 %done, label %exit, label %loop
}

; Unsigned 16-bit quotient, loop-invariant denominator.
; GCN-LABEL: {{^}}udiv16_invariant_denom:
; GCN: v_cvt_f32_u32
; GCN: v_rcp_iflag_f32
; GCN: [[LOOP:BB[0-9_]+]]:
; GCN-NOT: v_rcp
; GCN: s_cbranch_scc0 [[LOOP]]
; GCN: s_endpgm
define amdgpu_kernel void @udiv16_invariant_denom(i16 addrspace(1)* nocapture %arg, i16 %arg1) {
entry:
  br label %loop

exit:                                             ; preds = %loop
  ret void

loop:                                             ; preds = %loop, %entry
  ; %iv is the i16 loop counter; it starts at 0 and the loop exits once %iv.next == 1024.
  %iv = phi i16 [ 0, %entry ], [ %iv.next, %loop ]
  %quot = udiv i16 %iv, %arg1
  %idx = zext i16 %iv to i64
  %slot = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 %idx
  store i16 %quot, i16 addrspace(1)* %slot, align 2
  %iv.next = add nuw nsw i16 %iv, 1
  %done = icmp eq i16 %iv.next, 1024
  br i1 %done, label %exit, label %loop
}

; Unsigned 16-bit remainder, loop-invariant denominator.
; GCN-LABEL: {{^}}urem16_invariant_denom:
; GCN: v_cvt_f32_u32
; GCN: v_rcp_iflag_f32
; GCN: [[LOOP:BB[0-9_]+]]:
; GCN-NOT: v_rcp
; GCN: s_cbranch_scc0 [[LOOP]]
; GCN: s_endpgm
define amdgpu_kernel void @urem16_invariant_denom(i16 addrspace(1)* nocapture %arg, i16 %arg1) {
entry:
  br label %loop

exit:                                             ; preds = %loop
  ret void

loop:                                             ; preds = %loop, %entry
  ; %iv is the i16 loop counter; it starts at 0 and the loop exits once %iv.next == 1024.
  %iv = phi i16 [ 0, %entry ], [ %iv.next, %loop ]
  %rem = urem i16 %iv, %arg1
  %idx = zext i16 %iv to i64
  %slot = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 %idx
  store i16 %rem, i16 addrspace(1)* %slot, align 2
  %iv.next = add nuw nsw i16 %iv, 1
  %done = icmp eq i16 %iv.next, 1024
  br i1 %done, label %exit, label %loop
}

; Signed 16-bit quotient, loop-invariant denominator.
; GCN-LABEL: {{^}}sdiv16_invariant_denom:
; GCN-DAG: s_sext_i32_i16
; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 0x7fffffff
; GCN-DAG: v_cvt_f32_i32
; GCN-DAG: v_rcp_iflag_f32
; GCN: [[LOOP:BB[0-9_]+]]:
; GCN-NOT: v_rcp
; GCN: s_cbranch_scc0 [[LOOP]]
; GCN: s_endpgm
define amdgpu_kernel void @sdiv16_invariant_denom(i16 addrspace(1)* nocapture %arg, i16 %arg1) {
entry:
  br label %loop

exit:                                             ; preds = %loop
  ret void

loop:                                             ; preds = %loop, %entry
  ; %iv is the i16 loop counter; it starts at 0 and the loop exits once %iv.next == 1024.
  %iv = phi i16 [ 0, %entry ], [ %iv.next, %loop ]
  %quot = sdiv i16 %iv, %arg1
  %idx = zext i16 %iv to i64
  %slot = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 %idx
  store i16 %quot, i16 addrspace(1)* %slot, align 2
  %iv.next = add nuw nsw i16 %iv, 1
  %done = icmp eq i16 %iv.next, 1024
  br i1 %done, label %exit, label %loop
}

; Signed 16-bit remainder, loop-invariant denominator.
; GCN-LABEL: {{^}}srem16_invariant_denom:
; GCN-DAG: s_sext_i32_i16
; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 0x7fffffff
; GCN-DAG: v_cvt_f32_i32
; GCN-DAG: v_rcp_iflag_f32
; GCN: [[LOOP:BB[0-9_]+]]:
; GCN-NOT: v_rcp
; GCN: s_cbranch_scc0 [[LOOP]]
; GCN: s_endpgm
define amdgpu_kernel void @srem16_invariant_denom(i16 addrspace(1)* nocapture %arg, i16 %arg1) {
entry:
  br label %loop

exit:                                             ; preds = %loop
  ret void

loop:                                             ; preds = %loop, %entry
  ; %iv is the i16 loop counter; it starts at 0 and the loop exits once %iv.next == 1024.
  %iv = phi i16 [ 0, %entry ], [ %iv.next, %loop ]
  %rem = srem i16 %iv, %arg1
  %idx = zext i16 %iv to i64
  %slot = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 %idx
  store i16 %rem, i16 addrspace(1)* %slot, align 2
  %iv.next = add nuw nsw i16 %iv, 1
  %done = icmp eq i16 %iv.next, 1024
  br i1 %done, label %exit, label %loop
}