; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Instruction-selection checks for unsigned division and remainder on
; operands that are known to be narrow. Every function loads a numerator from
; %in[0] and a denominator from %in[1], optionally clears the high bits of
; each with a shl/lshr pair, and stores the quotient or remainder to %out.
;
; Judging by the checks below, operands limited to 23 significant bits (or
; narrower integer types) are expected to be lowered through a float
; reciprocal (v_rcp_f32 / RECIP_IEEE), while operands of 24 bits or more are
; expected to use the full 32-bit expansion (v_rcp_iflag).

; udiv on i8 operands, no masking needed.
; FUNC-LABEL: {{^}}udiv24_i8:
; SI: v_cvt_f32_ubyte
; SI: v_cvt_f32_ubyte
; SI: v_rcp_f32
; SI: v_cvt_u32_f32

; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @udiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
  %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
  %num = load i8, i8 addrspace(1)* %in
  %den = load i8, i8 addrspace(1)* %den_ptr
  %result = udiv i8 %num, %den
  store i8 %result, i8 addrspace(1)* %out
  ret void
}

; udiv on i16 operands, no masking needed.
; FUNC-LABEL: {{^}}udiv24_i16:
; SI: v_cvt_f32_u32
; SI: v_cvt_f32_u32
; SI: v_rcp_f32
; SI: v_cvt_u32_f32

; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @udiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
  %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
  %num = load i16, i16 addrspace(1)* %in, align 2
  %den = load i16, i16 addrspace(1)* %den_ptr, align 2
  %result = udiv i16 %num, %den
  store i16 %result, i16 addrspace(1)* %out, align 2
  ret void
}

; udiv on i32 with both operands reduced to 23 bits (shl 9 / lshr 9).
; FUNC-LABEL: {{^}}udiv23_i32:
; SI: v_cvt_f32_u32
; SI-DAG: v_cvt_f32_u32
; SI-DAG: v_rcp_f32
; SI: v_cvt_u32_f32

; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @udiv23_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i23.0 = shl i32 %num, 9
  %den.i23.0 = shl i32 %den, 9
  %num.i23 = lshr i32 %num.i23.0, 9
  %den.i23 = lshr i32 %den.i23.0, 9
  %result = udiv i32 %num.i23, %den.i23
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; udiv on i32 with both operands reduced to 24 bits (shl 8 / lshr 8); the
; float-reciprocal path is expected NOT to be used here.
; FUNC-LABEL: {{^}}udiv24_i32:
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32
; EG-NOT: RECIP_IEEE
define void @udiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i24.0 = shl i32 %num, 8
  %den.i24.0 = shl i32 %den, 8
  %num.i24 = lshr i32 %num.i24.0, 8
  %den.i24 = lshr i32 %den.i24.0, 8
  %result = udiv i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; udiv on i32 with a 23-bit numerator and a 24-bit denominator; the
; float-reciprocal path is expected NOT to be used.
; FUNC-LABEL: {{^}}no_udiv24_u23_u24_i32:
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32
; EG-NOT: RECIP_IEEE
define void @no_udiv24_u23_u24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i23.0 = shl i32 %num, 9
  %den.i24.0 = shl i32 %den, 8
  %num.i23 = lshr i32 %num.i23.0, 9
  %den.i24 = lshr i32 %den.i24.0, 8
  %result = udiv i32 %num.i23, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; udiv on i32 with a 24-bit numerator and a 23-bit denominator; the
; float-reciprocal path is expected NOT to be used.
; FUNC-LABEL: {{^}}no_udiv24_u24_u23_i32:
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32
; EG-NOT: RECIP_IEEE
define void @no_udiv24_u24_u23_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i24.0 = shl i32 %num, 8
  %den.i23.0 = shl i32 %den, 9
  %num.i24 = lshr i32 %num.i24.0, 8
  %den.i23 = lshr i32 %den.i23.0, 9
  %result = udiv i32 %num.i24, %den.i23
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; udiv on i32 with both operands reduced to 25 bits (shl 7 / lshr 7).
; FUNC-LABEL: {{^}}udiv25_i32:
; RCP_IFLAG is for URECIP in the full 32b alg
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32

; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @udiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i25.0 = shl i32 %num, 7
  %den.i25.0 = shl i32 %den, 7
  %num.i25 = lshr i32 %num.i25.0, 7
  %den.i25 = lshr i32 %den.i25.0, 7
  %result = udiv i32 %num.i25, %den.i25
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; udiv on i32 with a 24-bit numerator and a 25-bit denominator.
; FUNC-LABEL: {{^}}test_no_udiv24_i32_1:
; RCP_IFLAG is for URECIP in the full 32b alg
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32

; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_udiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i24.0 = shl i32 %num, 8
  %den.i25.0 = shl i32 %den, 7
  %num.i24 = lshr i32 %num.i24.0, 8
  %den.i25 = lshr i32 %den.i25.0, 7
  %result = udiv i32 %num.i24, %den.i25
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; udiv on i32 with a 25-bit numerator and a 24-bit denominator.
; FUNC-LABEL: {{^}}test_no_udiv24_i32_2:
; RCP_IFLAG is for URECIP in the full 32b alg
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32

; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_udiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i25.0 = shl i32 %num, 7
  %den.i24.0 = shl i32 %den, 8
  %num.i25 = lshr i32 %num.i25.0, 7
  %den.i24 = lshr i32 %den.i24.0, 8
  %result = udiv i32 %num.i25, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; urem on i8 operands, no masking needed.
; FUNC-LABEL: {{^}}urem24_i8:
; SI: v_cvt_f32_ubyte
; SI: v_cvt_f32_ubyte
; SI: v_rcp_f32
; SI: v_cvt_u32_f32

; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @urem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
  %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
  %num = load i8, i8 addrspace(1)* %in
  %den = load i8, i8 addrspace(1)* %den_ptr
  %result = urem i8 %num, %den
  store i8 %result, i8 addrspace(1)* %out
  ret void
}

; urem on i16 operands, no masking needed.
; FUNC-LABEL: {{^}}urem24_i16:
; SI: v_cvt_f32_u32
; SI: v_cvt_f32_u32
; SI: v_rcp_f32
; SI: v_cvt_u32_f32

; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @urem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
  %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
  %num = load i16, i16 addrspace(1)* %in, align 2
  %den = load i16, i16 addrspace(1)* %den_ptr, align 2
  %result = urem i16 %num, %den
  store i16 %result, i16 addrspace(1)* %out, align 2
  ret void
}

; urem on i32 with both operands reduced to 24 bits (shl 8 / lshr 8); the
; float-reciprocal path is expected NOT to be used.
; FUNC-LABEL: {{^}}urem24_i32:
; SI-NOT: v_rcp_f32
; EG-NOT: RECIP_IEEE
define void @urem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i24.0 = shl i32 %num, 8
  %den.i24.0 = shl i32 %den, 8
  %num.i24 = lshr i32 %num.i24.0, 8
  %den.i24 = lshr i32 %den.i24.0, 8
  %result = urem i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; urem on i32 with both operands reduced to 25 bits (shl 7 / lshr 7).
; FUNC-LABEL: {{^}}urem25_i32:
; RCP_IFLAG is for URECIP in the full 32b alg
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32

; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @urem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i25.0 = shl i32 %num, 7
  %den.i25.0 = shl i32 %den, 7
  %num.i25 = lshr i32 %num.i25.0, 7
  %den.i25 = lshr i32 %den.i25.0, 7
  %result = urem i32 %num.i25, %den.i25
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; urem on i32 with a 24-bit numerator and a 25-bit denominator.
; FUNC-LABEL: {{^}}test_no_urem24_i32_1:
; RCP_IFLAG is for URECIP in the full 32b alg
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32

; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_urem24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i24.0 = shl i32 %num, 8
  %den.i25.0 = shl i32 %den, 7
  %num.i24 = lshr i32 %num.i24.0, 8
  %den.i25 = lshr i32 %den.i25.0, 7
  %result = urem i32 %num.i24, %den.i25
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; urem on i32 with a 25-bit numerator and a 24-bit denominator.
; FUNC-LABEL: {{^}}test_no_urem24_i32_2:
; RCP_IFLAG is for URECIP in the full 32b alg
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32

; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_urem24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i25.0 = shl i32 %num, 7
  %den.i24.0 = shl i32 %den, 8
  %num.i25 = lshr i32 %num.i25.0, 7
  %den.i24 = lshr i32 %den.i24.0, 8
  %result = urem i32 %num.i25, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; udiv on i32 with a 16-bit numerator and a 23-bit denominator; the
; float-reciprocal path is expected, along with a 0x7fffff mask.
; FUNC-LABEL: {{^}}test_udiv24_u16_u23_i32:
; SI-DAG: v_rcp_f32
; SI-DAG: s_mov_b32 [[MASK:s[0-9]+]], 0x7fffff{{$}}
; SI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]],

; EG: RECIP_IEEE
define void @test_udiv24_u16_u23_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i16.0 = shl i32 %num, 16
  %den.i23.0 = shl i32 %den, 9
  %num.i16 = lshr i32 %num.i16.0, 16
  %den.i23 = lshr i32 %den.i23.0, 9
  %result = udiv i32 %num.i16, %den.i23
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; udiv on i32 with a 23-bit numerator and a 16-bit denominator; the
; float-reciprocal path is expected, along with a 0x7fffff mask.
; FUNC-LABEL: {{^}}test_udiv24_u23_u16_i32:
; SI-DAG: v_rcp_f32
; SI-DAG: s_mov_b32 [[MASK:s[0-9]+]], 0x7fffff{{$}}
; SI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]],

; EG: RECIP_IEEE
define void @test_udiv24_u23_u16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i23.0 = shl i32 %num, 9
  %den.i16.0 = shl i32 %den, 16
  %num.i23 = lshr i32 %num.i23.0, 9
  %den.i16 = lshr i32 %den.i16.0, 16
  %result = udiv i32 %num.i23, %den.i16
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}