; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Unsigned division and remainder on operands that are at most 24 bits wide.
;
; Positive tests (both operands provably fit in 24 bits) expect a
; convert-to-float, reciprocal, convert-back sequence on both targets.
; Negative tests (at least one operand may use 25 bits) expect that sequence
; to be absent; on amdgcn they additionally expect v_rcp_iflag, which the
; checks below attribute to the full 32-bit algorithm.
;
; Every kernel reads the numerator from %in[0] and the denominator from
; %in[1], and stores the result to %out.

; 8-bit udiv: operands are loaded as i8, so no masking is required.
; FUNC-LABEL: {{^}}udiv24_i8:
; SI: v_cvt_f32_ubyte
; SI: v_cvt_f32_ubyte
; SI: v_rcp_f32
; SI: v_cvt_u32_f32

; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @udiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
  %rhs.addr = getelementptr i8, i8 addrspace(1)* %in, i8 1
  %lhs = load i8, i8 addrspace(1)* %in
  %rhs = load i8, i8 addrspace(1)* %rhs.addr
  %quot = udiv i8 %lhs, %rhs
  store i8 %quot, i8 addrspace(1)* %out
  ret void
}

; 16-bit udiv: operands are loaded as i16, so no masking is required.
; FUNC-LABEL: {{^}}udiv24_i16:
; SI: v_cvt_f32_u32
; SI: v_cvt_f32_u32
; SI: v_rcp_f32
; SI: v_cvt_u32_f32

; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @udiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
  %rhs.addr = getelementptr i16, i16 addrspace(1)* %in, i16 1
  %lhs = load i16, i16 addrspace(1)* %in, align 2
  %rhs = load i16, i16 addrspace(1)* %rhs.addr, align 2
  %quot = udiv i16 %lhs, %rhs
  store i16 %quot, i16 addrspace(1)* %out, align 2
  ret void
}

; 32-bit udiv where shl 8 followed by lshr 8 clears the top 8 bits of both
; operands, leaving 24-bit unsigned values.
; FUNC-LABEL: {{^}}udiv24_i32:
; SI: v_cvt_f32_u32
; SI-DAG: v_cvt_f32_u32
; SI-DAG: v_rcp_f32
; SI: v_cvt_u32_f32

; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @udiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %rhs.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %lhs = load i32, i32 addrspace(1)* %in, align 4
  %rhs = load i32, i32 addrspace(1)* %rhs.addr, align 4
  %lhs.shl = shl i32 %lhs, 8
  %rhs.shl = shl i32 %rhs, 8
  %lhs.u24 = lshr i32 %lhs.shl, 8
  %rhs.u24 = lshr i32 %rhs.shl, 8
  %quot = udiv i32 %lhs.u24, %rhs.u24
  store i32 %quot, i32 addrspace(1)* %out, align 4
  ret void
}

; 32-bit udiv where only the top 7 bits of both operands are cleared, so
; each may still use 25 bits; the 24-bit sequence must not be selected.
; FUNC-LABEL: {{^}}udiv25_i32:
; v_rcp_iflag comes from URECIP in the full 32-bit algorithm.
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32

; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @udiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %rhs.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %lhs = load i32, i32 addrspace(1)* %in, align 4
  %rhs = load i32, i32 addrspace(1)* %rhs.addr, align 4
  %lhs.shl = shl i32 %lhs, 7
  %rhs.shl = shl i32 %rhs, 7
  %lhs.u25 = lshr i32 %lhs.shl, 7
  %rhs.u25 = lshr i32 %rhs.shl, 7
  %quot = udiv i32 %lhs.u25, %rhs.u25
  store i32 %quot, i32 addrspace(1)* %out, align 4
  ret void
}

; Mixed widths: 24-bit numerator, 25-bit denominator. One wide operand is
; enough to rule out the 24-bit sequence.
; FUNC-LABEL: {{^}}test_no_udiv24_i32_1:
; v_rcp_iflag comes from URECIP in the full 32-bit algorithm.
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32

; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_udiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %rhs.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %lhs = load i32, i32 addrspace(1)* %in, align 4
  %rhs = load i32, i32 addrspace(1)* %rhs.addr, align 4
  %lhs.shl = shl i32 %lhs, 8
  %rhs.shl = shl i32 %rhs, 7
  %lhs.u24 = lshr i32 %lhs.shl, 8
  %rhs.u25 = lshr i32 %rhs.shl, 7
  %quot = udiv i32 %lhs.u24, %rhs.u25
  store i32 %quot, i32 addrspace(1)* %out, align 4
  ret void
}

; Mixed widths, mirrored: 25-bit numerator, 24-bit denominator.
; FUNC-LABEL: {{^}}test_no_udiv24_i32_2:
; v_rcp_iflag comes from URECIP in the full 32-bit algorithm.
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32

; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_udiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %rhs.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %lhs = load i32, i32 addrspace(1)* %in, align 4
  %rhs = load i32, i32 addrspace(1)* %rhs.addr, align 4
  %lhs.shl = shl i32 %lhs, 7
  %rhs.shl = shl i32 %rhs, 8
  %lhs.u25 = lshr i32 %lhs.shl, 7
  %rhs.u24 = lshr i32 %rhs.shl, 8
  %quot = udiv i32 %lhs.u25, %rhs.u24
  store i32 %quot, i32 addrspace(1)* %out, align 4
  ret void
}

; 8-bit urem: operands are loaded as i8, so no masking is required.
; FUNC-LABEL: {{^}}urem24_i8:
; SI: v_cvt_f32_ubyte
; SI: v_cvt_f32_ubyte
; SI: v_rcp_f32
; SI: v_cvt_u32_f32

; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @urem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
  %rhs.addr = getelementptr i8, i8 addrspace(1)* %in, i8 1
  %lhs = load i8, i8 addrspace(1)* %in
  %rhs = load i8, i8 addrspace(1)* %rhs.addr
  %rem = urem i8 %lhs, %rhs
  store i8 %rem, i8 addrspace(1)* %out
  ret void
}

; 16-bit urem: operands are loaded as i16, so no masking is required.
; FUNC-LABEL: {{^}}urem24_i16:
; SI: v_cvt_f32_u32
; SI: v_cvt_f32_u32
; SI: v_rcp_f32
; SI: v_cvt_u32_f32

; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @urem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
  %rhs.addr = getelementptr i16, i16 addrspace(1)* %in, i16 1
  %lhs = load i16, i16 addrspace(1)* %in, align 2
  %rhs = load i16, i16 addrspace(1)* %rhs.addr, align 2
  %rem = urem i16 %lhs, %rhs
  store i16 %rem, i16 addrspace(1)* %out, align 2
  ret void
}

; 32-bit urem where shl 8 followed by lshr 8 clears the top 8 bits of both
; operands, leaving 24-bit unsigned values.
; FUNC-LABEL: {{^}}urem24_i32:
; SI: v_cvt_f32_u32
; SI: v_cvt_f32_u32
; SI: v_rcp_f32
; SI: v_cvt_u32_f32

; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @urem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %rhs.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %lhs = load i32, i32 addrspace(1)* %in, align 4
  %rhs = load i32, i32 addrspace(1)* %rhs.addr, align 4
  %lhs.shl = shl i32 %lhs, 8
  %rhs.shl = shl i32 %rhs, 8
  %lhs.u24 = lshr i32 %lhs.shl, 8
  %rhs.u24 = lshr i32 %rhs.shl, 8
  %rem = urem i32 %lhs.u24, %rhs.u24
  store i32 %rem, i32 addrspace(1)* %out, align 4
  ret void
}

; 32-bit urem where only the top 7 bits of both operands are cleared, so
; each may still use 25 bits; the 24-bit sequence must not be selected.
; FUNC-LABEL: {{^}}urem25_i32:
; v_rcp_iflag comes from URECIP in the full 32-bit algorithm.
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32

; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @urem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %rhs.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %lhs = load i32, i32 addrspace(1)* %in, align 4
  %rhs = load i32, i32 addrspace(1)* %rhs.addr, align 4
  %lhs.shl = shl i32 %lhs, 7
  %rhs.shl = shl i32 %rhs, 7
  %lhs.u25 = lshr i32 %lhs.shl, 7
  %rhs.u25 = lshr i32 %rhs.shl, 7
  %rem = urem i32 %lhs.u25, %rhs.u25
  store i32 %rem, i32 addrspace(1)* %out, align 4
  ret void
}

; Mixed widths: 24-bit numerator, 25-bit denominator. One wide operand is
; enough to rule out the 24-bit sequence.
; FUNC-LABEL: {{^}}test_no_urem24_i32_1:
; v_rcp_iflag comes from URECIP in the full 32-bit algorithm.
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32

; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_urem24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %rhs.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %lhs = load i32, i32 addrspace(1)* %in, align 4
  %rhs = load i32, i32 addrspace(1)* %rhs.addr, align 4
  %lhs.shl = shl i32 %lhs, 8
  %rhs.shl = shl i32 %rhs, 7
  %lhs.u24 = lshr i32 %lhs.shl, 8
  %rhs.u25 = lshr i32 %rhs.shl, 7
  %rem = urem i32 %lhs.u24, %rhs.u25
  store i32 %rem, i32 addrspace(1)* %out, align 4
  ret void
}

; Mixed widths, mirrored: 25-bit numerator, 24-bit denominator.
; FUNC-LABEL: {{^}}test_no_urem24_i32_2:
; v_rcp_iflag comes from URECIP in the full 32-bit algorithm.
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32

; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_urem24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %rhs.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %lhs = load i32, i32 addrspace(1)* %in, align 4
  %rhs = load i32, i32 addrspace(1)* %rhs.addr, align 4
  %lhs.shl = shl i32 %lhs, 7
  %rhs.shl = shl i32 %rhs, 8
  %lhs.u25 = lshr i32 %lhs.shl, 7
  %rhs.u24 = lshr i32 %rhs.shl, 8
  %rem = urem i32 %lhs.u25, %rhs.u24
  store i32 %rem, i32 addrspace(1)* %out, align 4
  ret void
}