; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck %s

; Integer division / remainder coverage for the -amdgpu-codegenprepare pass.
; Every kernel below performs exactly one udiv/urem/sdiv/srem on its two value
; arguments and stores the result through %out; the assertions pin the
; instruction sequence that opt is expected to print in its place.

; Unsigned 32-bit division. Expected sequence: a fast float reciprocal of y
; scaled by 0x41F0000000000000 (2^32), refined with 64-bit multiplies, then a
; quotient estimate q that is finally selected among q, q+1 and q-1.
define amdgpu_kernel void @udiv_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) {
; CHECK-LABEL: @udiv_i32(
; CHECK-NEXT:    [[TMP1:%.*]] = uitofp i32 [[Y:%.*]] to float
; CHECK-NEXT:    [[TMP2:%.*]] = fdiv fast float 1.000000e+00, [[TMP1]]
; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast float [[TMP2]], 0x41F0000000000000
; CHECK-NEXT:    [[TMP4:%.*]] = fptoui float [[TMP3]] to i32
; CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = zext i32 [[Y]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP5]], [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
; CHECK-NEXT:    [[TMP9:%.*]] = lshr i64 [[TMP7]], 32
; CHECK-NEXT:    [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
; CHECK-NEXT:    [[TMP11:%.*]] = sub i32 0, [[TMP8]]
; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[TMP10]], 0
; CHECK-NEXT:    [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 [[TMP8]]
; CHECK-NEXT:    [[TMP14:%.*]] = zext i32 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = zext i32 [[TMP4]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP14]], [[TMP15]]
; CHECK-NEXT:    [[TMP17:%.*]] = trunc i64 [[TMP16]] to i32
; CHECK-NEXT:    [[TMP18:%.*]] = lshr i64 [[TMP16]], 32
; CHECK-NEXT:    [[TMP19:%.*]] = trunc i64 [[TMP18]] to i32
; CHECK-NEXT:    [[TMP20:%.*]] = add i32 [[TMP4]], [[TMP19]]
; CHECK-NEXT:    [[TMP21:%.*]] = sub i32 [[TMP4]], [[TMP19]]
; CHECK-NEXT:    [[TMP22:%.*]] = select i1 [[TMP12]], i32 [[TMP20]], i32 [[TMP21]]
; CHECK-NEXT:    [[TMP23:%.*]] = zext i32 [[TMP22]] to i64
; CHECK-NEXT:    [[TMP24:%.*]] = zext i32 [[X:%.*]] to i64
; CHECK-NEXT:    [[TMP25:%.*]] = mul i64 [[TMP23]], [[TMP24]]
; CHECK-NEXT:    [[TMP26:%.*]] = trunc i64 [[TMP25]] to i32
; CHECK-NEXT:    [[TMP27:%.*]] = lshr i64 [[TMP25]], 32
; CHECK-NEXT:    [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32
; CHECK-NEXT:    [[TMP29:%.*]] = mul i32 [[TMP28]], [[Y]]
; CHECK-NEXT:    [[TMP30:%.*]] = sub i32 [[X]], [[TMP29]]
; CHECK-NEXT:    [[TMP31:%.*]] = icmp uge i32 [[TMP30]], [[Y]]
; CHECK-NEXT:    [[TMP32:%.*]] = select i1 [[TMP31]], i32 -1, i32 0
; CHECK-NEXT:    [[TMP33:%.*]] = icmp uge i32 [[X]], [[TMP29]]
; CHECK-NEXT:    [[TMP34:%.*]] = select i1 [[TMP33]], i32 -1, i32 0
; CHECK-NEXT:    [[TMP35:%.*]] = and i32 [[TMP32]], [[TMP34]]
; CHECK-NEXT:    [[TMP36:%.*]] = icmp eq i32 [[TMP35]], 0
; CHECK-NEXT:    [[TMP37:%.*]] = add i32 [[TMP28]], 1
; CHECK-NEXT:    [[TMP38:%.*]] = sub i32 [[TMP28]], 1
; CHECK-NEXT:    [[TMP39:%.*]] = select i1 [[TMP36]], i32 [[TMP28]], i32 [[TMP37]]
; CHECK-NEXT:    [[TMP40:%.*]] = select i1 [[TMP33]], i32 [[TMP39]], i32 [[TMP38]]
; CHECK-NEXT:    store i32 [[TMP40]], i32 addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT:    ret void
;
  %r = udiv i32 %x, %y
  store i32 %r, i32 addrspace(1)* %out
  ret void
}

; Unsigned 32-bit remainder. Same reciprocal/refinement prefix as @udiv_i32;
; the tail selects among the remainder estimate r, r-y and r+y.
define amdgpu_kernel void @urem_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) {
; CHECK-LABEL: @urem_i32(
; CHECK-NEXT:    [[TMP1:%.*]] = uitofp i32 [[Y:%.*]] to float
; CHECK-NEXT:    [[TMP2:%.*]] = fdiv fast float 1.000000e+00, [[TMP1]]
; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast float [[TMP2]], 0x41F0000000000000
; CHECK-NEXT:    [[TMP4:%.*]] = fptoui float [[TMP3]] to i32
; CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = zext i32 [[Y]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP5]], [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
; CHECK-NEXT:    [[TMP9:%.*]] = lshr i64 [[TMP7]], 32
; CHECK-NEXT:    [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
; CHECK-NEXT:    [[TMP11:%.*]] = sub i32 0, [[TMP8]]
; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[TMP10]], 0
; CHECK-NEXT:    [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 [[TMP8]]
; CHECK-NEXT:    [[TMP14:%.*]] = zext i32 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = zext i32 [[TMP4]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP14]], [[TMP15]]
; CHECK-NEXT:    [[TMP17:%.*]] = trunc i64 [[TMP16]] to i32
; CHECK-NEXT:    [[TMP18:%.*]] = lshr i64 [[TMP16]], 32
; CHECK-NEXT:    [[TMP19:%.*]] = trunc i64 [[TMP18]] to i32
; CHECK-NEXT:    [[TMP20:%.*]] = add i32 [[TMP4]], [[TMP19]]
; CHECK-NEXT:    [[TMP21:%.*]] = sub i32 [[TMP4]], [[TMP19]]
; CHECK-NEXT:    [[TMP22:%.*]] = select i1 [[TMP12]], i32 [[TMP20]], i32 [[TMP21]]
; CHECK-NEXT:    [[TMP23:%.*]] = zext i32 [[TMP22]] to i64
; CHECK-NEXT:    [[TMP24:%.*]] = zext i32 [[X:%.*]] to i64
; CHECK-NEXT:    [[TMP25:%.*]] = mul i64 [[TMP23]], [[TMP24]]
; CHECK-NEXT:    [[TMP26:%.*]] = trunc i64 [[TMP25]] to i32
; CHECK-NEXT:    [[TMP27:%.*]] = lshr i64 [[TMP25]], 32
; CHECK-NEXT:    [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32
; CHECK-NEXT:    [[TMP29:%.*]] = mul i32 [[TMP28]], [[Y]]
; CHECK-NEXT:    [[TMP30:%.*]] = sub i32 [[X]], [[TMP29]]
; CHECK-NEXT:    [[TMP31:%.*]] = icmp uge i32 [[TMP30]], [[Y]]
; CHECK-NEXT:    [[TMP32:%.*]] = select i1 [[TMP31]], i32 -1, i32 0
; CHECK-NEXT:    [[TMP33:%.*]] = icmp uge i32 [[X]], [[TMP29]]
; CHECK-NEXT:    [[TMP34:%.*]] = select i1 [[TMP33]], i32 -1, i32 0
; CHECK-NEXT:    [[TMP35:%.*]] = and i32 [[TMP32]], [[TMP34]]
; CHECK-NEXT:    [[TMP36:%.*]] = icmp eq i32 [[TMP35]], 0
; CHECK-NEXT:    [[TMP37:%.*]] = sub i32 [[TMP30]], [[Y]]
; CHECK-NEXT:    [[TMP38:%.*]] = add i32 [[TMP30]], [[Y]]
; CHECK-NEXT:    [[TMP39:%.*]] = select i1 [[TMP36]], i32 [[TMP30]], i32 [[TMP37]]
; CHECK-NEXT:    [[TMP40:%.*]] = select i1 [[TMP33]], i32 [[TMP39]], i32 [[TMP38]]
; CHECK-NEXT:    store i32 [[TMP40]], i32 addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT:    ret void
;
  %r = urem i32 %x, %y
  store i32 %r, i32 addrspace(1)* %out
  ret void
}

; Signed 32-bit division. Expected sequence: sign masks via ashr 31, operands
; made non-negative with add/xor, the unsigned sequence run on those values,
; and the xor of both sign masks applied to the quotient via xor/sub.
define amdgpu_kernel void @sdiv_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) {
; CHECK-LABEL: @sdiv_i32(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr i32 [[X:%.*]], 31
; CHECK-NEXT:    [[TMP2:%.*]] = ashr i32 [[Y:%.*]], 31
; CHECK-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[X]], [[TMP1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[Y]], [[TMP2]]
; CHECK-NEXT:    [[TMP6:%.*]] = xor i32 [[TMP4]], [[TMP1]]
; CHECK-NEXT:    [[TMP7:%.*]] = xor i32 [[TMP5]], [[TMP2]]
; CHECK-NEXT:    [[TMP8:%.*]] = uitofp i32 [[TMP7]] to float
; CHECK-NEXT:    [[TMP9:%.*]] = fdiv fast float 1.000000e+00, [[TMP8]]
; CHECK-NEXT:    [[TMP10:%.*]] = fmul fast float [[TMP9]], 0x41F0000000000000
; CHECK-NEXT:    [[TMP11:%.*]] = fptoui float [[TMP10]] to i32
; CHECK-NEXT:    [[TMP12:%.*]] = zext i32 [[TMP11]] to i64
; CHECK-NEXT:    [[TMP13:%.*]] = zext i32 [[TMP7]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = mul i64 [[TMP12]], [[TMP13]]
; CHECK-NEXT:    [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32
; CHECK-NEXT:    [[TMP16:%.*]] = lshr i64 [[TMP14]], 32
; CHECK-NEXT:    [[TMP17:%.*]] = trunc i64 [[TMP16]] to i32
; CHECK-NEXT:    [[TMP18:%.*]] = sub i32 0, [[TMP15]]
; CHECK-NEXT:    [[TMP19:%.*]] = icmp eq i32 [[TMP17]], 0
; CHECK-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32 [[TMP15]]
; CHECK-NEXT:    [[TMP21:%.*]] = zext i32 [[TMP20]] to i64
; CHECK-NEXT:    [[TMP22:%.*]] = zext i32 [[TMP11]] to i64
; CHECK-NEXT:    [[TMP23:%.*]] = mul i64 [[TMP21]], [[TMP22]]
; CHECK-NEXT:    [[TMP24:%.*]] = trunc i64 [[TMP23]] to i32
; CHECK-NEXT:    [[TMP25:%.*]] = lshr i64 [[TMP23]], 32
; CHECK-NEXT:    [[TMP26:%.*]] = trunc i64 [[TMP25]] to i32
; CHECK-NEXT:    [[TMP27:%.*]] = add i32 [[TMP11]], [[TMP26]]
; CHECK-NEXT:    [[TMP28:%.*]] = sub i32 [[TMP11]], [[TMP26]]
; CHECK-NEXT:    [[TMP29:%.*]] = select i1 [[TMP19]], i32 [[TMP27]], i32 [[TMP28]]
; CHECK-NEXT:    [[TMP30:%.*]] = zext i32 [[TMP29]] to i64
; CHECK-NEXT:    [[TMP31:%.*]] = zext i32 [[TMP6]] to i64
; CHECK-NEXT:    [[TMP32:%.*]] = mul i64 [[TMP30]], [[TMP31]]
; CHECK-NEXT:    [[TMP33:%.*]] = trunc i64 [[TMP32]] to i32
; CHECK-NEXT:    [[TMP34:%.*]] = lshr i64 [[TMP32]], 32
; CHECK-NEXT:    [[TMP35:%.*]] = trunc i64 [[TMP34]] to i32
; CHECK-NEXT:    [[TMP36:%.*]] = mul i32 [[TMP35]], [[TMP7]]
; CHECK-NEXT:    [[TMP37:%.*]] = sub i32 [[TMP6]], [[TMP36]]
; CHECK-NEXT:    [[TMP38:%.*]] = icmp uge i32 [[TMP37]], [[TMP7]]
; CHECK-NEXT:    [[TMP39:%.*]] = select i1 [[TMP38]], i32 -1, i32 0
; CHECK-NEXT:    [[TMP40:%.*]] = icmp uge i32 [[TMP6]], [[TMP36]]
; CHECK-NEXT:    [[TMP41:%.*]] = select i1 [[TMP40]], i32 -1, i32 0
; CHECK-NEXT:    [[TMP42:%.*]] = and i32 [[TMP39]], [[TMP41]]
; CHECK-NEXT:    [[TMP43:%.*]] = icmp eq i32 [[TMP42]], 0
; CHECK-NEXT:    [[TMP44:%.*]] = add i32 [[TMP35]], 1
; CHECK-NEXT:    [[TMP45:%.*]] = sub i32 [[TMP35]], 1
; CHECK-NEXT:    [[TMP46:%.*]] = select i1 [[TMP43]], i32 [[TMP35]], i32 [[TMP44]]
; CHECK-NEXT:    [[TMP47:%.*]] = select i1 [[TMP40]], i32 [[TMP46]], i32 [[TMP45]]
; CHECK-NEXT:    [[TMP48:%.*]] = xor i32 [[TMP47]], [[TMP3]]
; CHECK-NEXT:    [[TMP49:%.*]] = sub i32 [[TMP48]], [[TMP3]]
; CHECK-NEXT:    store i32 [[TMP49]], i32 addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT:    ret void
;
  %r = sdiv i32 %x, %y
  store i32 %r, i32 addrspace(1)* %out
  ret void
}

; Signed 32-bit remainder. Like @sdiv_i32, but the tail selects among r, r-y
; and r+y, and the final xor/sub uses the sign mask of x only.
define amdgpu_kernel void @srem_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) {
; CHECK-LABEL: @srem_i32(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr i32 [[X:%.*]], 31
; CHECK-NEXT:    [[TMP2:%.*]] = ashr i32 [[Y:%.*]], 31
; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[X]], [[TMP1]]
; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[Y]], [[TMP2]]
; CHECK-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP1]]
; CHECK-NEXT:    [[TMP6:%.*]] = xor i32 [[TMP4]], [[TMP2]]
; CHECK-NEXT:    [[TMP7:%.*]] = uitofp i32 [[TMP6]] to float
; CHECK-NEXT:    [[TMP8:%.*]] = fdiv fast float 1.000000e+00, [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast float [[TMP8]], 0x41F0000000000000
; CHECK-NEXT:    [[TMP10:%.*]] = fptoui float [[TMP9]] to i32
; CHECK-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
; CHECK-NEXT:    [[TMP12:%.*]] = zext i32 [[TMP6]] to i64
; CHECK-NEXT:    [[TMP13:%.*]] = mul i64 [[TMP11]], [[TMP12]]
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i64 [[TMP13]] to i32
; CHECK-NEXT:    [[TMP15:%.*]] = lshr i64 [[TMP13]], 32
; CHECK-NEXT:    [[TMP16:%.*]] = trunc i64 [[TMP15]] to i32
; CHECK-NEXT:    [[TMP17:%.*]] = sub i32 0, [[TMP14]]
; CHECK-NEXT:    [[TMP18:%.*]] = icmp eq i32 [[TMP16]], 0
; CHECK-NEXT:    [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP17]], i32 [[TMP14]]
; CHECK-NEXT:    [[TMP20:%.*]] = zext i32 [[TMP19]] to i64
; CHECK-NEXT:    [[TMP21:%.*]] = zext i32 [[TMP10]] to i64
; CHECK-NEXT:    [[TMP22:%.*]] = mul i64 [[TMP20]], [[TMP21]]
; CHECK-NEXT:    [[TMP23:%.*]] = trunc i64 [[TMP22]] to i32
; CHECK-NEXT:    [[TMP24:%.*]] = lshr i64 [[TMP22]], 32
; CHECK-NEXT:    [[TMP25:%.*]] = trunc i64 [[TMP24]] to i32
; CHECK-NEXT:    [[TMP26:%.*]] = add i32 [[TMP10]], [[TMP25]]
; CHECK-NEXT:    [[TMP27:%.*]] = sub i32 [[TMP10]], [[TMP25]]
; CHECK-NEXT:    [[TMP28:%.*]] = select i1 [[TMP18]], i32 [[TMP26]], i32 [[TMP27]]
; CHECK-NEXT:    [[TMP29:%.*]] = zext i32 [[TMP28]] to i64
; CHECK-NEXT:    [[TMP30:%.*]] = zext i32 [[TMP5]] to i64
; CHECK-NEXT:    [[TMP31:%.*]] = mul i64 [[TMP29]], [[TMP30]]
; CHECK-NEXT:    [[TMP32:%.*]] = trunc i64 [[TMP31]] to i32
; CHECK-NEXT:    [[TMP33:%.*]] = lshr i64 [[TMP31]], 32
; CHECK-NEXT:    [[TMP34:%.*]] = trunc i64 [[TMP33]] to i32
; CHECK-NEXT:    [[TMP35:%.*]] = mul i32 [[TMP34]], [[TMP6]]
; CHECK-NEXT:    [[TMP36:%.*]] = sub i32 [[TMP5]], [[TMP35]]
; CHECK-NEXT:    [[TMP37:%.*]] = icmp uge i32 [[TMP36]], [[TMP6]]
; CHECK-NEXT:    [[TMP38:%.*]] = select i1 [[TMP37]], i32 -1, i32 0
; CHECK-NEXT:    [[TMP39:%.*]] = icmp uge i32 [[TMP5]], [[TMP35]]
; CHECK-NEXT:    [[TMP40:%.*]] = select i1 [[TMP39]], i32 -1, i32 0
; CHECK-NEXT:    [[TMP41:%.*]] = and i32 [[TMP38]], [[TMP40]]
; CHECK-NEXT:    [[TMP42:%.*]] = icmp eq i32 [[TMP41]], 0
; CHECK-NEXT:    [[TMP43:%.*]] = sub i32 [[TMP36]], [[TMP6]]
; CHECK-NEXT:    [[TMP44:%.*]] = add i32 [[TMP36]], [[TMP6]]
; CHECK-NEXT:    [[TMP45:%.*]] = select i1 [[TMP42]], i32 [[TMP36]], i32 [[TMP43]]
; CHECK-NEXT:    [[TMP46:%.*]] = select i1 [[TMP39]], i32 [[TMP45]], i32 [[TMP44]]
; CHECK-NEXT:    [[TMP47:%.*]] = xor i32 [[TMP46]], [[TMP1]]
; CHECK-NEXT:    [[TMP48:%.*]] = sub i32 [[TMP47]], [[TMP1]]
; CHECK-NEXT:    store i32 [[TMP48]], i32 addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT:    ret void
;
  %r = srem i32 %x, %y
  store i32 %r, i32 addrspace(1)* %out
  ret void
}

; Unsigned 16-bit division. Expected sequence: operands zero-extended to i32
; and converted to float, x multiplied by a fast reciprocal of y, the result
; truncated, a fmad.ftz residual compared against |y| to add 0 or 1, then the
; quotient masked with 65535 and truncated back to i16.
define amdgpu_kernel void @udiv_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) {
; CHECK-LABEL: @udiv_i16(
; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[X:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[Y:%.*]] to i32
; CHECK-NEXT:    [[TMP3:%.*]] = uitofp i32 [[TMP1]] to float
; CHECK-NEXT:    [[TMP4:%.*]] = uitofp i32 [[TMP2]] to float
; CHECK-NEXT:    [[TMP5:%.*]] = fdiv fast float 1.000000e+00, [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = fmul fast float [[TMP3]], [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]])
; CHECK-NEXT:    [[TMP8:%.*]] = fsub fast float -0.000000e+00, [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]])
; CHECK-NEXT:    [[TMP10:%.*]] = fptoui float [[TMP7]] to i32
; CHECK-NEXT:    [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.fabs.f32(float [[TMP4]])
; CHECK-NEXT:    [[TMP13:%.*]] = fcmp fast oge float [[TMP11]], [[TMP12]]
; CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 1, i32 0
; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[TMP10]], [[TMP14]]
; CHECK-NEXT:    [[TMP16:%.*]] = and i32 [[TMP15]], 65535
; CHECK-NEXT:    [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16
; CHECK-NEXT:    store i16 [[TMP17]], i16 addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT:    ret void
;
  %r = udiv i16 %x, %y
  store i16 %r, i16 addrspace(1)* %out
  ret void
}

; Unsigned 16-bit remainder. Same float-based quotient as @udiv_i16, followed
; by x - q*y, a mask with 65535 and a truncate to i16.
define amdgpu_kernel void @urem_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) {
; CHECK-LABEL: @urem_i16(
; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[X:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[Y:%.*]] to i32
; CHECK-NEXT:    [[TMP3:%.*]] = uitofp i32 [[TMP1]] to float
; CHECK-NEXT:    [[TMP4:%.*]] = uitofp i32 [[TMP2]] to float
; CHECK-NEXT:    [[TMP5:%.*]] = fdiv fast float 1.000000e+00, [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = fmul fast float [[TMP3]], [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]])
; CHECK-NEXT:    [[TMP8:%.*]] = fsub fast float -0.000000e+00, [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]])
; CHECK-NEXT:    [[TMP10:%.*]] = fptoui float [[TMP7]] to i32
; CHECK-NEXT:    [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.fabs.f32(float [[TMP4]])
; CHECK-NEXT:    [[TMP13:%.*]] = fcmp fast oge float [[TMP11]], [[TMP12]]
; CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 1, i32 0
; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[TMP10]], [[TMP14]]
; CHECK-NEXT:    [[TMP16:%.*]] = mul i32 [[TMP15]], [[TMP2]]
; CHECK-NEXT:    [[TMP17:%.*]] = sub i32 [[TMP1]], [[TMP16]]
; CHECK-NEXT:    [[TMP18:%.*]] = and i32 [[TMP17]], 65535
; CHECK-NEXT:    [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16
; CHECK-NEXT:    store i16 [[TMP19]], i16 addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT:    ret void
;
  %r = urem i16 %x, %y
  store i16 %r, i16 addrspace(1)* %out
  ret void
}

; Signed 16-bit division. Operands are sign-extended and converted with
; sitofp; the correction added to the truncated quotient is
; ((x ^ y) ashr 30) | 1 when |residual| >= |y|, otherwise 0. The result goes
; through trunc/sext/trunc before the store.
define amdgpu_kernel void @sdiv_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) {
; CHECK-LABEL: @sdiv_i16(
; CHECK-NEXT:    [[TMP1:%.*]] = sext i16 [[X:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = sext i16 [[Y:%.*]] to i32
; CHECK-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = ashr i32 [[TMP3]], 30
; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP4]], 1
; CHECK-NEXT:    [[TMP6:%.*]] = sitofp i32 [[TMP1]] to float
; CHECK-NEXT:    [[TMP7:%.*]] = sitofp i32 [[TMP2]] to float
; CHECK-NEXT:    [[TMP8:%.*]] = fdiv fast float 1.000000e+00, [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]]
; CHECK-NEXT:    [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]])
; CHECK-NEXT:    [[TMP11:%.*]] = fsub fast float -0.000000e+00, [[TMP10]]
; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]])
; CHECK-NEXT:    [[TMP13:%.*]] = fptosi float [[TMP10]] to i32
; CHECK-NEXT:    [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]])
; CHECK-NEXT:    [[TMP15:%.*]] = call fast float @llvm.fabs.f32(float [[TMP7]])
; CHECK-NEXT:    [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]]
; CHECK-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP5]], i32 0
; CHECK-NEXT:    [[TMP18:%.*]] = add i32 [[TMP13]], [[TMP17]]
; CHECK-NEXT:    [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16
; CHECK-NEXT:    [[TMP20:%.*]] = sext i16 [[TMP19]] to i32
; CHECK-NEXT:    [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16
; CHECK-NEXT:    store i16 [[TMP21]], i16 addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT:    ret void
;
  %r = sdiv i16 %x, %y
  store i16 %r, i16 addrspace(1)* %out
  ret void
}

; Signed 16-bit remainder. Same float-based signed quotient as @sdiv_i16,
; followed by x - q*y and the trunc/sext/trunc tail.
define amdgpu_kernel void @srem_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) {
; CHECK-LABEL: @srem_i16(
; CHECK-NEXT:    [[TMP1:%.*]] = sext i16 [[X:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = sext i16 [[Y:%.*]] to i32
; CHECK-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = ashr i32 [[TMP3]], 30
; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP4]], 1
; CHECK-NEXT:    [[TMP6:%.*]] = sitofp i32 [[TMP1]] to float
; CHECK-NEXT:    [[TMP7:%.*]] = sitofp i32 [[TMP2]] to float
; CHECK-NEXT:    [[TMP8:%.*]] = fdiv fast float 1.000000e+00, [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]]
; CHECK-NEXT:    [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]])
; CHECK-NEXT:    [[TMP11:%.*]] = fsub fast float -0.000000e+00, [[TMP10]]
; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]])
; CHECK-NEXT:    [[TMP13:%.*]] = fptosi float [[TMP10]] to i32
; CHECK-NEXT:    [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]])
; CHECK-NEXT:    [[TMP15:%.*]] = call fast float @llvm.fabs.f32(float [[TMP7]])
; CHECK-NEXT:    [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]]
; CHECK-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP5]], i32 0
; CHECK-NEXT:    [[TMP18:%.*]] = add i32 [[TMP13]], [[TMP17]]
; CHECK-NEXT:    [[TMP19:%.*]] = mul i32 [[TMP18]], [[TMP2]]
; CHECK-NEXT:    [[TMP20:%.*]] = sub i32 [[TMP1]], [[TMP19]]
; CHECK-NEXT:    [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16
; CHECK-NEXT:    [[TMP22:%.*]] = sext i16 [[TMP21]] to i32
; CHECK-NEXT:    [[TMP23:%.*]] = trunc i32 [[TMP22]] to i16
; CHECK-NEXT:    store i16 [[TMP23]], i16 addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT:    ret void
;
  %r = srem i16 %x, %y
  store i16 %r, i16 addrspace(1)* %out
  ret void
}

; Unsigned 8-bit division. Same shape as @udiv_i16 with i8 operands; the
; quotient is masked with 255 before the truncate to i8.
define amdgpu_kernel void @udiv_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) {
; CHECK-LABEL: @udiv_i8(
; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[X:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[Y:%.*]] to i32
; CHECK-NEXT:    [[TMP3:%.*]] = uitofp i32 [[TMP1]] to float
; CHECK-NEXT:    [[TMP4:%.*]] = uitofp i32 [[TMP2]] to float
; CHECK-NEXT:    [[TMP5:%.*]] = fdiv fast float 1.000000e+00, [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = fmul fast float [[TMP3]], [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]])
; CHECK-NEXT:    [[TMP8:%.*]] = fsub fast float -0.000000e+00, [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]])
; CHECK-NEXT:    [[TMP10:%.*]] = fptoui float [[TMP7]] to i32
; CHECK-NEXT:    [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.fabs.f32(float [[TMP4]])
; CHECK-NEXT:    [[TMP13:%.*]] = fcmp fast oge float [[TMP11]], [[TMP12]]
; CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 1, i32 0
; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[TMP10]], [[TMP14]]
; CHECK-NEXT:    [[TMP16:%.*]] = and i32 [[TMP15]], 255
; CHECK-NEXT:    [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8
; CHECK-NEXT:    store i8 [[TMP17]], i8 addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT:    ret void
;
  %r = udiv i8 %x, %y
  store i8 %r, i8 addrspace(1)* %out
  ret void
}

; Unsigned 8-bit remainder. Same float-based quotient as @udiv_i8, followed by
; x - q*y, a mask with 255 and a truncate to i8.
define amdgpu_kernel void @urem_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) {
; CHECK-LABEL: @urem_i8(
; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[X:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[Y:%.*]] to i32
; CHECK-NEXT:    [[TMP3:%.*]] = uitofp i32 [[TMP1]] to float
; CHECK-NEXT:    [[TMP4:%.*]] = uitofp i32 [[TMP2]] to float
; CHECK-NEXT:    [[TMP5:%.*]] = fdiv fast float 1.000000e+00, [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = fmul fast float [[TMP3]], [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]])
; CHECK-NEXT:    [[TMP8:%.*]] = fsub fast float -0.000000e+00, [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]])
; CHECK-NEXT:    [[TMP10:%.*]] = fptoui float [[TMP7]] to i32
; CHECK-NEXT:    [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.fabs.f32(float [[TMP4]])
; CHECK-NEXT:    [[TMP13:%.*]] = fcmp fast oge float [[TMP11]], [[TMP12]]
; CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 1, i32 0
; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[TMP10]], [[TMP14]]
; CHECK-NEXT:    [[TMP16:%.*]] = mul i32 [[TMP15]], [[TMP2]]
; CHECK-NEXT:    [[TMP17:%.*]] = sub i32 [[TMP1]], [[TMP16]]
; CHECK-NEXT:    [[TMP18:%.*]] = and i32 [[TMP17]], 255
; CHECK-NEXT:    [[TMP19:%.*]] = trunc i32 [[TMP18]] to i8
; CHECK-NEXT:    store i8 [[TMP19]], i8 addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT:    ret void
;
  %r = urem i8 %x, %y
  store i8 %r, i8 addrspace(1)* %out
  ret void
}

; Signed 8-bit division. Same shape as @sdiv_i16 with i8 operands and an
; i8 trunc/sext/trunc tail.
define amdgpu_kernel void @sdiv_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) {
; CHECK-LABEL: @sdiv_i8(
; CHECK-NEXT:    [[TMP1:%.*]] = sext i8 [[X:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = sext i8 [[Y:%.*]] to i32
; CHECK-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = ashr i32 [[TMP3]], 30
; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP4]], 1
; CHECK-NEXT:    [[TMP6:%.*]] = sitofp i32 [[TMP1]] to float
; CHECK-NEXT:    [[TMP7:%.*]] = sitofp i32 [[TMP2]] to float
; CHECK-NEXT:    [[TMP8:%.*]] = fdiv fast float 1.000000e+00, [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]]
; CHECK-NEXT:    [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]])
; CHECK-NEXT:    [[TMP11:%.*]] = fsub fast float -0.000000e+00, [[TMP10]]
; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]])
; CHECK-NEXT:    [[TMP13:%.*]] = fptosi float [[TMP10]] to i32
; CHECK-NEXT:    [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]])
; CHECK-NEXT:    [[TMP15:%.*]] = call fast float @llvm.fabs.f32(float [[TMP7]])
; CHECK-NEXT:    [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]]
; CHECK-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP5]], i32 0
; CHECK-NEXT:    [[TMP18:%.*]] = add i32 [[TMP13]], [[TMP17]]
; CHECK-NEXT:    [[TMP19:%.*]] = trunc i32 [[TMP18]] to i8
; CHECK-NEXT:    [[TMP20:%.*]] = sext i8 [[TMP19]] to i32
; CHECK-NEXT:    [[TMP21:%.*]] = trunc i32 [[TMP20]] to i8
; CHECK-NEXT:    store i8 [[TMP21]], i8 addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT:    ret void
;
  %r = sdiv i8 %x, %y
  store i8 %r, i8 addrspace(1)* %out
  ret void
}

; Signed 8-bit remainder. Same float-based signed quotient as @sdiv_i8,
; followed by x - q*y and the i8 trunc/sext/trunc tail.
define amdgpu_kernel void @srem_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) {
; CHECK-LABEL: @srem_i8(
; CHECK-NEXT:    [[TMP1:%.*]] = sext i8 [[X:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = sext i8 [[Y:%.*]] to i32
; CHECK-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = ashr i32 [[TMP3]], 30
; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP4]], 1
; CHECK-NEXT:    [[TMP6:%.*]] = sitofp i32 [[TMP1]] to float
; CHECK-NEXT:    [[TMP7:%.*]] = sitofp i32 [[TMP2]] to float
; CHECK-NEXT:    [[TMP8:%.*]] = fdiv fast float 1.000000e+00, [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]]
; CHECK-NEXT:    [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]])
; CHECK-NEXT:    [[TMP11:%.*]] = fsub fast float -0.000000e+00, [[TMP10]]
; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]])
; CHECK-NEXT:    [[TMP13:%.*]] = fptosi float [[TMP10]] to i32
; CHECK-NEXT:    [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]])
; CHECK-NEXT:    [[TMP15:%.*]] = call fast float @llvm.fabs.f32(float [[TMP7]])
; CHECK-NEXT:    [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]]
; CHECK-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP5]], i32 0
; CHECK-NEXT:    [[TMP18:%.*]] = add i32 [[TMP13]], [[TMP17]]
; CHECK-NEXT:    [[TMP19:%.*]] = mul i32 [[TMP18]], [[TMP2]]
; CHECK-NEXT:    [[TMP20:%.*]] = sub i32 [[TMP1]], [[TMP19]]
; CHECK-NEXT:    [[TMP21:%.*]] = trunc i32 [[TMP20]] to i8
; CHECK-NEXT:    [[TMP22:%.*]] = sext i8 [[TMP21]] to i32
; CHECK-NEXT:    [[TMP23:%.*]] = trunc i32 [[TMP22]] to i8
; CHECK-NEXT:    store i8 [[TMP23]], i8 addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT:    ret void
;
  %r = srem i8 %x, %y
  store i8 %r, i8 addrspace(1)* %out
  ret void
}

461 define amdgpu_kernel void @udiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) { 462 ; CHECK-LABEL: @udiv_v4i32( 463 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i64 0 464 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[Y:%.*]], i64 0 465 ; CHECK-NEXT: [[TMP3:%.*]] = uitofp i32 [[TMP2]] to float 466 ; CHECK-NEXT: [[TMP4:%.*]] = fdiv fast float 1.000000e+00, [[TMP3]] 467 ; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP4]], 0x41F0000000000000 468 ; CHECK-NEXT: [[TMP6:%.*]] = fptoui float [[TMP5]] to i32 469 ; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 470 ; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP2]] to i64 471 ; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP7]], [[TMP8]] 472 ; CHECK-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 473 ; CHECK-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP9]], 32 474 ; CHECK-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP11]] to i32 475 ; CHECK-NEXT: [[TMP13:%.*]] = sub i32 0, [[TMP10]] 476 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], 0 477 ; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 [[TMP10]] 478 ; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 479 ; CHECK-NEXT: [[TMP17:%.*]] = zext i32 [[TMP6]] to i64 480 ; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[TMP16]], [[TMP17]] 481 ; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[TMP18]] to i32 482 ; CHECK-NEXT: [[TMP20:%.*]] = lshr i64 [[TMP18]], 32 483 ; CHECK-NEXT: [[TMP21:%.*]] = trunc i64 [[TMP20]] to i32 484 ; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[TMP6]], [[TMP21]] 485 ; CHECK-NEXT: [[TMP23:%.*]] = sub i32 [[TMP6]], [[TMP21]] 486 ; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP14]], i32 [[TMP22]], i32 [[TMP23]] 487 ; CHECK-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 488 ; CHECK-NEXT: [[TMP26:%.*]] = zext i32 [[TMP1]] to i64 489 ; CHECK-NEXT: [[TMP27:%.*]] = mul i64 [[TMP25]], [[TMP26]] 490 ; CHECK-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32 491 ; CHECK-NEXT: [[TMP29:%.*]] = 
lshr i64 [[TMP27]], 32 492 ; CHECK-NEXT: [[TMP30:%.*]] = trunc i64 [[TMP29]] to i32 493 ; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[TMP30]], [[TMP2]] 494 ; CHECK-NEXT: [[TMP32:%.*]] = sub i32 [[TMP1]], [[TMP31]] 495 ; CHECK-NEXT: [[TMP33:%.*]] = icmp uge i32 [[TMP32]], [[TMP2]] 496 ; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 -1, i32 0 497 ; CHECK-NEXT: [[TMP35:%.*]] = icmp uge i32 [[TMP1]], [[TMP31]] 498 ; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 -1, i32 0 499 ; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP34]], [[TMP36]] 500 ; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i32 [[TMP37]], 0 501 ; CHECK-NEXT: [[TMP39:%.*]] = add i32 [[TMP30]], 1 502 ; CHECK-NEXT: [[TMP40:%.*]] = sub i32 [[TMP30]], 1 503 ; CHECK-NEXT: [[TMP41:%.*]] = select i1 [[TMP38]], i32 [[TMP30]], i32 [[TMP39]] 504 ; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP35]], i32 [[TMP41]], i32 [[TMP40]] 505 ; CHECK-NEXT: [[TMP43:%.*]] = insertelement <4 x i32> undef, i32 [[TMP42]], i64 0 506 ; CHECK-NEXT: [[TMP44:%.*]] = extractelement <4 x i32> [[X]], i64 1 507 ; CHECK-NEXT: [[TMP45:%.*]] = extractelement <4 x i32> [[Y]], i64 1 508 ; CHECK-NEXT: [[TMP46:%.*]] = uitofp i32 [[TMP45]] to float 509 ; CHECK-NEXT: [[TMP47:%.*]] = fdiv fast float 1.000000e+00, [[TMP46]] 510 ; CHECK-NEXT: [[TMP48:%.*]] = fmul fast float [[TMP47]], 0x41F0000000000000 511 ; CHECK-NEXT: [[TMP49:%.*]] = fptoui float [[TMP48]] to i32 512 ; CHECK-NEXT: [[TMP50:%.*]] = zext i32 [[TMP49]] to i64 513 ; CHECK-NEXT: [[TMP51:%.*]] = zext i32 [[TMP45]] to i64 514 ; CHECK-NEXT: [[TMP52:%.*]] = mul i64 [[TMP50]], [[TMP51]] 515 ; CHECK-NEXT: [[TMP53:%.*]] = trunc i64 [[TMP52]] to i32 516 ; CHECK-NEXT: [[TMP54:%.*]] = lshr i64 [[TMP52]], 32 517 ; CHECK-NEXT: [[TMP55:%.*]] = trunc i64 [[TMP54]] to i32 518 ; CHECK-NEXT: [[TMP56:%.*]] = sub i32 0, [[TMP53]] 519 ; CHECK-NEXT: [[TMP57:%.*]] = icmp eq i32 [[TMP55]], 0 520 ; CHECK-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], i32 [[TMP56]], i32 [[TMP53]] 521 ; CHECK-NEXT: [[TMP59:%.*]] = zext i32 
[[TMP58]] to i64 522 ; CHECK-NEXT: [[TMP60:%.*]] = zext i32 [[TMP49]] to i64 523 ; CHECK-NEXT: [[TMP61:%.*]] = mul i64 [[TMP59]], [[TMP60]] 524 ; CHECK-NEXT: [[TMP62:%.*]] = trunc i64 [[TMP61]] to i32 525 ; CHECK-NEXT: [[TMP63:%.*]] = lshr i64 [[TMP61]], 32 526 ; CHECK-NEXT: [[TMP64:%.*]] = trunc i64 [[TMP63]] to i32 527 ; CHECK-NEXT: [[TMP65:%.*]] = add i32 [[TMP49]], [[TMP64]] 528 ; CHECK-NEXT: [[TMP66:%.*]] = sub i32 [[TMP49]], [[TMP64]] 529 ; CHECK-NEXT: [[TMP67:%.*]] = select i1 [[TMP57]], i32 [[TMP65]], i32 [[TMP66]] 530 ; CHECK-NEXT: [[TMP68:%.*]] = zext i32 [[TMP67]] to i64 531 ; CHECK-NEXT: [[TMP69:%.*]] = zext i32 [[TMP44]] to i64 532 ; CHECK-NEXT: [[TMP70:%.*]] = mul i64 [[TMP68]], [[TMP69]] 533 ; CHECK-NEXT: [[TMP71:%.*]] = trunc i64 [[TMP70]] to i32 534 ; CHECK-NEXT: [[TMP72:%.*]] = lshr i64 [[TMP70]], 32 535 ; CHECK-NEXT: [[TMP73:%.*]] = trunc i64 [[TMP72]] to i32 536 ; CHECK-NEXT: [[TMP74:%.*]] = mul i32 [[TMP73]], [[TMP45]] 537 ; CHECK-NEXT: [[TMP75:%.*]] = sub i32 [[TMP44]], [[TMP74]] 538 ; CHECK-NEXT: [[TMP76:%.*]] = icmp uge i32 [[TMP75]], [[TMP45]] 539 ; CHECK-NEXT: [[TMP77:%.*]] = select i1 [[TMP76]], i32 -1, i32 0 540 ; CHECK-NEXT: [[TMP78:%.*]] = icmp uge i32 [[TMP44]], [[TMP74]] 541 ; CHECK-NEXT: [[TMP79:%.*]] = select i1 [[TMP78]], i32 -1, i32 0 542 ; CHECK-NEXT: [[TMP80:%.*]] = and i32 [[TMP77]], [[TMP79]] 543 ; CHECK-NEXT: [[TMP81:%.*]] = icmp eq i32 [[TMP80]], 0 544 ; CHECK-NEXT: [[TMP82:%.*]] = add i32 [[TMP73]], 1 545 ; CHECK-NEXT: [[TMP83:%.*]] = sub i32 [[TMP73]], 1 546 ; CHECK-NEXT: [[TMP84:%.*]] = select i1 [[TMP81]], i32 [[TMP73]], i32 [[TMP82]] 547 ; CHECK-NEXT: [[TMP85:%.*]] = select i1 [[TMP78]], i32 [[TMP84]], i32 [[TMP83]] 548 ; CHECK-NEXT: [[TMP86:%.*]] = insertelement <4 x i32> [[TMP43]], i32 [[TMP85]], i64 1 549 ; CHECK-NEXT: [[TMP87:%.*]] = extractelement <4 x i32> [[X]], i64 2 550 ; CHECK-NEXT: [[TMP88:%.*]] = extractelement <4 x i32> [[Y]], i64 2 551 ; CHECK-NEXT: [[TMP89:%.*]] = uitofp i32 [[TMP88]] to float 552 ; 
CHECK-NEXT: [[TMP90:%.*]] = fdiv fast float 1.000000e+00, [[TMP89]] 553 ; CHECK-NEXT: [[TMP91:%.*]] = fmul fast float [[TMP90]], 0x41F0000000000000 554 ; CHECK-NEXT: [[TMP92:%.*]] = fptoui float [[TMP91]] to i32 555 ; CHECK-NEXT: [[TMP93:%.*]] = zext i32 [[TMP92]] to i64 556 ; CHECK-NEXT: [[TMP94:%.*]] = zext i32 [[TMP88]] to i64 557 ; CHECK-NEXT: [[TMP95:%.*]] = mul i64 [[TMP93]], [[TMP94]] 558 ; CHECK-NEXT: [[TMP96:%.*]] = trunc i64 [[TMP95]] to i32 559 ; CHECK-NEXT: [[TMP97:%.*]] = lshr i64 [[TMP95]], 32 560 ; CHECK-NEXT: [[TMP98:%.*]] = trunc i64 [[TMP97]] to i32 561 ; CHECK-NEXT: [[TMP99:%.*]] = sub i32 0, [[TMP96]] 562 ; CHECK-NEXT: [[TMP100:%.*]] = icmp eq i32 [[TMP98]], 0 563 ; CHECK-NEXT: [[TMP101:%.*]] = select i1 [[TMP100]], i32 [[TMP99]], i32 [[TMP96]] 564 ; CHECK-NEXT: [[TMP102:%.*]] = zext i32 [[TMP101]] to i64 565 ; CHECK-NEXT: [[TMP103:%.*]] = zext i32 [[TMP92]] to i64 566 ; CHECK-NEXT: [[TMP104:%.*]] = mul i64 [[TMP102]], [[TMP103]] 567 ; CHECK-NEXT: [[TMP105:%.*]] = trunc i64 [[TMP104]] to i32 568 ; CHECK-NEXT: [[TMP106:%.*]] = lshr i64 [[TMP104]], 32 569 ; CHECK-NEXT: [[TMP107:%.*]] = trunc i64 [[TMP106]] to i32 570 ; CHECK-NEXT: [[TMP108:%.*]] = add i32 [[TMP92]], [[TMP107]] 571 ; CHECK-NEXT: [[TMP109:%.*]] = sub i32 [[TMP92]], [[TMP107]] 572 ; CHECK-NEXT: [[TMP110:%.*]] = select i1 [[TMP100]], i32 [[TMP108]], i32 [[TMP109]] 573 ; CHECK-NEXT: [[TMP111:%.*]] = zext i32 [[TMP110]] to i64 574 ; CHECK-NEXT: [[TMP112:%.*]] = zext i32 [[TMP87]] to i64 575 ; CHECK-NEXT: [[TMP113:%.*]] = mul i64 [[TMP111]], [[TMP112]] 576 ; CHECK-NEXT: [[TMP114:%.*]] = trunc i64 [[TMP113]] to i32 577 ; CHECK-NEXT: [[TMP115:%.*]] = lshr i64 [[TMP113]], 32 578 ; CHECK-NEXT: [[TMP116:%.*]] = trunc i64 [[TMP115]] to i32 579 ; CHECK-NEXT: [[TMP117:%.*]] = mul i32 [[TMP116]], [[TMP88]] 580 ; CHECK-NEXT: [[TMP118:%.*]] = sub i32 [[TMP87]], [[TMP117]] 581 ; CHECK-NEXT: [[TMP119:%.*]] = icmp uge i32 [[TMP118]], [[TMP88]] 582 ; CHECK-NEXT: [[TMP120:%.*]] = select i1 [[TMP119]], 
i32 -1, i32 0 583 ; CHECK-NEXT: [[TMP121:%.*]] = icmp uge i32 [[TMP87]], [[TMP117]] 584 ; CHECK-NEXT: [[TMP122:%.*]] = select i1 [[TMP121]], i32 -1, i32 0 585 ; CHECK-NEXT: [[TMP123:%.*]] = and i32 [[TMP120]], [[TMP122]] 586 ; CHECK-NEXT: [[TMP124:%.*]] = icmp eq i32 [[TMP123]], 0 587 ; CHECK-NEXT: [[TMP125:%.*]] = add i32 [[TMP116]], 1 588 ; CHECK-NEXT: [[TMP126:%.*]] = sub i32 [[TMP116]], 1 589 ; CHECK-NEXT: [[TMP127:%.*]] = select i1 [[TMP124]], i32 [[TMP116]], i32 [[TMP125]] 590 ; CHECK-NEXT: [[TMP128:%.*]] = select i1 [[TMP121]], i32 [[TMP127]], i32 [[TMP126]] 591 ; CHECK-NEXT: [[TMP129:%.*]] = insertelement <4 x i32> [[TMP86]], i32 [[TMP128]], i64 2 592 ; CHECK-NEXT: [[TMP130:%.*]] = extractelement <4 x i32> [[X]], i64 3 593 ; CHECK-NEXT: [[TMP131:%.*]] = extractelement <4 x i32> [[Y]], i64 3 594 ; CHECK-NEXT: [[TMP132:%.*]] = uitofp i32 [[TMP131]] to float 595 ; CHECK-NEXT: [[TMP133:%.*]] = fdiv fast float 1.000000e+00, [[TMP132]] 596 ; CHECK-NEXT: [[TMP134:%.*]] = fmul fast float [[TMP133]], 0x41F0000000000000 597 ; CHECK-NEXT: [[TMP135:%.*]] = fptoui float [[TMP134]] to i32 598 ; CHECK-NEXT: [[TMP136:%.*]] = zext i32 [[TMP135]] to i64 599 ; CHECK-NEXT: [[TMP137:%.*]] = zext i32 [[TMP131]] to i64 600 ; CHECK-NEXT: [[TMP138:%.*]] = mul i64 [[TMP136]], [[TMP137]] 601 ; CHECK-NEXT: [[TMP139:%.*]] = trunc i64 [[TMP138]] to i32 602 ; CHECK-NEXT: [[TMP140:%.*]] = lshr i64 [[TMP138]], 32 603 ; CHECK-NEXT: [[TMP141:%.*]] = trunc i64 [[TMP140]] to i32 604 ; CHECK-NEXT: [[TMP142:%.*]] = sub i32 0, [[TMP139]] 605 ; CHECK-NEXT: [[TMP143:%.*]] = icmp eq i32 [[TMP141]], 0 606 ; CHECK-NEXT: [[TMP144:%.*]] = select i1 [[TMP143]], i32 [[TMP142]], i32 [[TMP139]] 607 ; CHECK-NEXT: [[TMP145:%.*]] = zext i32 [[TMP144]] to i64 608 ; CHECK-NEXT: [[TMP146:%.*]] = zext i32 [[TMP135]] to i64 609 ; CHECK-NEXT: [[TMP147:%.*]] = mul i64 [[TMP145]], [[TMP146]] 610 ; CHECK-NEXT: [[TMP148:%.*]] = trunc i64 [[TMP147]] to i32 611 ; CHECK-NEXT: [[TMP149:%.*]] = lshr i64 [[TMP147]], 32 612 ; 
CHECK-NEXT: [[TMP150:%.*]] = trunc i64 [[TMP149]] to i32 613 ; CHECK-NEXT: [[TMP151:%.*]] = add i32 [[TMP135]], [[TMP150]] 614 ; CHECK-NEXT: [[TMP152:%.*]] = sub i32 [[TMP135]], [[TMP150]] 615 ; CHECK-NEXT: [[TMP153:%.*]] = select i1 [[TMP143]], i32 [[TMP151]], i32 [[TMP152]] 616 ; CHECK-NEXT: [[TMP154:%.*]] = zext i32 [[TMP153]] to i64 617 ; CHECK-NEXT: [[TMP155:%.*]] = zext i32 [[TMP130]] to i64 618 ; CHECK-NEXT: [[TMP156:%.*]] = mul i64 [[TMP154]], [[TMP155]] 619 ; CHECK-NEXT: [[TMP157:%.*]] = trunc i64 [[TMP156]] to i32 620 ; CHECK-NEXT: [[TMP158:%.*]] = lshr i64 [[TMP156]], 32 621 ; CHECK-NEXT: [[TMP159:%.*]] = trunc i64 [[TMP158]] to i32 622 ; CHECK-NEXT: [[TMP160:%.*]] = mul i32 [[TMP159]], [[TMP131]] 623 ; CHECK-NEXT: [[TMP161:%.*]] = sub i32 [[TMP130]], [[TMP160]] 624 ; CHECK-NEXT: [[TMP162:%.*]] = icmp uge i32 [[TMP161]], [[TMP131]] 625 ; CHECK-NEXT: [[TMP163:%.*]] = select i1 [[TMP162]], i32 -1, i32 0 626 ; CHECK-NEXT: [[TMP164:%.*]] = icmp uge i32 [[TMP130]], [[TMP160]] 627 ; CHECK-NEXT: [[TMP165:%.*]] = select i1 [[TMP164]], i32 -1, i32 0 628 ; CHECK-NEXT: [[TMP166:%.*]] = and i32 [[TMP163]], [[TMP165]] 629 ; CHECK-NEXT: [[TMP167:%.*]] = icmp eq i32 [[TMP166]], 0 630 ; CHECK-NEXT: [[TMP168:%.*]] = add i32 [[TMP159]], 1 631 ; CHECK-NEXT: [[TMP169:%.*]] = sub i32 [[TMP159]], 1 632 ; CHECK-NEXT: [[TMP170:%.*]] = select i1 [[TMP167]], i32 [[TMP159]], i32 [[TMP168]] 633 ; CHECK-NEXT: [[TMP171:%.*]] = select i1 [[TMP164]], i32 [[TMP170]], i32 [[TMP169]] 634 ; CHECK-NEXT: [[TMP172:%.*]] = insertelement <4 x i32> [[TMP129]], i32 [[TMP171]], i64 3 635 ; CHECK-NEXT: store <4 x i32> [[TMP172]], <4 x i32> addrspace(1)* [[OUT:%.*]] 636 ; CHECK-NEXT: ret void 637 ; 638 %r = udiv <4 x i32> %x, %y 639 store <4 x i32> %r, <4 x i32> addrspace(1)* %out 640 ret void 641 } 642 ; Vector urem coverage: the expected output scalarizes the <4 x i32> urem, expanding every lane (extractelement ... insertelement) with the same reciprocal-based sequence and remainder fixup selects. 643 define amdgpu_kernel void @urem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) { 644 ; CHECK-LABEL: @urem_v4i32( 645 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> 
[[X:%.*]], i64 0 646 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[Y:%.*]], i64 0 647 ; CHECK-NEXT: [[TMP3:%.*]] = uitofp i32 [[TMP2]] to float 648 ; CHECK-NEXT: [[TMP4:%.*]] = fdiv fast float 1.000000e+00, [[TMP3]] 649 ; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP4]], 0x41F0000000000000 650 ; CHECK-NEXT: [[TMP6:%.*]] = fptoui float [[TMP5]] to i32 651 ; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 652 ; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP2]] to i64 653 ; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP7]], [[TMP8]] 654 ; CHECK-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 655 ; CHECK-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP9]], 32 656 ; CHECK-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP11]] to i32 657 ; CHECK-NEXT: [[TMP13:%.*]] = sub i32 0, [[TMP10]] 658 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], 0 659 ; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 [[TMP10]] 660 ; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 661 ; CHECK-NEXT: [[TMP17:%.*]] = zext i32 [[TMP6]] to i64 662 ; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[TMP16]], [[TMP17]] 663 ; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[TMP18]] to i32 664 ; CHECK-NEXT: [[TMP20:%.*]] = lshr i64 [[TMP18]], 32 665 ; CHECK-NEXT: [[TMP21:%.*]] = trunc i64 [[TMP20]] to i32 666 ; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[TMP6]], [[TMP21]] 667 ; CHECK-NEXT: [[TMP23:%.*]] = sub i32 [[TMP6]], [[TMP21]] 668 ; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP14]], i32 [[TMP22]], i32 [[TMP23]] 669 ; CHECK-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 670 ; CHECK-NEXT: [[TMP26:%.*]] = zext i32 [[TMP1]] to i64 671 ; CHECK-NEXT: [[TMP27:%.*]] = mul i64 [[TMP25]], [[TMP26]] 672 ; CHECK-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32 673 ; CHECK-NEXT: [[TMP29:%.*]] = lshr i64 [[TMP27]], 32 674 ; CHECK-NEXT: [[TMP30:%.*]] = trunc i64 [[TMP29]] to i32 675 ; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[TMP30]], [[TMP2]] 676 ; CHECK-NEXT: [[TMP32:%.*]] = sub i32 [[TMP1]], [[TMP31]] 677 ; CHECK-NEXT: 
[[TMP33:%.*]] = icmp uge i32 [[TMP32]], [[TMP2]] 678 ; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 -1, i32 0 679 ; CHECK-NEXT: [[TMP35:%.*]] = icmp uge i32 [[TMP1]], [[TMP31]] 680 ; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 -1, i32 0 681 ; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP34]], [[TMP36]] 682 ; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i32 [[TMP37]], 0 683 ; CHECK-NEXT: [[TMP39:%.*]] = sub i32 [[TMP32]], [[TMP2]] 684 ; CHECK-NEXT: [[TMP40:%.*]] = add i32 [[TMP32]], [[TMP2]] 685 ; CHECK-NEXT: [[TMP41:%.*]] = select i1 [[TMP38]], i32 [[TMP32]], i32 [[TMP39]] 686 ; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP35]], i32 [[TMP41]], i32 [[TMP40]] 687 ; CHECK-NEXT: [[TMP43:%.*]] = insertelement <4 x i32> undef, i32 [[TMP42]], i64 0 688 ; CHECK-NEXT: [[TMP44:%.*]] = extractelement <4 x i32> [[X]], i64 1 689 ; CHECK-NEXT: [[TMP45:%.*]] = extractelement <4 x i32> [[Y]], i64 1 690 ; CHECK-NEXT: [[TMP46:%.*]] = uitofp i32 [[TMP45]] to float 691 ; CHECK-NEXT: [[TMP47:%.*]] = fdiv fast float 1.000000e+00, [[TMP46]] 692 ; CHECK-NEXT: [[TMP48:%.*]] = fmul fast float [[TMP47]], 0x41F0000000000000 693 ; CHECK-NEXT: [[TMP49:%.*]] = fptoui float [[TMP48]] to i32 694 ; CHECK-NEXT: [[TMP50:%.*]] = zext i32 [[TMP49]] to i64 695 ; CHECK-NEXT: [[TMP51:%.*]] = zext i32 [[TMP45]] to i64 696 ; CHECK-NEXT: [[TMP52:%.*]] = mul i64 [[TMP50]], [[TMP51]] 697 ; CHECK-NEXT: [[TMP53:%.*]] = trunc i64 [[TMP52]] to i32 698 ; CHECK-NEXT: [[TMP54:%.*]] = lshr i64 [[TMP52]], 32 699 ; CHECK-NEXT: [[TMP55:%.*]] = trunc i64 [[TMP54]] to i32 700 ; CHECK-NEXT: [[TMP56:%.*]] = sub i32 0, [[TMP53]] 701 ; CHECK-NEXT: [[TMP57:%.*]] = icmp eq i32 [[TMP55]], 0 702 ; CHECK-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], i32 [[TMP56]], i32 [[TMP53]] 703 ; CHECK-NEXT: [[TMP59:%.*]] = zext i32 [[TMP58]] to i64 704 ; CHECK-NEXT: [[TMP60:%.*]] = zext i32 [[TMP49]] to i64 705 ; CHECK-NEXT: [[TMP61:%.*]] = mul i64 [[TMP59]], [[TMP60]] 706 ; CHECK-NEXT: [[TMP62:%.*]] = trunc i64 [[TMP61]] to i32 707 ; 
CHECK-NEXT: [[TMP63:%.*]] = lshr i64 [[TMP61]], 32 708 ; CHECK-NEXT: [[TMP64:%.*]] = trunc i64 [[TMP63]] to i32 709 ; CHECK-NEXT: [[TMP65:%.*]] = add i32 [[TMP49]], [[TMP64]] 710 ; CHECK-NEXT: [[TMP66:%.*]] = sub i32 [[TMP49]], [[TMP64]] 711 ; CHECK-NEXT: [[TMP67:%.*]] = select i1 [[TMP57]], i32 [[TMP65]], i32 [[TMP66]] 712 ; CHECK-NEXT: [[TMP68:%.*]] = zext i32 [[TMP67]] to i64 713 ; CHECK-NEXT: [[TMP69:%.*]] = zext i32 [[TMP44]] to i64 714 ; CHECK-NEXT: [[TMP70:%.*]] = mul i64 [[TMP68]], [[TMP69]] 715 ; CHECK-NEXT: [[TMP71:%.*]] = trunc i64 [[TMP70]] to i32 716 ; CHECK-NEXT: [[TMP72:%.*]] = lshr i64 [[TMP70]], 32 717 ; CHECK-NEXT: [[TMP73:%.*]] = trunc i64 [[TMP72]] to i32 718 ; CHECK-NEXT: [[TMP74:%.*]] = mul i32 [[TMP73]], [[TMP45]] 719 ; CHECK-NEXT: [[TMP75:%.*]] = sub i32 [[TMP44]], [[TMP74]] 720 ; CHECK-NEXT: [[TMP76:%.*]] = icmp uge i32 [[TMP75]], [[TMP45]] 721 ; CHECK-NEXT: [[TMP77:%.*]] = select i1 [[TMP76]], i32 -1, i32 0 722 ; CHECK-NEXT: [[TMP78:%.*]] = icmp uge i32 [[TMP44]], [[TMP74]] 723 ; CHECK-NEXT: [[TMP79:%.*]] = select i1 [[TMP78]], i32 -1, i32 0 724 ; CHECK-NEXT: [[TMP80:%.*]] = and i32 [[TMP77]], [[TMP79]] 725 ; CHECK-NEXT: [[TMP81:%.*]] = icmp eq i32 [[TMP80]], 0 726 ; CHECK-NEXT: [[TMP82:%.*]] = sub i32 [[TMP75]], [[TMP45]] 727 ; CHECK-NEXT: [[TMP83:%.*]] = add i32 [[TMP75]], [[TMP45]] 728 ; CHECK-NEXT: [[TMP84:%.*]] = select i1 [[TMP81]], i32 [[TMP75]], i32 [[TMP82]] 729 ; CHECK-NEXT: [[TMP85:%.*]] = select i1 [[TMP78]], i32 [[TMP84]], i32 [[TMP83]] 730 ; CHECK-NEXT: [[TMP86:%.*]] = insertelement <4 x i32> [[TMP43]], i32 [[TMP85]], i64 1 731 ; CHECK-NEXT: [[TMP87:%.*]] = extractelement <4 x i32> [[X]], i64 2 732 ; CHECK-NEXT: [[TMP88:%.*]] = extractelement <4 x i32> [[Y]], i64 2 733 ; CHECK-NEXT: [[TMP89:%.*]] = uitofp i32 [[TMP88]] to float 734 ; CHECK-NEXT: [[TMP90:%.*]] = fdiv fast float 1.000000e+00, [[TMP89]] 735 ; CHECK-NEXT: [[TMP91:%.*]] = fmul fast float [[TMP90]], 0x41F0000000000000 736 ; CHECK-NEXT: [[TMP92:%.*]] = fptoui float 
[[TMP91]] to i32 737 ; CHECK-NEXT: [[TMP93:%.*]] = zext i32 [[TMP92]] to i64 738 ; CHECK-NEXT: [[TMP94:%.*]] = zext i32 [[TMP88]] to i64 739 ; CHECK-NEXT: [[TMP95:%.*]] = mul i64 [[TMP93]], [[TMP94]] 740 ; CHECK-NEXT: [[TMP96:%.*]] = trunc i64 [[TMP95]] to i32 741 ; CHECK-NEXT: [[TMP97:%.*]] = lshr i64 [[TMP95]], 32 742 ; CHECK-NEXT: [[TMP98:%.*]] = trunc i64 [[TMP97]] to i32 743 ; CHECK-NEXT: [[TMP99:%.*]] = sub i32 0, [[TMP96]] 744 ; CHECK-NEXT: [[TMP100:%.*]] = icmp eq i32 [[TMP98]], 0 745 ; CHECK-NEXT: [[TMP101:%.*]] = select i1 [[TMP100]], i32 [[TMP99]], i32 [[TMP96]] 746 ; CHECK-NEXT: [[TMP102:%.*]] = zext i32 [[TMP101]] to i64 747 ; CHECK-NEXT: [[TMP103:%.*]] = zext i32 [[TMP92]] to i64 748 ; CHECK-NEXT: [[TMP104:%.*]] = mul i64 [[TMP102]], [[TMP103]] 749 ; CHECK-NEXT: [[TMP105:%.*]] = trunc i64 [[TMP104]] to i32 750 ; CHECK-NEXT: [[TMP106:%.*]] = lshr i64 [[TMP104]], 32 751 ; CHECK-NEXT: [[TMP107:%.*]] = trunc i64 [[TMP106]] to i32 752 ; CHECK-NEXT: [[TMP108:%.*]] = add i32 [[TMP92]], [[TMP107]] 753 ; CHECK-NEXT: [[TMP109:%.*]] = sub i32 [[TMP92]], [[TMP107]] 754 ; CHECK-NEXT: [[TMP110:%.*]] = select i1 [[TMP100]], i32 [[TMP108]], i32 [[TMP109]] 755 ; CHECK-NEXT: [[TMP111:%.*]] = zext i32 [[TMP110]] to i64 756 ; CHECK-NEXT: [[TMP112:%.*]] = zext i32 [[TMP87]] to i64 757 ; CHECK-NEXT: [[TMP113:%.*]] = mul i64 [[TMP111]], [[TMP112]] 758 ; CHECK-NEXT: [[TMP114:%.*]] = trunc i64 [[TMP113]] to i32 759 ; CHECK-NEXT: [[TMP115:%.*]] = lshr i64 [[TMP113]], 32 760 ; CHECK-NEXT: [[TMP116:%.*]] = trunc i64 [[TMP115]] to i32 761 ; CHECK-NEXT: [[TMP117:%.*]] = mul i32 [[TMP116]], [[TMP88]] 762 ; CHECK-NEXT: [[TMP118:%.*]] = sub i32 [[TMP87]], [[TMP117]] 763 ; CHECK-NEXT: [[TMP119:%.*]] = icmp uge i32 [[TMP118]], [[TMP88]] 764 ; CHECK-NEXT: [[TMP120:%.*]] = select i1 [[TMP119]], i32 -1, i32 0 765 ; CHECK-NEXT: [[TMP121:%.*]] = icmp uge i32 [[TMP87]], [[TMP117]] 766 ; CHECK-NEXT: [[TMP122:%.*]] = select i1 [[TMP121]], i32 -1, i32 0 767 ; CHECK-NEXT: [[TMP123:%.*]] = and 
i32 [[TMP120]], [[TMP122]] 768 ; CHECK-NEXT: [[TMP124:%.*]] = icmp eq i32 [[TMP123]], 0 769 ; CHECK-NEXT: [[TMP125:%.*]] = sub i32 [[TMP118]], [[TMP88]] 770 ; CHECK-NEXT: [[TMP126:%.*]] = add i32 [[TMP118]], [[TMP88]] 771 ; CHECK-NEXT: [[TMP127:%.*]] = select i1 [[TMP124]], i32 [[TMP118]], i32 [[TMP125]] 772 ; CHECK-NEXT: [[TMP128:%.*]] = select i1 [[TMP121]], i32 [[TMP127]], i32 [[TMP126]] 773 ; CHECK-NEXT: [[TMP129:%.*]] = insertelement <4 x i32> [[TMP86]], i32 [[TMP128]], i64 2 774 ; CHECK-NEXT: [[TMP130:%.*]] = extractelement <4 x i32> [[X]], i64 3 775 ; CHECK-NEXT: [[TMP131:%.*]] = extractelement <4 x i32> [[Y]], i64 3 776 ; CHECK-NEXT: [[TMP132:%.*]] = uitofp i32 [[TMP131]] to float 777 ; CHECK-NEXT: [[TMP133:%.*]] = fdiv fast float 1.000000e+00, [[TMP132]] 778 ; CHECK-NEXT: [[TMP134:%.*]] = fmul fast float [[TMP133]], 0x41F0000000000000 779 ; CHECK-NEXT: [[TMP135:%.*]] = fptoui float [[TMP134]] to i32 780 ; CHECK-NEXT: [[TMP136:%.*]] = zext i32 [[TMP135]] to i64 781 ; CHECK-NEXT: [[TMP137:%.*]] = zext i32 [[TMP131]] to i64 782 ; CHECK-NEXT: [[TMP138:%.*]] = mul i64 [[TMP136]], [[TMP137]] 783 ; CHECK-NEXT: [[TMP139:%.*]] = trunc i64 [[TMP138]] to i32 784 ; CHECK-NEXT: [[TMP140:%.*]] = lshr i64 [[TMP138]], 32 785 ; CHECK-NEXT: [[TMP141:%.*]] = trunc i64 [[TMP140]] to i32 786 ; CHECK-NEXT: [[TMP142:%.*]] = sub i32 0, [[TMP139]] 787 ; CHECK-NEXT: [[TMP143:%.*]] = icmp eq i32 [[TMP141]], 0 788 ; CHECK-NEXT: [[TMP144:%.*]] = select i1 [[TMP143]], i32 [[TMP142]], i32 [[TMP139]] 789 ; CHECK-NEXT: [[TMP145:%.*]] = zext i32 [[TMP144]] to i64 790 ; CHECK-NEXT: [[TMP146:%.*]] = zext i32 [[TMP135]] to i64 791 ; CHECK-NEXT: [[TMP147:%.*]] = mul i64 [[TMP145]], [[TMP146]] 792 ; CHECK-NEXT: [[TMP148:%.*]] = trunc i64 [[TMP147]] to i32 793 ; CHECK-NEXT: [[TMP149:%.*]] = lshr i64 [[TMP147]], 32 794 ; CHECK-NEXT: [[TMP150:%.*]] = trunc i64 [[TMP149]] to i32 795 ; CHECK-NEXT: [[TMP151:%.*]] = add i32 [[TMP135]], [[TMP150]] 796 ; CHECK-NEXT: [[TMP152:%.*]] = sub i32 [[TMP135]], 
[[TMP150]] 797 ; CHECK-NEXT: [[TMP153:%.*]] = select i1 [[TMP143]], i32 [[TMP151]], i32 [[TMP152]] 798 ; CHECK-NEXT: [[TMP154:%.*]] = zext i32 [[TMP153]] to i64 799 ; CHECK-NEXT: [[TMP155:%.*]] = zext i32 [[TMP130]] to i64 800 ; CHECK-NEXT: [[TMP156:%.*]] = mul i64 [[TMP154]], [[TMP155]] 801 ; CHECK-NEXT: [[TMP157:%.*]] = trunc i64 [[TMP156]] to i32 802 ; CHECK-NEXT: [[TMP158:%.*]] = lshr i64 [[TMP156]], 32 803 ; CHECK-NEXT: [[TMP159:%.*]] = trunc i64 [[TMP158]] to i32 804 ; CHECK-NEXT: [[TMP160:%.*]] = mul i32 [[TMP159]], [[TMP131]] 805 ; CHECK-NEXT: [[TMP161:%.*]] = sub i32 [[TMP130]], [[TMP160]] 806 ; CHECK-NEXT: [[TMP162:%.*]] = icmp uge i32 [[TMP161]], [[TMP131]] 807 ; CHECK-NEXT: [[TMP163:%.*]] = select i1 [[TMP162]], i32 -1, i32 0 808 ; CHECK-NEXT: [[TMP164:%.*]] = icmp uge i32 [[TMP130]], [[TMP160]] 809 ; CHECK-NEXT: [[TMP165:%.*]] = select i1 [[TMP164]], i32 -1, i32 0 810 ; CHECK-NEXT: [[TMP166:%.*]] = and i32 [[TMP163]], [[TMP165]] 811 ; CHECK-NEXT: [[TMP167:%.*]] = icmp eq i32 [[TMP166]], 0 812 ; CHECK-NEXT: [[TMP168:%.*]] = sub i32 [[TMP161]], [[TMP131]] 813 ; CHECK-NEXT: [[TMP169:%.*]] = add i32 [[TMP161]], [[TMP131]] 814 ; CHECK-NEXT: [[TMP170:%.*]] = select i1 [[TMP167]], i32 [[TMP161]], i32 [[TMP168]] 815 ; CHECK-NEXT: [[TMP171:%.*]] = select i1 [[TMP164]], i32 [[TMP170]], i32 [[TMP169]] 816 ; CHECK-NEXT: [[TMP172:%.*]] = insertelement <4 x i32> [[TMP129]], i32 [[TMP171]], i64 3 817 ; CHECK-NEXT: store <4 x i32> [[TMP172]], <4 x i32> addrspace(1)* [[OUT:%.*]] 818 ; CHECK-NEXT: ret void 819 ; 820 %r = urem <4 x i32> %x, %y 821 store <4 x i32> %r, <4 x i32> addrspace(1)* %out 822 ret void 823 } 824 ; Vector sdiv coverage: the expected output scalarizes the <4 x i32> sdiv; every lane applies the ashr-31 sign masks (add/xor) to both operands, runs the reciprocal-based unsigned quotient sequence, then xor/sub with the combined sign mask before insertelement. 825 define amdgpu_kernel void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) { 826 ; CHECK-LABEL: @sdiv_v4i32( 827 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i64 0 828 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[Y:%.*]], i64 0 829 ; CHECK-NEXT: [[TMP3:%.*]] = ashr i32 [[TMP1]], 31 830 ; CHECK-NEXT: 
[[TMP4:%.*]] = ashr i32 [[TMP2]], 31 831 ; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] 832 ; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP1]], [[TMP3]] 833 ; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP2]], [[TMP4]] 834 ; CHECK-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP3]] 835 ; CHECK-NEXT: [[TMP9:%.*]] = xor i32 [[TMP7]], [[TMP4]] 836 ; CHECK-NEXT: [[TMP10:%.*]] = uitofp i32 [[TMP9]] to float 837 ; CHECK-NEXT: [[TMP11:%.*]] = fdiv fast float 1.000000e+00, [[TMP10]] 838 ; CHECK-NEXT: [[TMP12:%.*]] = fmul fast float [[TMP11]], 0x41F0000000000000 839 ; CHECK-NEXT: [[TMP13:%.*]] = fptoui float [[TMP12]] to i32 840 ; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 841 ; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP9]] to i64 842 ; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP14]], [[TMP15]] 843 ; CHECK-NEXT: [[TMP17:%.*]] = trunc i64 [[TMP16]] to i32 844 ; CHECK-NEXT: [[TMP18:%.*]] = lshr i64 [[TMP16]], 32 845 ; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[TMP18]] to i32 846 ; CHECK-NEXT: [[TMP20:%.*]] = sub i32 0, [[TMP17]] 847 ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], 0 848 ; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 [[TMP17]] 849 ; CHECK-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 850 ; CHECK-NEXT: [[TMP24:%.*]] = zext i32 [[TMP13]] to i64 851 ; CHECK-NEXT: [[TMP25:%.*]] = mul i64 [[TMP23]], [[TMP24]] 852 ; CHECK-NEXT: [[TMP26:%.*]] = trunc i64 [[TMP25]] to i32 853 ; CHECK-NEXT: [[TMP27:%.*]] = lshr i64 [[TMP25]], 32 854 ; CHECK-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32 855 ; CHECK-NEXT: [[TMP29:%.*]] = add i32 [[TMP13]], [[TMP28]] 856 ; CHECK-NEXT: [[TMP30:%.*]] = sub i32 [[TMP13]], [[TMP28]] 857 ; CHECK-NEXT: [[TMP31:%.*]] = select i1 [[TMP21]], i32 [[TMP29]], i32 [[TMP30]] 858 ; CHECK-NEXT: [[TMP32:%.*]] = zext i32 [[TMP31]] to i64 859 ; CHECK-NEXT: [[TMP33:%.*]] = zext i32 [[TMP8]] to i64 860 ; CHECK-NEXT: [[TMP34:%.*]] = mul i64 [[TMP32]], [[TMP33]] 861 ; CHECK-NEXT: [[TMP35:%.*]] = trunc i64 [[TMP34]] to i32 862 ; 
CHECK-NEXT: [[TMP36:%.*]] = lshr i64 [[TMP34]], 32 863 ; CHECK-NEXT: [[TMP37:%.*]] = trunc i64 [[TMP36]] to i32 864 ; CHECK-NEXT: [[TMP38:%.*]] = mul i32 [[TMP37]], [[TMP9]] 865 ; CHECK-NEXT: [[TMP39:%.*]] = sub i32 [[TMP8]], [[TMP38]] 866 ; CHECK-NEXT: [[TMP40:%.*]] = icmp uge i32 [[TMP39]], [[TMP9]] 867 ; CHECK-NEXT: [[TMP41:%.*]] = select i1 [[TMP40]], i32 -1, i32 0 868 ; CHECK-NEXT: [[TMP42:%.*]] = icmp uge i32 [[TMP8]], [[TMP38]] 869 ; CHECK-NEXT: [[TMP43:%.*]] = select i1 [[TMP42]], i32 -1, i32 0 870 ; CHECK-NEXT: [[TMP44:%.*]] = and i32 [[TMP41]], [[TMP43]] 871 ; CHECK-NEXT: [[TMP45:%.*]] = icmp eq i32 [[TMP44]], 0 872 ; CHECK-NEXT: [[TMP46:%.*]] = add i32 [[TMP37]], 1 873 ; CHECK-NEXT: [[TMP47:%.*]] = sub i32 [[TMP37]], 1 874 ; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP45]], i32 [[TMP37]], i32 [[TMP46]] 875 ; CHECK-NEXT: [[TMP49:%.*]] = select i1 [[TMP42]], i32 [[TMP48]], i32 [[TMP47]] 876 ; CHECK-NEXT: [[TMP50:%.*]] = xor i32 [[TMP49]], [[TMP5]] 877 ; CHECK-NEXT: [[TMP51:%.*]] = sub i32 [[TMP50]], [[TMP5]] 878 ; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x i32> undef, i32 [[TMP51]], i64 0 879 ; CHECK-NEXT: [[TMP53:%.*]] = extractelement <4 x i32> [[X]], i64 1 880 ; CHECK-NEXT: [[TMP54:%.*]] = extractelement <4 x i32> [[Y]], i64 1 881 ; CHECK-NEXT: [[TMP55:%.*]] = ashr i32 [[TMP53]], 31 882 ; CHECK-NEXT: [[TMP56:%.*]] = ashr i32 [[TMP54]], 31 883 ; CHECK-NEXT: [[TMP57:%.*]] = xor i32 [[TMP55]], [[TMP56]] 884 ; CHECK-NEXT: [[TMP58:%.*]] = add i32 [[TMP53]], [[TMP55]] 885 ; CHECK-NEXT: [[TMP59:%.*]] = add i32 [[TMP54]], [[TMP56]] 886 ; CHECK-NEXT: [[TMP60:%.*]] = xor i32 [[TMP58]], [[TMP55]] 887 ; CHECK-NEXT: [[TMP61:%.*]] = xor i32 [[TMP59]], [[TMP56]] 888 ; CHECK-NEXT: [[TMP62:%.*]] = uitofp i32 [[TMP61]] to float 889 ; CHECK-NEXT: [[TMP63:%.*]] = fdiv fast float 1.000000e+00, [[TMP62]] 890 ; CHECK-NEXT: [[TMP64:%.*]] = fmul fast float [[TMP63]], 0x41F0000000000000 891 ; CHECK-NEXT: [[TMP65:%.*]] = fptoui float [[TMP64]] to i32 892 ; CHECK-NEXT: 
[[TMP66:%.*]] = zext i32 [[TMP65]] to i64 893 ; CHECK-NEXT: [[TMP67:%.*]] = zext i32 [[TMP61]] to i64 894 ; CHECK-NEXT: [[TMP68:%.*]] = mul i64 [[TMP66]], [[TMP67]] 895 ; CHECK-NEXT: [[TMP69:%.*]] = trunc i64 [[TMP68]] to i32 896 ; CHECK-NEXT: [[TMP70:%.*]] = lshr i64 [[TMP68]], 32 897 ; CHECK-NEXT: [[TMP71:%.*]] = trunc i64 [[TMP70]] to i32 898 ; CHECK-NEXT: [[TMP72:%.*]] = sub i32 0, [[TMP69]] 899 ; CHECK-NEXT: [[TMP73:%.*]] = icmp eq i32 [[TMP71]], 0 900 ; CHECK-NEXT: [[TMP74:%.*]] = select i1 [[TMP73]], i32 [[TMP72]], i32 [[TMP69]] 901 ; CHECK-NEXT: [[TMP75:%.*]] = zext i32 [[TMP74]] to i64 902 ; CHECK-NEXT: [[TMP76:%.*]] = zext i32 [[TMP65]] to i64 903 ; CHECK-NEXT: [[TMP77:%.*]] = mul i64 [[TMP75]], [[TMP76]] 904 ; CHECK-NEXT: [[TMP78:%.*]] = trunc i64 [[TMP77]] to i32 905 ; CHECK-NEXT: [[TMP79:%.*]] = lshr i64 [[TMP77]], 32 906 ; CHECK-NEXT: [[TMP80:%.*]] = trunc i64 [[TMP79]] to i32 907 ; CHECK-NEXT: [[TMP81:%.*]] = add i32 [[TMP65]], [[TMP80]] 908 ; CHECK-NEXT: [[TMP82:%.*]] = sub i32 [[TMP65]], [[TMP80]] 909 ; CHECK-NEXT: [[TMP83:%.*]] = select i1 [[TMP73]], i32 [[TMP81]], i32 [[TMP82]] 910 ; CHECK-NEXT: [[TMP84:%.*]] = zext i32 [[TMP83]] to i64 911 ; CHECK-NEXT: [[TMP85:%.*]] = zext i32 [[TMP60]] to i64 912 ; CHECK-NEXT: [[TMP86:%.*]] = mul i64 [[TMP84]], [[TMP85]] 913 ; CHECK-NEXT: [[TMP87:%.*]] = trunc i64 [[TMP86]] to i32 914 ; CHECK-NEXT: [[TMP88:%.*]] = lshr i64 [[TMP86]], 32 915 ; CHECK-NEXT: [[TMP89:%.*]] = trunc i64 [[TMP88]] to i32 916 ; CHECK-NEXT: [[TMP90:%.*]] = mul i32 [[TMP89]], [[TMP61]] 917 ; CHECK-NEXT: [[TMP91:%.*]] = sub i32 [[TMP60]], [[TMP90]] 918 ; CHECK-NEXT: [[TMP92:%.*]] = icmp uge i32 [[TMP91]], [[TMP61]] 919 ; CHECK-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], i32 -1, i32 0 920 ; CHECK-NEXT: [[TMP94:%.*]] = icmp uge i32 [[TMP60]], [[TMP90]] 921 ; CHECK-NEXT: [[TMP95:%.*]] = select i1 [[TMP94]], i32 -1, i32 0 922 ; CHECK-NEXT: [[TMP96:%.*]] = and i32 [[TMP93]], [[TMP95]] 923 ; CHECK-NEXT: [[TMP97:%.*]] = icmp eq i32 [[TMP96]], 0 
924 ; CHECK-NEXT: [[TMP98:%.*]] = add i32 [[TMP89]], 1 925 ; CHECK-NEXT: [[TMP99:%.*]] = sub i32 [[TMP89]], 1 926 ; CHECK-NEXT: [[TMP100:%.*]] = select i1 [[TMP97]], i32 [[TMP89]], i32 [[TMP98]] 927 ; CHECK-NEXT: [[TMP101:%.*]] = select i1 [[TMP94]], i32 [[TMP100]], i32 [[TMP99]] 928 ; CHECK-NEXT: [[TMP102:%.*]] = xor i32 [[TMP101]], [[TMP57]] 929 ; CHECK-NEXT: [[TMP103:%.*]] = sub i32 [[TMP102]], [[TMP57]] 930 ; CHECK-NEXT: [[TMP104:%.*]] = insertelement <4 x i32> [[TMP52]], i32 [[TMP103]], i64 1 931 ; CHECK-NEXT: [[TMP105:%.*]] = extractelement <4 x i32> [[X]], i64 2 932 ; CHECK-NEXT: [[TMP106:%.*]] = extractelement <4 x i32> [[Y]], i64 2 933 ; CHECK-NEXT: [[TMP107:%.*]] = ashr i32 [[TMP105]], 31 934 ; CHECK-NEXT: [[TMP108:%.*]] = ashr i32 [[TMP106]], 31 935 ; CHECK-NEXT: [[TMP109:%.*]] = xor i32 [[TMP107]], [[TMP108]] 936 ; CHECK-NEXT: [[TMP110:%.*]] = add i32 [[TMP105]], [[TMP107]] 937 ; CHECK-NEXT: [[TMP111:%.*]] = add i32 [[TMP106]], [[TMP108]] 938 ; CHECK-NEXT: [[TMP112:%.*]] = xor i32 [[TMP110]], [[TMP107]] 939 ; CHECK-NEXT: [[TMP113:%.*]] = xor i32 [[TMP111]], [[TMP108]] 940 ; CHECK-NEXT: [[TMP114:%.*]] = uitofp i32 [[TMP113]] to float 941 ; CHECK-NEXT: [[TMP115:%.*]] = fdiv fast float 1.000000e+00, [[TMP114]] 942 ; CHECK-NEXT: [[TMP116:%.*]] = fmul fast float [[TMP115]], 0x41F0000000000000 943 ; CHECK-NEXT: [[TMP117:%.*]] = fptoui float [[TMP116]] to i32 944 ; CHECK-NEXT: [[TMP118:%.*]] = zext i32 [[TMP117]] to i64 945 ; CHECK-NEXT: [[TMP119:%.*]] = zext i32 [[TMP113]] to i64 946 ; CHECK-NEXT: [[TMP120:%.*]] = mul i64 [[TMP118]], [[TMP119]] 947 ; CHECK-NEXT: [[TMP121:%.*]] = trunc i64 [[TMP120]] to i32 948 ; CHECK-NEXT: [[TMP122:%.*]] = lshr i64 [[TMP120]], 32 949 ; CHECK-NEXT: [[TMP123:%.*]] = trunc i64 [[TMP122]] to i32 950 ; CHECK-NEXT: [[TMP124:%.*]] = sub i32 0, [[TMP121]] 951 ; CHECK-NEXT: [[TMP125:%.*]] = icmp eq i32 [[TMP123]], 0 952 ; CHECK-NEXT: [[TMP126:%.*]] = select i1 [[TMP125]], i32 [[TMP124]], i32 [[TMP121]] 953 ; CHECK-NEXT: 
[[TMP127:%.*]] = zext i32 [[TMP126]] to i64 954 ; CHECK-NEXT: [[TMP128:%.*]] = zext i32 [[TMP117]] to i64 955 ; CHECK-NEXT: [[TMP129:%.*]] = mul i64 [[TMP127]], [[TMP128]] 956 ; CHECK-NEXT: [[TMP130:%.*]] = trunc i64 [[TMP129]] to i32 957 ; CHECK-NEXT: [[TMP131:%.*]] = lshr i64 [[TMP129]], 32 958 ; CHECK-NEXT: [[TMP132:%.*]] = trunc i64 [[TMP131]] to i32 959 ; CHECK-NEXT: [[TMP133:%.*]] = add i32 [[TMP117]], [[TMP132]] 960 ; CHECK-NEXT: [[TMP134:%.*]] = sub i32 [[TMP117]], [[TMP132]] 961 ; CHECK-NEXT: [[TMP135:%.*]] = select i1 [[TMP125]], i32 [[TMP133]], i32 [[TMP134]] 962 ; CHECK-NEXT: [[TMP136:%.*]] = zext i32 [[TMP135]] to i64 963 ; CHECK-NEXT: [[TMP137:%.*]] = zext i32 [[TMP112]] to i64 964 ; CHECK-NEXT: [[TMP138:%.*]] = mul i64 [[TMP136]], [[TMP137]] 965 ; CHECK-NEXT: [[TMP139:%.*]] = trunc i64 [[TMP138]] to i32 966 ; CHECK-NEXT: [[TMP140:%.*]] = lshr i64 [[TMP138]], 32 967 ; CHECK-NEXT: [[TMP141:%.*]] = trunc i64 [[TMP140]] to i32 968 ; CHECK-NEXT: [[TMP142:%.*]] = mul i32 [[TMP141]], [[TMP113]] 969 ; CHECK-NEXT: [[TMP143:%.*]] = sub i32 [[TMP112]], [[TMP142]] 970 ; CHECK-NEXT: [[TMP144:%.*]] = icmp uge i32 [[TMP143]], [[TMP113]] 971 ; CHECK-NEXT: [[TMP145:%.*]] = select i1 [[TMP144]], i32 -1, i32 0 972 ; CHECK-NEXT: [[TMP146:%.*]] = icmp uge i32 [[TMP112]], [[TMP142]] 973 ; CHECK-NEXT: [[TMP147:%.*]] = select i1 [[TMP146]], i32 -1, i32 0 974 ; CHECK-NEXT: [[TMP148:%.*]] = and i32 [[TMP145]], [[TMP147]] 975 ; CHECK-NEXT: [[TMP149:%.*]] = icmp eq i32 [[TMP148]], 0 976 ; CHECK-NEXT: [[TMP150:%.*]] = add i32 [[TMP141]], 1 977 ; CHECK-NEXT: [[TMP151:%.*]] = sub i32 [[TMP141]], 1 978 ; CHECK-NEXT: [[TMP152:%.*]] = select i1 [[TMP149]], i32 [[TMP141]], i32 [[TMP150]] 979 ; CHECK-NEXT: [[TMP153:%.*]] = select i1 [[TMP146]], i32 [[TMP152]], i32 [[TMP151]] 980 ; CHECK-NEXT: [[TMP154:%.*]] = xor i32 [[TMP153]], [[TMP109]] 981 ; CHECK-NEXT: [[TMP155:%.*]] = sub i32 [[TMP154]], [[TMP109]] 982 ; CHECK-NEXT: [[TMP156:%.*]] = insertelement <4 x i32> [[TMP104]], i32 
[[TMP155]], i64 2 983 ; CHECK-NEXT: [[TMP157:%.*]] = extractelement <4 x i32> [[X]], i64 3 984 ; CHECK-NEXT: [[TMP158:%.*]] = extractelement <4 x i32> [[Y]], i64 3 985 ; CHECK-NEXT: [[TMP159:%.*]] = ashr i32 [[TMP157]], 31 986 ; CHECK-NEXT: [[TMP160:%.*]] = ashr i32 [[TMP158]], 31 987 ; CHECK-NEXT: [[TMP161:%.*]] = xor i32 [[TMP159]], [[TMP160]] 988 ; CHECK-NEXT: [[TMP162:%.*]] = add i32 [[TMP157]], [[TMP159]] 989 ; CHECK-NEXT: [[TMP163:%.*]] = add i32 [[TMP158]], [[TMP160]] 990 ; CHECK-NEXT: [[TMP164:%.*]] = xor i32 [[TMP162]], [[TMP159]] 991 ; CHECK-NEXT: [[TMP165:%.*]] = xor i32 [[TMP163]], [[TMP160]] 992 ; CHECK-NEXT: [[TMP166:%.*]] = uitofp i32 [[TMP165]] to float 993 ; CHECK-NEXT: [[TMP167:%.*]] = fdiv fast float 1.000000e+00, [[TMP166]] 994 ; CHECK-NEXT: [[TMP168:%.*]] = fmul fast float [[TMP167]], 0x41F0000000000000 995 ; CHECK-NEXT: [[TMP169:%.*]] = fptoui float [[TMP168]] to i32 996 ; CHECK-NEXT: [[TMP170:%.*]] = zext i32 [[TMP169]] to i64 997 ; CHECK-NEXT: [[TMP171:%.*]] = zext i32 [[TMP165]] to i64 998 ; CHECK-NEXT: [[TMP172:%.*]] = mul i64 [[TMP170]], [[TMP171]] 999 ; CHECK-NEXT: [[TMP173:%.*]] = trunc i64 [[TMP172]] to i32 1000 ; CHECK-NEXT: [[TMP174:%.*]] = lshr i64 [[TMP172]], 32 1001 ; CHECK-NEXT: [[TMP175:%.*]] = trunc i64 [[TMP174]] to i32 1002 ; CHECK-NEXT: [[TMP176:%.*]] = sub i32 0, [[TMP173]] 1003 ; CHECK-NEXT: [[TMP177:%.*]] = icmp eq i32 [[TMP175]], 0 1004 ; CHECK-NEXT: [[TMP178:%.*]] = select i1 [[TMP177]], i32 [[TMP176]], i32 [[TMP173]] 1005 ; CHECK-NEXT: [[TMP179:%.*]] = zext i32 [[TMP178]] to i64 1006 ; CHECK-NEXT: [[TMP180:%.*]] = zext i32 [[TMP169]] to i64 1007 ; CHECK-NEXT: [[TMP181:%.*]] = mul i64 [[TMP179]], [[TMP180]] 1008 ; CHECK-NEXT: [[TMP182:%.*]] = trunc i64 [[TMP181]] to i32 1009 ; CHECK-NEXT: [[TMP183:%.*]] = lshr i64 [[TMP181]], 32 1010 ; CHECK-NEXT: [[TMP184:%.*]] = trunc i64 [[TMP183]] to i32 1011 ; CHECK-NEXT: [[TMP185:%.*]] = add i32 [[TMP169]], [[TMP184]] 1012 ; CHECK-NEXT: [[TMP186:%.*]] = sub i32 [[TMP169]], 
[[TMP184]] 1013 ; CHECK-NEXT: [[TMP187:%.*]] = select i1 [[TMP177]], i32 [[TMP185]], i32 [[TMP186]] 1014 ; CHECK-NEXT: [[TMP188:%.*]] = zext i32 [[TMP187]] to i64 1015 ; CHECK-NEXT: [[TMP189:%.*]] = zext i32 [[TMP164]] to i64 1016 ; CHECK-NEXT: [[TMP190:%.*]] = mul i64 [[TMP188]], [[TMP189]] 1017 ; CHECK-NEXT: [[TMP191:%.*]] = trunc i64 [[TMP190]] to i32 1018 ; CHECK-NEXT: [[TMP192:%.*]] = lshr i64 [[TMP190]], 32 1019 ; CHECK-NEXT: [[TMP193:%.*]] = trunc i64 [[TMP192]] to i32 1020 ; CHECK-NEXT: [[TMP194:%.*]] = mul i32 [[TMP193]], [[TMP165]] 1021 ; CHECK-NEXT: [[TMP195:%.*]] = sub i32 [[TMP164]], [[TMP194]] 1022 ; CHECK-NEXT: [[TMP196:%.*]] = icmp uge i32 [[TMP195]], [[TMP165]] 1023 ; CHECK-NEXT: [[TMP197:%.*]] = select i1 [[TMP196]], i32 -1, i32 0 1024 ; CHECK-NEXT: [[TMP198:%.*]] = icmp uge i32 [[TMP164]], [[TMP194]] 1025 ; CHECK-NEXT: [[TMP199:%.*]] = select i1 [[TMP198]], i32 -1, i32 0 1026 ; CHECK-NEXT: [[TMP200:%.*]] = and i32 [[TMP197]], [[TMP199]] 1027 ; CHECK-NEXT: [[TMP201:%.*]] = icmp eq i32 [[TMP200]], 0 1028 ; CHECK-NEXT: [[TMP202:%.*]] = add i32 [[TMP193]], 1 1029 ; CHECK-NEXT: [[TMP203:%.*]] = sub i32 [[TMP193]], 1 1030 ; CHECK-NEXT: [[TMP204:%.*]] = select i1 [[TMP201]], i32 [[TMP193]], i32 [[TMP202]] 1031 ; CHECK-NEXT: [[TMP205:%.*]] = select i1 [[TMP198]], i32 [[TMP204]], i32 [[TMP203]] 1032 ; CHECK-NEXT: [[TMP206:%.*]] = xor i32 [[TMP205]], [[TMP161]] 1033 ; CHECK-NEXT: [[TMP207:%.*]] = sub i32 [[TMP206]], [[TMP161]] 1034 ; CHECK-NEXT: [[TMP208:%.*]] = insertelement <4 x i32> [[TMP156]], i32 [[TMP207]], i64 3 1035 ; CHECK-NEXT: store <4 x i32> [[TMP208]], <4 x i32> addrspace(1)* [[OUT:%.*]] 1036 ; CHECK-NEXT: ret void 1037 ; 1038 %r = sdiv <4 x i32> %x, %y 1039 store <4 x i32> %r, <4 x i32> addrspace(1)* %out 1040 ret void 1041 } 1042 1043 define amdgpu_kernel void @srem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) { 1044 ; CHECK-LABEL: @srem_v4i32( 1045 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i64 0 
1046 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[Y:%.*]], i64 0 1047 ; CHECK-NEXT: [[TMP3:%.*]] = ashr i32 [[TMP1]], 31 1048 ; CHECK-NEXT: [[TMP4:%.*]] = ashr i32 [[TMP2]], 31 1049 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP1]], [[TMP3]] 1050 ; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP2]], [[TMP4]] 1051 ; CHECK-NEXT: [[TMP7:%.*]] = xor i32 [[TMP5]], [[TMP3]] 1052 ; CHECK-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP4]] 1053 ; CHECK-NEXT: [[TMP9:%.*]] = uitofp i32 [[TMP8]] to float 1054 ; CHECK-NEXT: [[TMP10:%.*]] = fdiv fast float 1.000000e+00, [[TMP9]] 1055 ; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP10]], 0x41F0000000000000 1056 ; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP11]] to i32 1057 ; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 1058 ; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP8]] to i64 1059 ; CHECK-NEXT: [[TMP15:%.*]] = mul i64 [[TMP13]], [[TMP14]] 1060 ; CHECK-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP15]] to i32 1061 ; CHECK-NEXT: [[TMP17:%.*]] = lshr i64 [[TMP15]], 32 1062 ; CHECK-NEXT: [[TMP18:%.*]] = trunc i64 [[TMP17]] to i32 1063 ; CHECK-NEXT: [[TMP19:%.*]] = sub i32 0, [[TMP16]] 1064 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i32 [[TMP18]], 0 1065 ; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP19]], i32 [[TMP16]] 1066 ; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 1067 ; CHECK-NEXT: [[TMP23:%.*]] = zext i32 [[TMP12]] to i64 1068 ; CHECK-NEXT: [[TMP24:%.*]] = mul i64 [[TMP22]], [[TMP23]] 1069 ; CHECK-NEXT: [[TMP25:%.*]] = trunc i64 [[TMP24]] to i32 1070 ; CHECK-NEXT: [[TMP26:%.*]] = lshr i64 [[TMP24]], 32 1071 ; CHECK-NEXT: [[TMP27:%.*]] = trunc i64 [[TMP26]] to i32 1072 ; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[TMP12]], [[TMP27]] 1073 ; CHECK-NEXT: [[TMP29:%.*]] = sub i32 [[TMP12]], [[TMP27]] 1074 ; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP20]], i32 [[TMP28]], i32 [[TMP29]] 1075 ; CHECK-NEXT: [[TMP31:%.*]] = zext i32 [[TMP30]] to i64 1076 ; CHECK-NEXT: [[TMP32:%.*]] = zext i32 [[TMP7]] to i64 1077 ; 
CHECK-NEXT: [[TMP33:%.*]] = mul i64 [[TMP31]], [[TMP32]] 1078 ; CHECK-NEXT: [[TMP34:%.*]] = trunc i64 [[TMP33]] to i32 1079 ; CHECK-NEXT: [[TMP35:%.*]] = lshr i64 [[TMP33]], 32 1080 ; CHECK-NEXT: [[TMP36:%.*]] = trunc i64 [[TMP35]] to i32 1081 ; CHECK-NEXT: [[TMP37:%.*]] = mul i32 [[TMP36]], [[TMP8]] 1082 ; CHECK-NEXT: [[TMP38:%.*]] = sub i32 [[TMP7]], [[TMP37]] 1083 ; CHECK-NEXT: [[TMP39:%.*]] = icmp uge i32 [[TMP38]], [[TMP8]] 1084 ; CHECK-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], i32 -1, i32 0 1085 ; CHECK-NEXT: [[TMP41:%.*]] = icmp uge i32 [[TMP7]], [[TMP37]] 1086 ; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 -1, i32 0 1087 ; CHECK-NEXT: [[TMP43:%.*]] = and i32 [[TMP40]], [[TMP42]] 1088 ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP43]], 0 1089 ; CHECK-NEXT: [[TMP45:%.*]] = sub i32 [[TMP38]], [[TMP8]] 1090 ; CHECK-NEXT: [[TMP46:%.*]] = add i32 [[TMP38]], [[TMP8]] 1091 ; CHECK-NEXT: [[TMP47:%.*]] = select i1 [[TMP44]], i32 [[TMP38]], i32 [[TMP45]] 1092 ; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP41]], i32 [[TMP47]], i32 [[TMP46]] 1093 ; CHECK-NEXT: [[TMP49:%.*]] = xor i32 [[TMP48]], [[TMP3]] 1094 ; CHECK-NEXT: [[TMP50:%.*]] = sub i32 [[TMP49]], [[TMP3]] 1095 ; CHECK-NEXT: [[TMP51:%.*]] = insertelement <4 x i32> undef, i32 [[TMP50]], i64 0 1096 ; CHECK-NEXT: [[TMP52:%.*]] = extractelement <4 x i32> [[X]], i64 1 1097 ; CHECK-NEXT: [[TMP53:%.*]] = extractelement <4 x i32> [[Y]], i64 1 1098 ; CHECK-NEXT: [[TMP54:%.*]] = ashr i32 [[TMP52]], 31 1099 ; CHECK-NEXT: [[TMP55:%.*]] = ashr i32 [[TMP53]], 31 1100 ; CHECK-NEXT: [[TMP56:%.*]] = add i32 [[TMP52]], [[TMP54]] 1101 ; CHECK-NEXT: [[TMP57:%.*]] = add i32 [[TMP53]], [[TMP55]] 1102 ; CHECK-NEXT: [[TMP58:%.*]] = xor i32 [[TMP56]], [[TMP54]] 1103 ; CHECK-NEXT: [[TMP59:%.*]] = xor i32 [[TMP57]], [[TMP55]] 1104 ; CHECK-NEXT: [[TMP60:%.*]] = uitofp i32 [[TMP59]] to float 1105 ; CHECK-NEXT: [[TMP61:%.*]] = fdiv fast float 1.000000e+00, [[TMP60]] 1106 ; CHECK-NEXT: [[TMP62:%.*]] = fmul fast float [[TMP61]], 
0x41F0000000000000 1107 ; CHECK-NEXT: [[TMP63:%.*]] = fptoui float [[TMP62]] to i32 1108 ; CHECK-NEXT: [[TMP64:%.*]] = zext i32 [[TMP63]] to i64 1109 ; CHECK-NEXT: [[TMP65:%.*]] = zext i32 [[TMP59]] to i64 1110 ; CHECK-NEXT: [[TMP66:%.*]] = mul i64 [[TMP64]], [[TMP65]] 1111 ; CHECK-NEXT: [[TMP67:%.*]] = trunc i64 [[TMP66]] to i32 1112 ; CHECK-NEXT: [[TMP68:%.*]] = lshr i64 [[TMP66]], 32 1113 ; CHECK-NEXT: [[TMP69:%.*]] = trunc i64 [[TMP68]] to i32 1114 ; CHECK-NEXT: [[TMP70:%.*]] = sub i32 0, [[TMP67]] 1115 ; CHECK-NEXT: [[TMP71:%.*]] = icmp eq i32 [[TMP69]], 0 1116 ; CHECK-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], i32 [[TMP70]], i32 [[TMP67]] 1117 ; CHECK-NEXT: [[TMP73:%.*]] = zext i32 [[TMP72]] to i64 1118 ; CHECK-NEXT: [[TMP74:%.*]] = zext i32 [[TMP63]] to i64 1119 ; CHECK-NEXT: [[TMP75:%.*]] = mul i64 [[TMP73]], [[TMP74]] 1120 ; CHECK-NEXT: [[TMP76:%.*]] = trunc i64 [[TMP75]] to i32 1121 ; CHECK-NEXT: [[TMP77:%.*]] = lshr i64 [[TMP75]], 32 1122 ; CHECK-NEXT: [[TMP78:%.*]] = trunc i64 [[TMP77]] to i32 1123 ; CHECK-NEXT: [[TMP79:%.*]] = add i32 [[TMP63]], [[TMP78]] 1124 ; CHECK-NEXT: [[TMP80:%.*]] = sub i32 [[TMP63]], [[TMP78]] 1125 ; CHECK-NEXT: [[TMP81:%.*]] = select i1 [[TMP71]], i32 [[TMP79]], i32 [[TMP80]] 1126 ; CHECK-NEXT: [[TMP82:%.*]] = zext i32 [[TMP81]] to i64 1127 ; CHECK-NEXT: [[TMP83:%.*]] = zext i32 [[TMP58]] to i64 1128 ; CHECK-NEXT: [[TMP84:%.*]] = mul i64 [[TMP82]], [[TMP83]] 1129 ; CHECK-NEXT: [[TMP85:%.*]] = trunc i64 [[TMP84]] to i32 1130 ; CHECK-NEXT: [[TMP86:%.*]] = lshr i64 [[TMP84]], 32 1131 ; CHECK-NEXT: [[TMP87:%.*]] = trunc i64 [[TMP86]] to i32 1132 ; CHECK-NEXT: [[TMP88:%.*]] = mul i32 [[TMP87]], [[TMP59]] 1133 ; CHECK-NEXT: [[TMP89:%.*]] = sub i32 [[TMP58]], [[TMP88]] 1134 ; CHECK-NEXT: [[TMP90:%.*]] = icmp uge i32 [[TMP89]], [[TMP59]] 1135 ; CHECK-NEXT: [[TMP91:%.*]] = select i1 [[TMP90]], i32 -1, i32 0 1136 ; CHECK-NEXT: [[TMP92:%.*]] = icmp uge i32 [[TMP58]], [[TMP88]] 1137 ; CHECK-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], i32 
-1, i32 0 1138 ; CHECK-NEXT: [[TMP94:%.*]] = and i32 [[TMP91]], [[TMP93]] 1139 ; CHECK-NEXT: [[TMP95:%.*]] = icmp eq i32 [[TMP94]], 0 1140 ; CHECK-NEXT: [[TMP96:%.*]] = sub i32 [[TMP89]], [[TMP59]] 1141 ; CHECK-NEXT: [[TMP97:%.*]] = add i32 [[TMP89]], [[TMP59]] 1142 ; CHECK-NEXT: [[TMP98:%.*]] = select i1 [[TMP95]], i32 [[TMP89]], i32 [[TMP96]] 1143 ; CHECK-NEXT: [[TMP99:%.*]] = select i1 [[TMP92]], i32 [[TMP98]], i32 [[TMP97]] 1144 ; CHECK-NEXT: [[TMP100:%.*]] = xor i32 [[TMP99]], [[TMP54]] 1145 ; CHECK-NEXT: [[TMP101:%.*]] = sub i32 [[TMP100]], [[TMP54]] 1146 ; CHECK-NEXT: [[TMP102:%.*]] = insertelement <4 x i32> [[TMP51]], i32 [[TMP101]], i64 1 1147 ; CHECK-NEXT: [[TMP103:%.*]] = extractelement <4 x i32> [[X]], i64 2 1148 ; CHECK-NEXT: [[TMP104:%.*]] = extractelement <4 x i32> [[Y]], i64 2 1149 ; CHECK-NEXT: [[TMP105:%.*]] = ashr i32 [[TMP103]], 31 1150 ; CHECK-NEXT: [[TMP106:%.*]] = ashr i32 [[TMP104]], 31 1151 ; CHECK-NEXT: [[TMP107:%.*]] = add i32 [[TMP103]], [[TMP105]] 1152 ; CHECK-NEXT: [[TMP108:%.*]] = add i32 [[TMP104]], [[TMP106]] 1153 ; CHECK-NEXT: [[TMP109:%.*]] = xor i32 [[TMP107]], [[TMP105]] 1154 ; CHECK-NEXT: [[TMP110:%.*]] = xor i32 [[TMP108]], [[TMP106]] 1155 ; CHECK-NEXT: [[TMP111:%.*]] = uitofp i32 [[TMP110]] to float 1156 ; CHECK-NEXT: [[TMP112:%.*]] = fdiv fast float 1.000000e+00, [[TMP111]] 1157 ; CHECK-NEXT: [[TMP113:%.*]] = fmul fast float [[TMP112]], 0x41F0000000000000 1158 ; CHECK-NEXT: [[TMP114:%.*]] = fptoui float [[TMP113]] to i32 1159 ; CHECK-NEXT: [[TMP115:%.*]] = zext i32 [[TMP114]] to i64 1160 ; CHECK-NEXT: [[TMP116:%.*]] = zext i32 [[TMP110]] to i64 1161 ; CHECK-NEXT: [[TMP117:%.*]] = mul i64 [[TMP115]], [[TMP116]] 1162 ; CHECK-NEXT: [[TMP118:%.*]] = trunc i64 [[TMP117]] to i32 1163 ; CHECK-NEXT: [[TMP119:%.*]] = lshr i64 [[TMP117]], 32 1164 ; CHECK-NEXT: [[TMP120:%.*]] = trunc i64 [[TMP119]] to i32 1165 ; CHECK-NEXT: [[TMP121:%.*]] = sub i32 0, [[TMP118]] 1166 ; CHECK-NEXT: [[TMP122:%.*]] = icmp eq i32 [[TMP120]], 0 1167 ; 
CHECK-NEXT: [[TMP123:%.*]] = select i1 [[TMP122]], i32 [[TMP121]], i32 [[TMP118]] 1168 ; CHECK-NEXT: [[TMP124:%.*]] = zext i32 [[TMP123]] to i64 1169 ; CHECK-NEXT: [[TMP125:%.*]] = zext i32 [[TMP114]] to i64 1170 ; CHECK-NEXT: [[TMP126:%.*]] = mul i64 [[TMP124]], [[TMP125]] 1171 ; CHECK-NEXT: [[TMP127:%.*]] = trunc i64 [[TMP126]] to i32 1172 ; CHECK-NEXT: [[TMP128:%.*]] = lshr i64 [[TMP126]], 32 1173 ; CHECK-NEXT: [[TMP129:%.*]] = trunc i64 [[TMP128]] to i32 1174 ; CHECK-NEXT: [[TMP130:%.*]] = add i32 [[TMP114]], [[TMP129]] 1175 ; CHECK-NEXT: [[TMP131:%.*]] = sub i32 [[TMP114]], [[TMP129]] 1176 ; CHECK-NEXT: [[TMP132:%.*]] = select i1 [[TMP122]], i32 [[TMP130]], i32 [[TMP131]] 1177 ; CHECK-NEXT: [[TMP133:%.*]] = zext i32 [[TMP132]] to i64 1178 ; CHECK-NEXT: [[TMP134:%.*]] = zext i32 [[TMP109]] to i64 1179 ; CHECK-NEXT: [[TMP135:%.*]] = mul i64 [[TMP133]], [[TMP134]] 1180 ; CHECK-NEXT: [[TMP136:%.*]] = trunc i64 [[TMP135]] to i32 1181 ; CHECK-NEXT: [[TMP137:%.*]] = lshr i64 [[TMP135]], 32 1182 ; CHECK-NEXT: [[TMP138:%.*]] = trunc i64 [[TMP137]] to i32 1183 ; CHECK-NEXT: [[TMP139:%.*]] = mul i32 [[TMP138]], [[TMP110]] 1184 ; CHECK-NEXT: [[TMP140:%.*]] = sub i32 [[TMP109]], [[TMP139]] 1185 ; CHECK-NEXT: [[TMP141:%.*]] = icmp uge i32 [[TMP140]], [[TMP110]] 1186 ; CHECK-NEXT: [[TMP142:%.*]] = select i1 [[TMP141]], i32 -1, i32 0 1187 ; CHECK-NEXT: [[TMP143:%.*]] = icmp uge i32 [[TMP109]], [[TMP139]] 1188 ; CHECK-NEXT: [[TMP144:%.*]] = select i1 [[TMP143]], i32 -1, i32 0 1189 ; CHECK-NEXT: [[TMP145:%.*]] = and i32 [[TMP142]], [[TMP144]] 1190 ; CHECK-NEXT: [[TMP146:%.*]] = icmp eq i32 [[TMP145]], 0 1191 ; CHECK-NEXT: [[TMP147:%.*]] = sub i32 [[TMP140]], [[TMP110]] 1192 ; CHECK-NEXT: [[TMP148:%.*]] = add i32 [[TMP140]], [[TMP110]] 1193 ; CHECK-NEXT: [[TMP149:%.*]] = select i1 [[TMP146]], i32 [[TMP140]], i32 [[TMP147]] 1194 ; CHECK-NEXT: [[TMP150:%.*]] = select i1 [[TMP143]], i32 [[TMP149]], i32 [[TMP148]] 1195 ; CHECK-NEXT: [[TMP151:%.*]] = xor i32 [[TMP150]], [[TMP105]] 
1196 ; CHECK-NEXT: [[TMP152:%.*]] = sub i32 [[TMP151]], [[TMP105]] 1197 ; CHECK-NEXT: [[TMP153:%.*]] = insertelement <4 x i32> [[TMP102]], i32 [[TMP152]], i64 2 1198 ; CHECK-NEXT: [[TMP154:%.*]] = extractelement <4 x i32> [[X]], i64 3 1199 ; CHECK-NEXT: [[TMP155:%.*]] = extractelement <4 x i32> [[Y]], i64 3 1200 ; CHECK-NEXT: [[TMP156:%.*]] = ashr i32 [[TMP154]], 31 1201 ; CHECK-NEXT: [[TMP157:%.*]] = ashr i32 [[TMP155]], 31 1202 ; CHECK-NEXT: [[TMP158:%.*]] = add i32 [[TMP154]], [[TMP156]] 1203 ; CHECK-NEXT: [[TMP159:%.*]] = add i32 [[TMP155]], [[TMP157]] 1204 ; CHECK-NEXT: [[TMP160:%.*]] = xor i32 [[TMP158]], [[TMP156]] 1205 ; CHECK-NEXT: [[TMP161:%.*]] = xor i32 [[TMP159]], [[TMP157]] 1206 ; CHECK-NEXT: [[TMP162:%.*]] = uitofp i32 [[TMP161]] to float 1207 ; CHECK-NEXT: [[TMP163:%.*]] = fdiv fast float 1.000000e+00, [[TMP162]] 1208 ; CHECK-NEXT: [[TMP164:%.*]] = fmul fast float [[TMP163]], 0x41F0000000000000 1209 ; CHECK-NEXT: [[TMP165:%.*]] = fptoui float [[TMP164]] to i32 1210 ; CHECK-NEXT: [[TMP166:%.*]] = zext i32 [[TMP165]] to i64 1211 ; CHECK-NEXT: [[TMP167:%.*]] = zext i32 [[TMP161]] to i64 1212 ; CHECK-NEXT: [[TMP168:%.*]] = mul i64 [[TMP166]], [[TMP167]] 1213 ; CHECK-NEXT: [[TMP169:%.*]] = trunc i64 [[TMP168]] to i32 1214 ; CHECK-NEXT: [[TMP170:%.*]] = lshr i64 [[TMP168]], 32 1215 ; CHECK-NEXT: [[TMP171:%.*]] = trunc i64 [[TMP170]] to i32 1216 ; CHECK-NEXT: [[TMP172:%.*]] = sub i32 0, [[TMP169]] 1217 ; CHECK-NEXT: [[TMP173:%.*]] = icmp eq i32 [[TMP171]], 0 1218 ; CHECK-NEXT: [[TMP174:%.*]] = select i1 [[TMP173]], i32 [[TMP172]], i32 [[TMP169]] 1219 ; CHECK-NEXT: [[TMP175:%.*]] = zext i32 [[TMP174]] to i64 1220 ; CHECK-NEXT: [[TMP176:%.*]] = zext i32 [[TMP165]] to i64 1221 ; CHECK-NEXT: [[TMP177:%.*]] = mul i64 [[TMP175]], [[TMP176]] 1222 ; CHECK-NEXT: [[TMP178:%.*]] = trunc i64 [[TMP177]] to i32 1223 ; CHECK-NEXT: [[TMP179:%.*]] = lshr i64 [[TMP177]], 32 1224 ; CHECK-NEXT: [[TMP180:%.*]] = trunc i64 [[TMP179]] to i32 1225 ; CHECK-NEXT: [[TMP181:%.*]] = 
add i32 [[TMP165]], [[TMP180]] 1226 ; CHECK-NEXT: [[TMP182:%.*]] = sub i32 [[TMP165]], [[TMP180]] 1227 ; CHECK-NEXT: [[TMP183:%.*]] = select i1 [[TMP173]], i32 [[TMP181]], i32 [[TMP182]] 1228 ; CHECK-NEXT: [[TMP184:%.*]] = zext i32 [[TMP183]] to i64 1229 ; CHECK-NEXT: [[TMP185:%.*]] = zext i32 [[TMP160]] to i64 1230 ; CHECK-NEXT: [[TMP186:%.*]] = mul i64 [[TMP184]], [[TMP185]] 1231 ; CHECK-NEXT: [[TMP187:%.*]] = trunc i64 [[TMP186]] to i32 1232 ; CHECK-NEXT: [[TMP188:%.*]] = lshr i64 [[TMP186]], 32 1233 ; CHECK-NEXT: [[TMP189:%.*]] = trunc i64 [[TMP188]] to i32 1234 ; CHECK-NEXT: [[TMP190:%.*]] = mul i32 [[TMP189]], [[TMP161]] 1235 ; CHECK-NEXT: [[TMP191:%.*]] = sub i32 [[TMP160]], [[TMP190]] 1236 ; CHECK-NEXT: [[TMP192:%.*]] = icmp uge i32 [[TMP191]], [[TMP161]] 1237 ; CHECK-NEXT: [[TMP193:%.*]] = select i1 [[TMP192]], i32 -1, i32 0 1238 ; CHECK-NEXT: [[TMP194:%.*]] = icmp uge i32 [[TMP160]], [[TMP190]] 1239 ; CHECK-NEXT: [[TMP195:%.*]] = select i1 [[TMP194]], i32 -1, i32 0 1240 ; CHECK-NEXT: [[TMP196:%.*]] = and i32 [[TMP193]], [[TMP195]] 1241 ; CHECK-NEXT: [[TMP197:%.*]] = icmp eq i32 [[TMP196]], 0 1242 ; CHECK-NEXT: [[TMP198:%.*]] = sub i32 [[TMP191]], [[TMP161]] 1243 ; CHECK-NEXT: [[TMP199:%.*]] = add i32 [[TMP191]], [[TMP161]] 1244 ; CHECK-NEXT: [[TMP200:%.*]] = select i1 [[TMP197]], i32 [[TMP191]], i32 [[TMP198]] 1245 ; CHECK-NEXT: [[TMP201:%.*]] = select i1 [[TMP194]], i32 [[TMP200]], i32 [[TMP199]] 1246 ; CHECK-NEXT: [[TMP202:%.*]] = xor i32 [[TMP201]], [[TMP156]] 1247 ; CHECK-NEXT: [[TMP203:%.*]] = sub i32 [[TMP202]], [[TMP156]] 1248 ; CHECK-NEXT: [[TMP204:%.*]] = insertelement <4 x i32> [[TMP153]], i32 [[TMP203]], i64 3 1249 ; CHECK-NEXT: store <4 x i32> [[TMP204]], <4 x i32> addrspace(1)* [[OUT:%.*]] 1250 ; CHECK-NEXT: ret void 1251 ; 1252 %r = srem <4 x i32> %x, %y 1253 store <4 x i32> %r, <4 x i32> addrspace(1)* %out 1254 ret void 1255 } 1256 1257 define amdgpu_kernel void @udiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x, <4 x i16> %y) { 1258 ; 
CHECK-LABEL: @udiv_v4i16( 1259 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[X:%.*]], i64 0 1260 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[Y:%.*]], i64 0 1261 ; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP1]] to i32 1262 ; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP2]] to i32 1263 ; CHECK-NEXT: [[TMP5:%.*]] = uitofp i32 [[TMP3]] to float 1264 ; CHECK-NEXT: [[TMP6:%.*]] = uitofp i32 [[TMP4]] to float 1265 ; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast float 1.000000e+00, [[TMP6]] 1266 ; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[TMP5]], [[TMP7]] 1267 ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]]) 1268 ; CHECK-NEXT: [[TMP10:%.*]] = fsub fast float -0.000000e+00, [[TMP9]] 1269 ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]]) 1270 ; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP9]] to i32 1271 ; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]]) 1272 ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]]) 1273 ; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast oge float [[TMP13]], [[TMP14]] 1274 ; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 1, i32 0 1275 ; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP12]], [[TMP16]] 1276 ; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 65535 1277 ; CHECK-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16 1278 ; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i16> undef, i16 [[TMP19]], i64 0 1279 ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i16> [[X]], i64 1 1280 ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i16> [[Y]], i64 1 1281 ; CHECK-NEXT: [[TMP23:%.*]] = zext i16 [[TMP21]] to i32 1282 ; CHECK-NEXT: [[TMP24:%.*]] = zext i16 [[TMP22]] to i32 1283 ; CHECK-NEXT: [[TMP25:%.*]] = uitofp i32 [[TMP23]] to float 1284 ; CHECK-NEXT: [[TMP26:%.*]] = uitofp i32 [[TMP24]] to float 1285 ; CHECK-NEXT: [[TMP27:%.*]] = fdiv fast float 1.000000e+00, [[TMP26]] 1286 ; CHECK-NEXT: [[TMP28:%.*]] 
= fmul fast float [[TMP25]], [[TMP27]] 1287 ; CHECK-NEXT: [[TMP29:%.*]] = call fast float @llvm.trunc.f32(float [[TMP28]]) 1288 ; CHECK-NEXT: [[TMP30:%.*]] = fsub fast float -0.000000e+00, [[TMP29]] 1289 ; CHECK-NEXT: [[TMP31:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP30]], float [[TMP26]], float [[TMP25]]) 1290 ; CHECK-NEXT: [[TMP32:%.*]] = fptoui float [[TMP29]] to i32 1291 ; CHECK-NEXT: [[TMP33:%.*]] = call fast float @llvm.fabs.f32(float [[TMP31]]) 1292 ; CHECK-NEXT: [[TMP34:%.*]] = call fast float @llvm.fabs.f32(float [[TMP26]]) 1293 ; CHECK-NEXT: [[TMP35:%.*]] = fcmp fast oge float [[TMP33]], [[TMP34]] 1294 ; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 1, i32 0 1295 ; CHECK-NEXT: [[TMP37:%.*]] = add i32 [[TMP32]], [[TMP36]] 1296 ; CHECK-NEXT: [[TMP38:%.*]] = and i32 [[TMP37]], 65535 1297 ; CHECK-NEXT: [[TMP39:%.*]] = trunc i32 [[TMP38]] to i16 1298 ; CHECK-NEXT: [[TMP40:%.*]] = insertelement <4 x i16> [[TMP20]], i16 [[TMP39]], i64 1 1299 ; CHECK-NEXT: [[TMP41:%.*]] = extractelement <4 x i16> [[X]], i64 2 1300 ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <4 x i16> [[Y]], i64 2 1301 ; CHECK-NEXT: [[TMP43:%.*]] = zext i16 [[TMP41]] to i32 1302 ; CHECK-NEXT: [[TMP44:%.*]] = zext i16 [[TMP42]] to i32 1303 ; CHECK-NEXT: [[TMP45:%.*]] = uitofp i32 [[TMP43]] to float 1304 ; CHECK-NEXT: [[TMP46:%.*]] = uitofp i32 [[TMP44]] to float 1305 ; CHECK-NEXT: [[TMP47:%.*]] = fdiv fast float 1.000000e+00, [[TMP46]] 1306 ; CHECK-NEXT: [[TMP48:%.*]] = fmul fast float [[TMP45]], [[TMP47]] 1307 ; CHECK-NEXT: [[TMP49:%.*]] = call fast float @llvm.trunc.f32(float [[TMP48]]) 1308 ; CHECK-NEXT: [[TMP50:%.*]] = fsub fast float -0.000000e+00, [[TMP49]] 1309 ; CHECK-NEXT: [[TMP51:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP50]], float [[TMP46]], float [[TMP45]]) 1310 ; CHECK-NEXT: [[TMP52:%.*]] = fptoui float [[TMP49]] to i32 1311 ; CHECK-NEXT: [[TMP53:%.*]] = call fast float @llvm.fabs.f32(float [[TMP51]]) 1312 ; CHECK-NEXT: [[TMP54:%.*]] = 
call fast float @llvm.fabs.f32(float [[TMP46]]) 1313 ; CHECK-NEXT: [[TMP55:%.*]] = fcmp fast oge float [[TMP53]], [[TMP54]] 1314 ; CHECK-NEXT: [[TMP56:%.*]] = select i1 [[TMP55]], i32 1, i32 0 1315 ; CHECK-NEXT: [[TMP57:%.*]] = add i32 [[TMP52]], [[TMP56]] 1316 ; CHECK-NEXT: [[TMP58:%.*]] = and i32 [[TMP57]], 65535 1317 ; CHECK-NEXT: [[TMP59:%.*]] = trunc i32 [[TMP58]] to i16 1318 ; CHECK-NEXT: [[TMP60:%.*]] = insertelement <4 x i16> [[TMP40]], i16 [[TMP59]], i64 2 1319 ; CHECK-NEXT: [[TMP61:%.*]] = extractelement <4 x i16> [[X]], i64 3 1320 ; CHECK-NEXT: [[TMP62:%.*]] = extractelement <4 x i16> [[Y]], i64 3 1321 ; CHECK-NEXT: [[TMP63:%.*]] = zext i16 [[TMP61]] to i32 1322 ; CHECK-NEXT: [[TMP64:%.*]] = zext i16 [[TMP62]] to i32 1323 ; CHECK-NEXT: [[TMP65:%.*]] = uitofp i32 [[TMP63]] to float 1324 ; CHECK-NEXT: [[TMP66:%.*]] = uitofp i32 [[TMP64]] to float 1325 ; CHECK-NEXT: [[TMP67:%.*]] = fdiv fast float 1.000000e+00, [[TMP66]] 1326 ; CHECK-NEXT: [[TMP68:%.*]] = fmul fast float [[TMP65]], [[TMP67]] 1327 ; CHECK-NEXT: [[TMP69:%.*]] = call fast float @llvm.trunc.f32(float [[TMP68]]) 1328 ; CHECK-NEXT: [[TMP70:%.*]] = fsub fast float -0.000000e+00, [[TMP69]] 1329 ; CHECK-NEXT: [[TMP71:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP70]], float [[TMP66]], float [[TMP65]]) 1330 ; CHECK-NEXT: [[TMP72:%.*]] = fptoui float [[TMP69]] to i32 1331 ; CHECK-NEXT: [[TMP73:%.*]] = call fast float @llvm.fabs.f32(float [[TMP71]]) 1332 ; CHECK-NEXT: [[TMP74:%.*]] = call fast float @llvm.fabs.f32(float [[TMP66]]) 1333 ; CHECK-NEXT: [[TMP75:%.*]] = fcmp fast oge float [[TMP73]], [[TMP74]] 1334 ; CHECK-NEXT: [[TMP76:%.*]] = select i1 [[TMP75]], i32 1, i32 0 1335 ; CHECK-NEXT: [[TMP77:%.*]] = add i32 [[TMP72]], [[TMP76]] 1336 ; CHECK-NEXT: [[TMP78:%.*]] = and i32 [[TMP77]], 65535 1337 ; CHECK-NEXT: [[TMP79:%.*]] = trunc i32 [[TMP78]] to i16 1338 ; CHECK-NEXT: [[TMP80:%.*]] = insertelement <4 x i16> [[TMP60]], i16 [[TMP79]], i64 3 1339 ; CHECK-NEXT: store <4 x i16> 
[[TMP80]], <4 x i16> addrspace(1)* [[OUT:%.*]] 1340 ; CHECK-NEXT: ret void 1341 ; 1342 %r = udiv <4 x i16> %x, %y 1343 store <4 x i16> %r, <4 x i16> addrspace(1)* %out 1344 ret void 1345 } 1346 1347 define amdgpu_kernel void @urem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x, <4 x i16> %y) { 1348 ; CHECK-LABEL: @urem_v4i16( 1349 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[X:%.*]], i64 0 1350 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[Y:%.*]], i64 0 1351 ; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP1]] to i32 1352 ; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP2]] to i32 1353 ; CHECK-NEXT: [[TMP5:%.*]] = uitofp i32 [[TMP3]] to float 1354 ; CHECK-NEXT: [[TMP6:%.*]] = uitofp i32 [[TMP4]] to float 1355 ; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast float 1.000000e+00, [[TMP6]] 1356 ; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[TMP5]], [[TMP7]] 1357 ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]]) 1358 ; CHECK-NEXT: [[TMP10:%.*]] = fsub fast float -0.000000e+00, [[TMP9]] 1359 ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]]) 1360 ; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP9]] to i32 1361 ; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]]) 1362 ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]]) 1363 ; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast oge float [[TMP13]], [[TMP14]] 1364 ; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 1, i32 0 1365 ; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP12]], [[TMP16]] 1366 ; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], [[TMP4]] 1367 ; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP3]], [[TMP18]] 1368 ; CHECK-NEXT: [[TMP20:%.*]] = and i32 [[TMP19]], 65535 1369 ; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 1370 ; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i16> undef, i16 [[TMP21]], i64 0 1371 ; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i16> [[X]], i64 1 
1372 ; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i16> [[Y]], i64 1 1373 ; CHECK-NEXT: [[TMP25:%.*]] = zext i16 [[TMP23]] to i32 1374 ; CHECK-NEXT: [[TMP26:%.*]] = zext i16 [[TMP24]] to i32 1375 ; CHECK-NEXT: [[TMP27:%.*]] = uitofp i32 [[TMP25]] to float 1376 ; CHECK-NEXT: [[TMP28:%.*]] = uitofp i32 [[TMP26]] to float 1377 ; CHECK-NEXT: [[TMP29:%.*]] = fdiv fast float 1.000000e+00, [[TMP28]] 1378 ; CHECK-NEXT: [[TMP30:%.*]] = fmul fast float [[TMP27]], [[TMP29]] 1379 ; CHECK-NEXT: [[TMP31:%.*]] = call fast float @llvm.trunc.f32(float [[TMP30]]) 1380 ; CHECK-NEXT: [[TMP32:%.*]] = fsub fast float -0.000000e+00, [[TMP31]] 1381 ; CHECK-NEXT: [[TMP33:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP32]], float [[TMP28]], float [[TMP27]]) 1382 ; CHECK-NEXT: [[TMP34:%.*]] = fptoui float [[TMP31]] to i32 1383 ; CHECK-NEXT: [[TMP35:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]]) 1384 ; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.fabs.f32(float [[TMP28]]) 1385 ; CHECK-NEXT: [[TMP37:%.*]] = fcmp fast oge float [[TMP35]], [[TMP36]] 1386 ; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 1, i32 0 1387 ; CHECK-NEXT: [[TMP39:%.*]] = add i32 [[TMP34]], [[TMP38]] 1388 ; CHECK-NEXT: [[TMP40:%.*]] = mul i32 [[TMP39]], [[TMP26]] 1389 ; CHECK-NEXT: [[TMP41:%.*]] = sub i32 [[TMP25]], [[TMP40]] 1390 ; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP41]], 65535 1391 ; CHECK-NEXT: [[TMP43:%.*]] = trunc i32 [[TMP42]] to i16 1392 ; CHECK-NEXT: [[TMP44:%.*]] = insertelement <4 x i16> [[TMP22]], i16 [[TMP43]], i64 1 1393 ; CHECK-NEXT: [[TMP45:%.*]] = extractelement <4 x i16> [[X]], i64 2 1394 ; CHECK-NEXT: [[TMP46:%.*]] = extractelement <4 x i16> [[Y]], i64 2 1395 ; CHECK-NEXT: [[TMP47:%.*]] = zext i16 [[TMP45]] to i32 1396 ; CHECK-NEXT: [[TMP48:%.*]] = zext i16 [[TMP46]] to i32 1397 ; CHECK-NEXT: [[TMP49:%.*]] = uitofp i32 [[TMP47]] to float 1398 ; CHECK-NEXT: [[TMP50:%.*]] = uitofp i32 [[TMP48]] to float 1399 ; CHECK-NEXT: [[TMP51:%.*]] = fdiv fast float 
1.000000e+00, [[TMP50]] 1400 ; CHECK-NEXT: [[TMP52:%.*]] = fmul fast float [[TMP49]], [[TMP51]] 1401 ; CHECK-NEXT: [[TMP53:%.*]] = call fast float @llvm.trunc.f32(float [[TMP52]]) 1402 ; CHECK-NEXT: [[TMP54:%.*]] = fsub fast float -0.000000e+00, [[TMP53]] 1403 ; CHECK-NEXT: [[TMP55:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP54]], float [[TMP50]], float [[TMP49]]) 1404 ; CHECK-NEXT: [[TMP56:%.*]] = fptoui float [[TMP53]] to i32 1405 ; CHECK-NEXT: [[TMP57:%.*]] = call fast float @llvm.fabs.f32(float [[TMP55]]) 1406 ; CHECK-NEXT: [[TMP58:%.*]] = call fast float @llvm.fabs.f32(float [[TMP50]]) 1407 ; CHECK-NEXT: [[TMP59:%.*]] = fcmp fast oge float [[TMP57]], [[TMP58]] 1408 ; CHECK-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 1, i32 0 1409 ; CHECK-NEXT: [[TMP61:%.*]] = add i32 [[TMP56]], [[TMP60]] 1410 ; CHECK-NEXT: [[TMP62:%.*]] = mul i32 [[TMP61]], [[TMP48]] 1411 ; CHECK-NEXT: [[TMP63:%.*]] = sub i32 [[TMP47]], [[TMP62]] 1412 ; CHECK-NEXT: [[TMP64:%.*]] = and i32 [[TMP63]], 65535 1413 ; CHECK-NEXT: [[TMP65:%.*]] = trunc i32 [[TMP64]] to i16 1414 ; CHECK-NEXT: [[TMP66:%.*]] = insertelement <4 x i16> [[TMP44]], i16 [[TMP65]], i64 2 1415 ; CHECK-NEXT: [[TMP67:%.*]] = extractelement <4 x i16> [[X]], i64 3 1416 ; CHECK-NEXT: [[TMP68:%.*]] = extractelement <4 x i16> [[Y]], i64 3 1417 ; CHECK-NEXT: [[TMP69:%.*]] = zext i16 [[TMP67]] to i32 1418 ; CHECK-NEXT: [[TMP70:%.*]] = zext i16 [[TMP68]] to i32 1419 ; CHECK-NEXT: [[TMP71:%.*]] = uitofp i32 [[TMP69]] to float 1420 ; CHECK-NEXT: [[TMP72:%.*]] = uitofp i32 [[TMP70]] to float 1421 ; CHECK-NEXT: [[TMP73:%.*]] = fdiv fast float 1.000000e+00, [[TMP72]] 1422 ; CHECK-NEXT: [[TMP74:%.*]] = fmul fast float [[TMP71]], [[TMP73]] 1423 ; CHECK-NEXT: [[TMP75:%.*]] = call fast float @llvm.trunc.f32(float [[TMP74]]) 1424 ; CHECK-NEXT: [[TMP76:%.*]] = fsub fast float -0.000000e+00, [[TMP75]] 1425 ; CHECK-NEXT: [[TMP77:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP76]], float [[TMP72]], float [[TMP71]]) 
1426 ; CHECK-NEXT: [[TMP78:%.*]] = fptoui float [[TMP75]] to i32 1427 ; CHECK-NEXT: [[TMP79:%.*]] = call fast float @llvm.fabs.f32(float [[TMP77]]) 1428 ; CHECK-NEXT: [[TMP80:%.*]] = call fast float @llvm.fabs.f32(float [[TMP72]]) 1429 ; CHECK-NEXT: [[TMP81:%.*]] = fcmp fast oge float [[TMP79]], [[TMP80]] 1430 ; CHECK-NEXT: [[TMP82:%.*]] = select i1 [[TMP81]], i32 1, i32 0 1431 ; CHECK-NEXT: [[TMP83:%.*]] = add i32 [[TMP78]], [[TMP82]] 1432 ; CHECK-NEXT: [[TMP84:%.*]] = mul i32 [[TMP83]], [[TMP70]] 1433 ; CHECK-NEXT: [[TMP85:%.*]] = sub i32 [[TMP69]], [[TMP84]] 1434 ; CHECK-NEXT: [[TMP86:%.*]] = and i32 [[TMP85]], 65535 1435 ; CHECK-NEXT: [[TMP87:%.*]] = trunc i32 [[TMP86]] to i16 1436 ; CHECK-NEXT: [[TMP88:%.*]] = insertelement <4 x i16> [[TMP66]], i16 [[TMP87]], i64 3 1437 ; CHECK-NEXT: store <4 x i16> [[TMP88]], <4 x i16> addrspace(1)* [[OUT:%.*]] 1438 ; CHECK-NEXT: ret void 1439 ; 1440 %r = urem <4 x i16> %x, %y 1441 store <4 x i16> %r, <4 x i16> addrspace(1)* %out 1442 ret void 1443 } 1444 1445 define amdgpu_kernel void @sdiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x, <4 x i16> %y) { 1446 ; CHECK-LABEL: @sdiv_v4i16( 1447 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[X:%.*]], i64 0 1448 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[Y:%.*]], i64 0 1449 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP1]] to i32 1450 ; CHECK-NEXT: [[TMP4:%.*]] = sext i16 [[TMP2]] to i32 1451 ; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] 1452 ; CHECK-NEXT: [[TMP6:%.*]] = ashr i32 [[TMP5]], 30 1453 ; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], 1 1454 ; CHECK-NEXT: [[TMP8:%.*]] = sitofp i32 [[TMP3]] to float 1455 ; CHECK-NEXT: [[TMP9:%.*]] = sitofp i32 [[TMP4]] to float 1456 ; CHECK-NEXT: [[TMP10:%.*]] = fdiv fast float 1.000000e+00, [[TMP9]] 1457 ; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]] 1458 ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]]) 1459 ; CHECK-NEXT: [[TMP13:%.*]] = fsub fast float 
-0.000000e+00, [[TMP12]] 1460 ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]]) 1461 ; CHECK-NEXT: [[TMP15:%.*]] = fptosi float [[TMP12]] to i32 1462 ; CHECK-NEXT: [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]]) 1463 ; CHECK-NEXT: [[TMP17:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]]) 1464 ; CHECK-NEXT: [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]] 1465 ; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP7]], i32 0 1466 ; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP15]], [[TMP19]] 1467 ; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 1468 ; CHECK-NEXT: [[TMP22:%.*]] = sext i16 [[TMP21]] to i32 1469 ; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i16 1470 ; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i16> undef, i16 [[TMP23]], i64 0 1471 ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i16> [[X]], i64 1 1472 ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i16> [[Y]], i64 1 1473 ; CHECK-NEXT: [[TMP27:%.*]] = sext i16 [[TMP25]] to i32 1474 ; CHECK-NEXT: [[TMP28:%.*]] = sext i16 [[TMP26]] to i32 1475 ; CHECK-NEXT: [[TMP29:%.*]] = xor i32 [[TMP27]], [[TMP28]] 1476 ; CHECK-NEXT: [[TMP30:%.*]] = ashr i32 [[TMP29]], 30 1477 ; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP30]], 1 1478 ; CHECK-NEXT: [[TMP32:%.*]] = sitofp i32 [[TMP27]] to float 1479 ; CHECK-NEXT: [[TMP33:%.*]] = sitofp i32 [[TMP28]] to float 1480 ; CHECK-NEXT: [[TMP34:%.*]] = fdiv fast float 1.000000e+00, [[TMP33]] 1481 ; CHECK-NEXT: [[TMP35:%.*]] = fmul fast float [[TMP32]], [[TMP34]] 1482 ; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.trunc.f32(float [[TMP35]]) 1483 ; CHECK-NEXT: [[TMP37:%.*]] = fsub fast float -0.000000e+00, [[TMP36]] 1484 ; CHECK-NEXT: [[TMP38:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP37]], float [[TMP33]], float [[TMP32]]) 1485 ; CHECK-NEXT: [[TMP39:%.*]] = fptosi float [[TMP36]] to i32 1486 ; CHECK-NEXT: [[TMP40:%.*]] = call fast float 
@llvm.fabs.f32(float [[TMP38]]) 1487 ; CHECK-NEXT: [[TMP41:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]]) 1488 ; CHECK-NEXT: [[TMP42:%.*]] = fcmp fast oge float [[TMP40]], [[TMP41]] 1489 ; CHECK-NEXT: [[TMP43:%.*]] = select i1 [[TMP42]], i32 [[TMP31]], i32 0 1490 ; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[TMP39]], [[TMP43]] 1491 ; CHECK-NEXT: [[TMP45:%.*]] = trunc i32 [[TMP44]] to i16 1492 ; CHECK-NEXT: [[TMP46:%.*]] = sext i16 [[TMP45]] to i32 1493 ; CHECK-NEXT: [[TMP47:%.*]] = trunc i32 [[TMP46]] to i16 1494 ; CHECK-NEXT: [[TMP48:%.*]] = insertelement <4 x i16> [[TMP24]], i16 [[TMP47]], i64 1 1495 ; CHECK-NEXT: [[TMP49:%.*]] = extractelement <4 x i16> [[X]], i64 2 1496 ; CHECK-NEXT: [[TMP50:%.*]] = extractelement <4 x i16> [[Y]], i64 2 1497 ; CHECK-NEXT: [[TMP51:%.*]] = sext i16 [[TMP49]] to i32 1498 ; CHECK-NEXT: [[TMP52:%.*]] = sext i16 [[TMP50]] to i32 1499 ; CHECK-NEXT: [[TMP53:%.*]] = xor i32 [[TMP51]], [[TMP52]] 1500 ; CHECK-NEXT: [[TMP54:%.*]] = ashr i32 [[TMP53]], 30 1501 ; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP54]], 1 1502 ; CHECK-NEXT: [[TMP56:%.*]] = sitofp i32 [[TMP51]] to float 1503 ; CHECK-NEXT: [[TMP57:%.*]] = sitofp i32 [[TMP52]] to float 1504 ; CHECK-NEXT: [[TMP58:%.*]] = fdiv fast float 1.000000e+00, [[TMP57]] 1505 ; CHECK-NEXT: [[TMP59:%.*]] = fmul fast float [[TMP56]], [[TMP58]] 1506 ; CHECK-NEXT: [[TMP60:%.*]] = call fast float @llvm.trunc.f32(float [[TMP59]]) 1507 ; CHECK-NEXT: [[TMP61:%.*]] = fsub fast float -0.000000e+00, [[TMP60]] 1508 ; CHECK-NEXT: [[TMP62:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP61]], float [[TMP57]], float [[TMP56]]) 1509 ; CHECK-NEXT: [[TMP63:%.*]] = fptosi float [[TMP60]] to i32 1510 ; CHECK-NEXT: [[TMP64:%.*]] = call fast float @llvm.fabs.f32(float [[TMP62]]) 1511 ; CHECK-NEXT: [[TMP65:%.*]] = call fast float @llvm.fabs.f32(float [[TMP57]]) 1512 ; CHECK-NEXT: [[TMP66:%.*]] = fcmp fast oge float [[TMP64]], [[TMP65]] 1513 ; CHECK-NEXT: [[TMP67:%.*]] = select i1 [[TMP66]], i32 [[TMP55]], i32 
0 1514 ; CHECK-NEXT: [[TMP68:%.*]] = add i32 [[TMP63]], [[TMP67]] 1515 ; CHECK-NEXT: [[TMP69:%.*]] = trunc i32 [[TMP68]] to i16 1516 ; CHECK-NEXT: [[TMP70:%.*]] = sext i16 [[TMP69]] to i32 1517 ; CHECK-NEXT: [[TMP71:%.*]] = trunc i32 [[TMP70]] to i16 1518 ; CHECK-NEXT: [[TMP72:%.*]] = insertelement <4 x i16> [[TMP48]], i16 [[TMP71]], i64 2 1519 ; CHECK-NEXT: [[TMP73:%.*]] = extractelement <4 x i16> [[X]], i64 3 1520 ; CHECK-NEXT: [[TMP74:%.*]] = extractelement <4 x i16> [[Y]], i64 3 1521 ; CHECK-NEXT: [[TMP75:%.*]] = sext i16 [[TMP73]] to i32 1522 ; CHECK-NEXT: [[TMP76:%.*]] = sext i16 [[TMP74]] to i32 1523 ; CHECK-NEXT: [[TMP77:%.*]] = xor i32 [[TMP75]], [[TMP76]] 1524 ; CHECK-NEXT: [[TMP78:%.*]] = ashr i32 [[TMP77]], 30 1525 ; CHECK-NEXT: [[TMP79:%.*]] = or i32 [[TMP78]], 1 1526 ; CHECK-NEXT: [[TMP80:%.*]] = sitofp i32 [[TMP75]] to float 1527 ; CHECK-NEXT: [[TMP81:%.*]] = sitofp i32 [[TMP76]] to float 1528 ; CHECK-NEXT: [[TMP82:%.*]] = fdiv fast float 1.000000e+00, [[TMP81]] 1529 ; CHECK-NEXT: [[TMP83:%.*]] = fmul fast float [[TMP80]], [[TMP82]] 1530 ; CHECK-NEXT: [[TMP84:%.*]] = call fast float @llvm.trunc.f32(float [[TMP83]]) 1531 ; CHECK-NEXT: [[TMP85:%.*]] = fsub fast float -0.000000e+00, [[TMP84]] 1532 ; CHECK-NEXT: [[TMP86:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP85]], float [[TMP81]], float [[TMP80]]) 1533 ; CHECK-NEXT: [[TMP87:%.*]] = fptosi float [[TMP84]] to i32 1534 ; CHECK-NEXT: [[TMP88:%.*]] = call fast float @llvm.fabs.f32(float [[TMP86]]) 1535 ; CHECK-NEXT: [[TMP89:%.*]] = call fast float @llvm.fabs.f32(float [[TMP81]]) 1536 ; CHECK-NEXT: [[TMP90:%.*]] = fcmp fast oge float [[TMP88]], [[TMP89]] 1537 ; CHECK-NEXT: [[TMP91:%.*]] = select i1 [[TMP90]], i32 [[TMP79]], i32 0 1538 ; CHECK-NEXT: [[TMP92:%.*]] = add i32 [[TMP87]], [[TMP91]] 1539 ; CHECK-NEXT: [[TMP93:%.*]] = trunc i32 [[TMP92]] to i16 1540 ; CHECK-NEXT: [[TMP94:%.*]] = sext i16 [[TMP93]] to i32 1541 ; CHECK-NEXT: [[TMP95:%.*]] = trunc i32 [[TMP94]] to i16 1542 ; 
CHECK-NEXT: [[TMP96:%.*]] = insertelement <4 x i16> [[TMP72]], i16 [[TMP95]], i64 3 1543 ; CHECK-NEXT: store <4 x i16> [[TMP96]], <4 x i16> addrspace(1)* [[OUT:%.*]] 1544 ; CHECK-NEXT: ret void 1545 ; 1546 %r = sdiv <4 x i16> %x, %y 1547 store <4 x i16> %r, <4 x i16> addrspace(1)* %out 1548 ret void 1549 } 1550 1551 define amdgpu_kernel void @srem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x, <4 x i16> %y) { 1552 ; CHECK-LABEL: @srem_v4i16( 1553 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[X:%.*]], i64 0 1554 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[Y:%.*]], i64 0 1555 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP1]] to i32 1556 ; CHECK-NEXT: [[TMP4:%.*]] = sext i16 [[TMP2]] to i32 1557 ; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] 1558 ; CHECK-NEXT: [[TMP6:%.*]] = ashr i32 [[TMP5]], 30 1559 ; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], 1 1560 ; CHECK-NEXT: [[TMP8:%.*]] = sitofp i32 [[TMP3]] to float 1561 ; CHECK-NEXT: [[TMP9:%.*]] = sitofp i32 [[TMP4]] to float 1562 ; CHECK-NEXT: [[TMP10:%.*]] = fdiv fast float 1.000000e+00, [[TMP9]] 1563 ; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]] 1564 ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]]) 1565 ; CHECK-NEXT: [[TMP13:%.*]] = fsub fast float -0.000000e+00, [[TMP12]] 1566 ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]]) 1567 ; CHECK-NEXT: [[TMP15:%.*]] = fptosi float [[TMP12]] to i32 1568 ; CHECK-NEXT: [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]]) 1569 ; CHECK-NEXT: [[TMP17:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]]) 1570 ; CHECK-NEXT: [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]] 1571 ; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP7]], i32 0 1572 ; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP15]], [[TMP19]] 1573 ; CHECK-NEXT: [[TMP21:%.*]] = mul i32 [[TMP20]], [[TMP4]] 1574 ; CHECK-NEXT: [[TMP22:%.*]] = sub i32 
[[TMP3]], [[TMP21]] 1575 ; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i16 1576 ; CHECK-NEXT: [[TMP24:%.*]] = sext i16 [[TMP23]] to i32 1577 ; CHECK-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16 1578 ; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i16> undef, i16 [[TMP25]], i64 0 1579 ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i16> [[X]], i64 1 1580 ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i16> [[Y]], i64 1 1581 ; CHECK-NEXT: [[TMP29:%.*]] = sext i16 [[TMP27]] to i32 1582 ; CHECK-NEXT: [[TMP30:%.*]] = sext i16 [[TMP28]] to i32 1583 ; CHECK-NEXT: [[TMP31:%.*]] = xor i32 [[TMP29]], [[TMP30]] 1584 ; CHECK-NEXT: [[TMP32:%.*]] = ashr i32 [[TMP31]], 30 1585 ; CHECK-NEXT: [[TMP33:%.*]] = or i32 [[TMP32]], 1 1586 ; CHECK-NEXT: [[TMP34:%.*]] = sitofp i32 [[TMP29]] to float 1587 ; CHECK-NEXT: [[TMP35:%.*]] = sitofp i32 [[TMP30]] to float 1588 ; CHECK-NEXT: [[TMP36:%.*]] = fdiv fast float 1.000000e+00, [[TMP35]] 1589 ; CHECK-NEXT: [[TMP37:%.*]] = fmul fast float [[TMP34]], [[TMP36]] 1590 ; CHECK-NEXT: [[TMP38:%.*]] = call fast float @llvm.trunc.f32(float [[TMP37]]) 1591 ; CHECK-NEXT: [[TMP39:%.*]] = fsub fast float -0.000000e+00, [[TMP38]] 1592 ; CHECK-NEXT: [[TMP40:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP39]], float [[TMP35]], float [[TMP34]]) 1593 ; CHECK-NEXT: [[TMP41:%.*]] = fptosi float [[TMP38]] to i32 1594 ; CHECK-NEXT: [[TMP42:%.*]] = call fast float @llvm.fabs.f32(float [[TMP40]]) 1595 ; CHECK-NEXT: [[TMP43:%.*]] = call fast float @llvm.fabs.f32(float [[TMP35]]) 1596 ; CHECK-NEXT: [[TMP44:%.*]] = fcmp fast oge float [[TMP42]], [[TMP43]] 1597 ; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[TMP33]], i32 0 1598 ; CHECK-NEXT: [[TMP46:%.*]] = add i32 [[TMP41]], [[TMP45]] 1599 ; CHECK-NEXT: [[TMP47:%.*]] = mul i32 [[TMP46]], [[TMP30]] 1600 ; CHECK-NEXT: [[TMP48:%.*]] = sub i32 [[TMP29]], [[TMP47]] 1601 ; CHECK-NEXT: [[TMP49:%.*]] = trunc i32 [[TMP48]] to i16 1602 ; CHECK-NEXT: [[TMP50:%.*]] = sext i16 [[TMP49]] to 
i32 1603 ; CHECK-NEXT: [[TMP51:%.*]] = trunc i32 [[TMP50]] to i16 1604 ; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x i16> [[TMP26]], i16 [[TMP51]], i64 1 1605 ; CHECK-NEXT: [[TMP53:%.*]] = extractelement <4 x i16> [[X]], i64 2 1606 ; CHECK-NEXT: [[TMP54:%.*]] = extractelement <4 x i16> [[Y]], i64 2 1607 ; CHECK-NEXT: [[TMP55:%.*]] = sext i16 [[TMP53]] to i32 1608 ; CHECK-NEXT: [[TMP56:%.*]] = sext i16 [[TMP54]] to i32 1609 ; CHECK-NEXT: [[TMP57:%.*]] = xor i32 [[TMP55]], [[TMP56]] 1610 ; CHECK-NEXT: [[TMP58:%.*]] = ashr i32 [[TMP57]], 30 1611 ; CHECK-NEXT: [[TMP59:%.*]] = or i32 [[TMP58]], 1 1612 ; CHECK-NEXT: [[TMP60:%.*]] = sitofp i32 [[TMP55]] to float 1613 ; CHECK-NEXT: [[TMP61:%.*]] = sitofp i32 [[TMP56]] to float 1614 ; CHECK-NEXT: [[TMP62:%.*]] = fdiv fast float 1.000000e+00, [[TMP61]] 1615 ; CHECK-NEXT: [[TMP63:%.*]] = fmul fast float [[TMP60]], [[TMP62]] 1616 ; CHECK-NEXT: [[TMP64:%.*]] = call fast float @llvm.trunc.f32(float [[TMP63]]) 1617 ; CHECK-NEXT: [[TMP65:%.*]] = fsub fast float -0.000000e+00, [[TMP64]] 1618 ; CHECK-NEXT: [[TMP66:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP65]], float [[TMP61]], float [[TMP60]]) 1619 ; CHECK-NEXT: [[TMP67:%.*]] = fptosi float [[TMP64]] to i32 1620 ; CHECK-NEXT: [[TMP68:%.*]] = call fast float @llvm.fabs.f32(float [[TMP66]]) 1621 ; CHECK-NEXT: [[TMP69:%.*]] = call fast float @llvm.fabs.f32(float [[TMP61]]) 1622 ; CHECK-NEXT: [[TMP70:%.*]] = fcmp fast oge float [[TMP68]], [[TMP69]] 1623 ; CHECK-NEXT: [[TMP71:%.*]] = select i1 [[TMP70]], i32 [[TMP59]], i32 0 1624 ; CHECK-NEXT: [[TMP72:%.*]] = add i32 [[TMP67]], [[TMP71]] 1625 ; CHECK-NEXT: [[TMP73:%.*]] = mul i32 [[TMP72]], [[TMP56]] 1626 ; CHECK-NEXT: [[TMP74:%.*]] = sub i32 [[TMP55]], [[TMP73]] 1627 ; CHECK-NEXT: [[TMP75:%.*]] = trunc i32 [[TMP74]] to i16 1628 ; CHECK-NEXT: [[TMP76:%.*]] = sext i16 [[TMP75]] to i32 1629 ; CHECK-NEXT: [[TMP77:%.*]] = trunc i32 [[TMP76]] to i16 1630 ; CHECK-NEXT: [[TMP78:%.*]] = insertelement <4 x i16> [[TMP52]], 
i16 [[TMP77]], i64 2 1631 ; CHECK-NEXT: [[TMP79:%.*]] = extractelement <4 x i16> [[X]], i64 3 1632 ; CHECK-NEXT: [[TMP80:%.*]] = extractelement <4 x i16> [[Y]], i64 3 1633 ; CHECK-NEXT: [[TMP81:%.*]] = sext i16 [[TMP79]] to i32 1634 ; CHECK-NEXT: [[TMP82:%.*]] = sext i16 [[TMP80]] to i32 1635 ; CHECK-NEXT: [[TMP83:%.*]] = xor i32 [[TMP81]], [[TMP82]] 1636 ; CHECK-NEXT: [[TMP84:%.*]] = ashr i32 [[TMP83]], 30 1637 ; CHECK-NEXT: [[TMP85:%.*]] = or i32 [[TMP84]], 1 1638 ; CHECK-NEXT: [[TMP86:%.*]] = sitofp i32 [[TMP81]] to float 1639 ; CHECK-NEXT: [[TMP87:%.*]] = sitofp i32 [[TMP82]] to float 1640 ; CHECK-NEXT: [[TMP88:%.*]] = fdiv fast float 1.000000e+00, [[TMP87]] 1641 ; CHECK-NEXT: [[TMP89:%.*]] = fmul fast float [[TMP86]], [[TMP88]] 1642 ; CHECK-NEXT: [[TMP90:%.*]] = call fast float @llvm.trunc.f32(float [[TMP89]]) 1643 ; CHECK-NEXT: [[TMP91:%.*]] = fsub fast float -0.000000e+00, [[TMP90]] 1644 ; CHECK-NEXT: [[TMP92:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP91]], float [[TMP87]], float [[TMP86]]) 1645 ; CHECK-NEXT: [[TMP93:%.*]] = fptosi float [[TMP90]] to i32 1646 ; CHECK-NEXT: [[TMP94:%.*]] = call fast float @llvm.fabs.f32(float [[TMP92]]) 1647 ; CHECK-NEXT: [[TMP95:%.*]] = call fast float @llvm.fabs.f32(float [[TMP87]]) 1648 ; CHECK-NEXT: [[TMP96:%.*]] = fcmp fast oge float [[TMP94]], [[TMP95]] 1649 ; CHECK-NEXT: [[TMP97:%.*]] = select i1 [[TMP96]], i32 [[TMP85]], i32 0 1650 ; CHECK-NEXT: [[TMP98:%.*]] = add i32 [[TMP93]], [[TMP97]] 1651 ; CHECK-NEXT: [[TMP99:%.*]] = mul i32 [[TMP98]], [[TMP82]] 1652 ; CHECK-NEXT: [[TMP100:%.*]] = sub i32 [[TMP81]], [[TMP99]] 1653 ; CHECK-NEXT: [[TMP101:%.*]] = trunc i32 [[TMP100]] to i16 1654 ; CHECK-NEXT: [[TMP102:%.*]] = sext i16 [[TMP101]] to i32 1655 ; CHECK-NEXT: [[TMP103:%.*]] = trunc i32 [[TMP102]] to i16 1656 ; CHECK-NEXT: [[TMP104:%.*]] = insertelement <4 x i16> [[TMP78]], i16 [[TMP103]], i64 3 1657 ; CHECK-NEXT: store <4 x i16> [[TMP104]], <4 x i16> addrspace(1)* [[OUT:%.*]] 1658 ; CHECK-NEXT: ret 
void 1659 ; 1660 %r = srem <4 x i16> %x, %y 1661 store <4 x i16> %r, <4 x i16> addrspace(1)* %out 1662 ret void 1663 } 1664 1665 define amdgpu_kernel void @udiv_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { 1666 ; CHECK-LABEL: @udiv_i3( 1667 ; CHECK-NEXT: [[TMP1:%.*]] = zext i3 [[X:%.*]] to i32 1668 ; CHECK-NEXT: [[TMP2:%.*]] = zext i3 [[Y:%.*]] to i32 1669 ; CHECK-NEXT: [[TMP3:%.*]] = uitofp i32 [[TMP1]] to float 1670 ; CHECK-NEXT: [[TMP4:%.*]] = uitofp i32 [[TMP2]] to float 1671 ; CHECK-NEXT: [[TMP5:%.*]] = fdiv fast float 1.000000e+00, [[TMP4]] 1672 ; CHECK-NEXT: [[TMP6:%.*]] = fmul fast float [[TMP3]], [[TMP5]] 1673 ; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]]) 1674 ; CHECK-NEXT: [[TMP8:%.*]] = fsub fast float -0.000000e+00, [[TMP7]] 1675 ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]]) 1676 ; CHECK-NEXT: [[TMP10:%.*]] = fptoui float [[TMP7]] to i32 1677 ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]]) 1678 ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.fabs.f32(float [[TMP4]]) 1679 ; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast oge float [[TMP11]], [[TMP12]] 1680 ; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 1, i32 0 1681 ; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP10]], [[TMP14]] 1682 ; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 7 1683 ; CHECK-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i3 1684 ; CHECK-NEXT: store i3 [[TMP17]], i3 addrspace(1)* [[OUT:%.*]] 1685 ; CHECK-NEXT: ret void 1686 ; 1687 %r = udiv i3 %x, %y 1688 store i3 %r, i3 addrspace(1)* %out 1689 ret void 1690 } 1691 1692 define amdgpu_kernel void @urem_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { 1693 ; CHECK-LABEL: @urem_i3( 1694 ; CHECK-NEXT: [[TMP1:%.*]] = zext i3 [[X:%.*]] to i32 1695 ; CHECK-NEXT: [[TMP2:%.*]] = zext i3 [[Y:%.*]] to i32 1696 ; CHECK-NEXT: [[TMP3:%.*]] = uitofp i32 [[TMP1]] to float 1697 ; CHECK-NEXT: [[TMP4:%.*]] = uitofp i32 [[TMP2]] to float 
1698 ; CHECK-NEXT: [[TMP5:%.*]] = fdiv fast float 1.000000e+00, [[TMP4]] 1699 ; CHECK-NEXT: [[TMP6:%.*]] = fmul fast float [[TMP3]], [[TMP5]] 1700 ; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]]) 1701 ; CHECK-NEXT: [[TMP8:%.*]] = fsub fast float -0.000000e+00, [[TMP7]] 1702 ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]]) 1703 ; CHECK-NEXT: [[TMP10:%.*]] = fptoui float [[TMP7]] to i32 1704 ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]]) 1705 ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.fabs.f32(float [[TMP4]]) 1706 ; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast oge float [[TMP11]], [[TMP12]] 1707 ; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 1, i32 0 1708 ; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP10]], [[TMP14]] 1709 ; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP15]], [[TMP2]] 1710 ; CHECK-NEXT: [[TMP17:%.*]] = sub i32 [[TMP1]], [[TMP16]] 1711 ; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 7 1712 ; CHECK-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i3 1713 ; CHECK-NEXT: store i3 [[TMP19]], i3 addrspace(1)* [[OUT:%.*]] 1714 ; CHECK-NEXT: ret void 1715 ; 1716 %r = urem i3 %x, %y 1717 store i3 %r, i3 addrspace(1)* %out 1718 ret void 1719 } 1720 1721 define amdgpu_kernel void @sdiv_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { 1722 ; CHECK-LABEL: @sdiv_i3( 1723 ; CHECK-NEXT: [[TMP1:%.*]] = sext i3 [[X:%.*]] to i32 1724 ; CHECK-NEXT: [[TMP2:%.*]] = sext i3 [[Y:%.*]] to i32 1725 ; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] 1726 ; CHECK-NEXT: [[TMP4:%.*]] = ashr i32 [[TMP3]], 30 1727 ; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], 1 1728 ; CHECK-NEXT: [[TMP6:%.*]] = sitofp i32 [[TMP1]] to float 1729 ; CHECK-NEXT: [[TMP7:%.*]] = sitofp i32 [[TMP2]] to float 1730 ; CHECK-NEXT: [[TMP8:%.*]] = fdiv fast float 1.000000e+00, [[TMP7]] 1731 ; CHECK-NEXT: [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]] 1732 ; CHECK-NEXT: [[TMP10:%.*]] = 
call fast float @llvm.trunc.f32(float [[TMP9]]) 1733 ; CHECK-NEXT: [[TMP11:%.*]] = fsub fast float -0.000000e+00, [[TMP10]] 1734 ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]]) 1735 ; CHECK-NEXT: [[TMP13:%.*]] = fptosi float [[TMP10]] to i32 1736 ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]]) 1737 ; CHECK-NEXT: [[TMP15:%.*]] = call fast float @llvm.fabs.f32(float [[TMP7]]) 1738 ; CHECK-NEXT: [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]] 1739 ; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP5]], i32 0 1740 ; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP13]], [[TMP17]] 1741 ; CHECK-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i3 1742 ; CHECK-NEXT: [[TMP20:%.*]] = sext i3 [[TMP19]] to i32 1743 ; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i3 1744 ; CHECK-NEXT: store i3 [[TMP21]], i3 addrspace(1)* [[OUT:%.*]] 1745 ; CHECK-NEXT: ret void 1746 ; 1747 %r = sdiv i3 %x, %y 1748 store i3 %r, i3 addrspace(1)* %out 1749 ret void 1750 } 1751 1752 define amdgpu_kernel void @srem_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { 1753 ; CHECK-LABEL: @srem_i3( 1754 ; CHECK-NEXT: [[TMP1:%.*]] = sext i3 [[X:%.*]] to i32 1755 ; CHECK-NEXT: [[TMP2:%.*]] = sext i3 [[Y:%.*]] to i32 1756 ; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] 1757 ; CHECK-NEXT: [[TMP4:%.*]] = ashr i32 [[TMP3]], 30 1758 ; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], 1 1759 ; CHECK-NEXT: [[TMP6:%.*]] = sitofp i32 [[TMP1]] to float 1760 ; CHECK-NEXT: [[TMP7:%.*]] = sitofp i32 [[TMP2]] to float 1761 ; CHECK-NEXT: [[TMP8:%.*]] = fdiv fast float 1.000000e+00, [[TMP7]] 1762 ; CHECK-NEXT: [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]] 1763 ; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]]) 1764 ; CHECK-NEXT: [[TMP11:%.*]] = fsub fast float -0.000000e+00, [[TMP10]] 1765 ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float 
[[TMP7]], float [[TMP6]]) 1766 ; CHECK-NEXT: [[TMP13:%.*]] = fptosi float [[TMP10]] to i32 1767 ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]]) 1768 ; CHECK-NEXT: [[TMP15:%.*]] = call fast float @llvm.fabs.f32(float [[TMP7]]) 1769 ; CHECK-NEXT: [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]] 1770 ; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP5]], i32 0 1771 ; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP13]], [[TMP17]] 1772 ; CHECK-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], [[TMP2]] 1773 ; CHECK-NEXT: [[TMP20:%.*]] = sub i32 [[TMP1]], [[TMP19]] 1774 ; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i3 1775 ; CHECK-NEXT: [[TMP22:%.*]] = sext i3 [[TMP21]] to i32 1776 ; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i3 1777 ; CHECK-NEXT: store i3 [[TMP23]], i3 addrspace(1)* [[OUT:%.*]] 1778 ; CHECK-NEXT: ret void 1779 ; 1780 %r = srem i3 %x, %y 1781 store i3 %r, i3 addrspace(1)* %out 1782 ret void 1783 } 1784 1785 define amdgpu_kernel void @udiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x, <3 x i16> %y) { 1786 ; CHECK-LABEL: @udiv_v3i16( 1787 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i16> [[X:%.*]], i64 0 1788 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i16> [[Y:%.*]], i64 0 1789 ; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP1]] to i32 1790 ; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP2]] to i32 1791 ; CHECK-NEXT: [[TMP5:%.*]] = uitofp i32 [[TMP3]] to float 1792 ; CHECK-NEXT: [[TMP6:%.*]] = uitofp i32 [[TMP4]] to float 1793 ; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast float 1.000000e+00, [[TMP6]] 1794 ; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[TMP5]], [[TMP7]] 1795 ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]]) 1796 ; CHECK-NEXT: [[TMP10:%.*]] = fsub fast float -0.000000e+00, [[TMP9]] 1797 ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]]) 1798 ; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP9]] to i32 1799 
; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]]) 1800 ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]]) 1801 ; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast oge float [[TMP13]], [[TMP14]] 1802 ; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 1, i32 0 1803 ; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP12]], [[TMP16]] 1804 ; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 65535 1805 ; CHECK-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16 1806 ; CHECK-NEXT: [[TMP20:%.*]] = insertelement <3 x i16> undef, i16 [[TMP19]], i64 0 1807 ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <3 x i16> [[X]], i64 1 1808 ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <3 x i16> [[Y]], i64 1 1809 ; CHECK-NEXT: [[TMP23:%.*]] = zext i16 [[TMP21]] to i32 1810 ; CHECK-NEXT: [[TMP24:%.*]] = zext i16 [[TMP22]] to i32 1811 ; CHECK-NEXT: [[TMP25:%.*]] = uitofp i32 [[TMP23]] to float 1812 ; CHECK-NEXT: [[TMP26:%.*]] = uitofp i32 [[TMP24]] to float 1813 ; CHECK-NEXT: [[TMP27:%.*]] = fdiv fast float 1.000000e+00, [[TMP26]] 1814 ; CHECK-NEXT: [[TMP28:%.*]] = fmul fast float [[TMP25]], [[TMP27]] 1815 ; CHECK-NEXT: [[TMP29:%.*]] = call fast float @llvm.trunc.f32(float [[TMP28]]) 1816 ; CHECK-NEXT: [[TMP30:%.*]] = fsub fast float -0.000000e+00, [[TMP29]] 1817 ; CHECK-NEXT: [[TMP31:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP30]], float [[TMP26]], float [[TMP25]]) 1818 ; CHECK-NEXT: [[TMP32:%.*]] = fptoui float [[TMP29]] to i32 1819 ; CHECK-NEXT: [[TMP33:%.*]] = call fast float @llvm.fabs.f32(float [[TMP31]]) 1820 ; CHECK-NEXT: [[TMP34:%.*]] = call fast float @llvm.fabs.f32(float [[TMP26]]) 1821 ; CHECK-NEXT: [[TMP35:%.*]] = fcmp fast oge float [[TMP33]], [[TMP34]] 1822 ; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 1, i32 0 1823 ; CHECK-NEXT: [[TMP37:%.*]] = add i32 [[TMP32]], [[TMP36]] 1824 ; CHECK-NEXT: [[TMP38:%.*]] = and i32 [[TMP37]], 65535 1825 ; CHECK-NEXT: [[TMP39:%.*]] = trunc i32 [[TMP38]] to i16 1826 ; CHECK-NEXT: 
[[TMP40:%.*]] = insertelement <3 x i16> [[TMP20]], i16 [[TMP39]], i64 1 1827 ; CHECK-NEXT: [[TMP41:%.*]] = extractelement <3 x i16> [[X]], i64 2 1828 ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <3 x i16> [[Y]], i64 2 1829 ; CHECK-NEXT: [[TMP43:%.*]] = zext i16 [[TMP41]] to i32 1830 ; CHECK-NEXT: [[TMP44:%.*]] = zext i16 [[TMP42]] to i32 1831 ; CHECK-NEXT: [[TMP45:%.*]] = uitofp i32 [[TMP43]] to float 1832 ; CHECK-NEXT: [[TMP46:%.*]] = uitofp i32 [[TMP44]] to float 1833 ; CHECK-NEXT: [[TMP47:%.*]] = fdiv fast float 1.000000e+00, [[TMP46]] 1834 ; CHECK-NEXT: [[TMP48:%.*]] = fmul fast float [[TMP45]], [[TMP47]] 1835 ; CHECK-NEXT: [[TMP49:%.*]] = call fast float @llvm.trunc.f32(float [[TMP48]]) 1836 ; CHECK-NEXT: [[TMP50:%.*]] = fsub fast float -0.000000e+00, [[TMP49]] 1837 ; CHECK-NEXT: [[TMP51:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP50]], float [[TMP46]], float [[TMP45]]) 1838 ; CHECK-NEXT: [[TMP52:%.*]] = fptoui float [[TMP49]] to i32 1839 ; CHECK-NEXT: [[TMP53:%.*]] = call fast float @llvm.fabs.f32(float [[TMP51]]) 1840 ; CHECK-NEXT: [[TMP54:%.*]] = call fast float @llvm.fabs.f32(float [[TMP46]]) 1841 ; CHECK-NEXT: [[TMP55:%.*]] = fcmp fast oge float [[TMP53]], [[TMP54]] 1842 ; CHECK-NEXT: [[TMP56:%.*]] = select i1 [[TMP55]], i32 1, i32 0 1843 ; CHECK-NEXT: [[TMP57:%.*]] = add i32 [[TMP52]], [[TMP56]] 1844 ; CHECK-NEXT: [[TMP58:%.*]] = and i32 [[TMP57]], 65535 1845 ; CHECK-NEXT: [[TMP59:%.*]] = trunc i32 [[TMP58]] to i16 1846 ; CHECK-NEXT: [[TMP60:%.*]] = insertelement <3 x i16> [[TMP40]], i16 [[TMP59]], i64 2 1847 ; CHECK-NEXT: store <3 x i16> [[TMP60]], <3 x i16> addrspace(1)* [[OUT:%.*]] 1848 ; CHECK-NEXT: ret void 1849 ; 1850 %r = udiv <3 x i16> %x, %y 1851 store <3 x i16> %r, <3 x i16> addrspace(1)* %out 1852 ret void 1853 } 1854 1855 define amdgpu_kernel void @urem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x, <3 x i16> %y) { 1856 ; CHECK-LABEL: @urem_v3i16( 1857 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i16> [[X:%.*]], i64 0 
1858 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i16> [[Y:%.*]], i64 0 1859 ; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP1]] to i32 1860 ; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP2]] to i32 1861 ; CHECK-NEXT: [[TMP5:%.*]] = uitofp i32 [[TMP3]] to float 1862 ; CHECK-NEXT: [[TMP6:%.*]] = uitofp i32 [[TMP4]] to float 1863 ; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast float 1.000000e+00, [[TMP6]] 1864 ; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[TMP5]], [[TMP7]] 1865 ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]]) 1866 ; CHECK-NEXT: [[TMP10:%.*]] = fsub fast float -0.000000e+00, [[TMP9]] 1867 ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]]) 1868 ; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP9]] to i32 1869 ; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]]) 1870 ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]]) 1871 ; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast oge float [[TMP13]], [[TMP14]] 1872 ; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 1, i32 0 1873 ; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP12]], [[TMP16]] 1874 ; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], [[TMP4]] 1875 ; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP3]], [[TMP18]] 1876 ; CHECK-NEXT: [[TMP20:%.*]] = and i32 [[TMP19]], 65535 1877 ; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 1878 ; CHECK-NEXT: [[TMP22:%.*]] = insertelement <3 x i16> undef, i16 [[TMP21]], i64 0 1879 ; CHECK-NEXT: [[TMP23:%.*]] = extractelement <3 x i16> [[X]], i64 1 1880 ; CHECK-NEXT: [[TMP24:%.*]] = extractelement <3 x i16> [[Y]], i64 1 1881 ; CHECK-NEXT: [[TMP25:%.*]] = zext i16 [[TMP23]] to i32 1882 ; CHECK-NEXT: [[TMP26:%.*]] = zext i16 [[TMP24]] to i32 1883 ; CHECK-NEXT: [[TMP27:%.*]] = uitofp i32 [[TMP25]] to float 1884 ; CHECK-NEXT: [[TMP28:%.*]] = uitofp i32 [[TMP26]] to float 1885 ; CHECK-NEXT: [[TMP29:%.*]] = fdiv fast float 1.000000e+00, [[TMP28]] 1886 ; 
CHECK-NEXT: [[TMP30:%.*]] = fmul fast float [[TMP27]], [[TMP29]] 1887 ; CHECK-NEXT: [[TMP31:%.*]] = call fast float @llvm.trunc.f32(float [[TMP30]]) 1888 ; CHECK-NEXT: [[TMP32:%.*]] = fsub fast float -0.000000e+00, [[TMP31]] 1889 ; CHECK-NEXT: [[TMP33:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP32]], float [[TMP28]], float [[TMP27]]) 1890 ; CHECK-NEXT: [[TMP34:%.*]] = fptoui float [[TMP31]] to i32 1891 ; CHECK-NEXT: [[TMP35:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]]) 1892 ; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.fabs.f32(float [[TMP28]]) 1893 ; CHECK-NEXT: [[TMP37:%.*]] = fcmp fast oge float [[TMP35]], [[TMP36]] 1894 ; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 1, i32 0 1895 ; CHECK-NEXT: [[TMP39:%.*]] = add i32 [[TMP34]], [[TMP38]] 1896 ; CHECK-NEXT: [[TMP40:%.*]] = mul i32 [[TMP39]], [[TMP26]] 1897 ; CHECK-NEXT: [[TMP41:%.*]] = sub i32 [[TMP25]], [[TMP40]] 1898 ; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP41]], 65535 1899 ; CHECK-NEXT: [[TMP43:%.*]] = trunc i32 [[TMP42]] to i16 1900 ; CHECK-NEXT: [[TMP44:%.*]] = insertelement <3 x i16> [[TMP22]], i16 [[TMP43]], i64 1 1901 ; CHECK-NEXT: [[TMP45:%.*]] = extractelement <3 x i16> [[X]], i64 2 1902 ; CHECK-NEXT: [[TMP46:%.*]] = extractelement <3 x i16> [[Y]], i64 2 1903 ; CHECK-NEXT: [[TMP47:%.*]] = zext i16 [[TMP45]] to i32 1904 ; CHECK-NEXT: [[TMP48:%.*]] = zext i16 [[TMP46]] to i32 1905 ; CHECK-NEXT: [[TMP49:%.*]] = uitofp i32 [[TMP47]] to float 1906 ; CHECK-NEXT: [[TMP50:%.*]] = uitofp i32 [[TMP48]] to float 1907 ; CHECK-NEXT: [[TMP51:%.*]] = fdiv fast float 1.000000e+00, [[TMP50]] 1908 ; CHECK-NEXT: [[TMP52:%.*]] = fmul fast float [[TMP49]], [[TMP51]] 1909 ; CHECK-NEXT: [[TMP53:%.*]] = call fast float @llvm.trunc.f32(float [[TMP52]]) 1910 ; CHECK-NEXT: [[TMP54:%.*]] = fsub fast float -0.000000e+00, [[TMP53]] 1911 ; CHECK-NEXT: [[TMP55:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP54]], float [[TMP50]], float [[TMP49]]) 1912 ; CHECK-NEXT: [[TMP56:%.*]] 
= fptoui float [[TMP53]] to i32 1913 ; CHECK-NEXT: [[TMP57:%.*]] = call fast float @llvm.fabs.f32(float [[TMP55]]) 1914 ; CHECK-NEXT: [[TMP58:%.*]] = call fast float @llvm.fabs.f32(float [[TMP50]]) 1915 ; CHECK-NEXT: [[TMP59:%.*]] = fcmp fast oge float [[TMP57]], [[TMP58]] 1916 ; CHECK-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 1, i32 0 1917 ; CHECK-NEXT: [[TMP61:%.*]] = add i32 [[TMP56]], [[TMP60]] 1918 ; CHECK-NEXT: [[TMP62:%.*]] = mul i32 [[TMP61]], [[TMP48]] 1919 ; CHECK-NEXT: [[TMP63:%.*]] = sub i32 [[TMP47]], [[TMP62]] 1920 ; CHECK-NEXT: [[TMP64:%.*]] = and i32 [[TMP63]], 65535 1921 ; CHECK-NEXT: [[TMP65:%.*]] = trunc i32 [[TMP64]] to i16 1922 ; CHECK-NEXT: [[TMP66:%.*]] = insertelement <3 x i16> [[TMP44]], i16 [[TMP65]], i64 2 1923 ; CHECK-NEXT: store <3 x i16> [[TMP66]], <3 x i16> addrspace(1)* [[OUT:%.*]] 1924 ; CHECK-NEXT: ret void 1925 ; 1926 %r = urem <3 x i16> %x, %y 1927 store <3 x i16> %r, <3 x i16> addrspace(1)* %out 1928 ret void 1929 } 1930 1931 define amdgpu_kernel void @sdiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x, <3 x i16> %y) { 1932 ; CHECK-LABEL: @sdiv_v3i16( 1933 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i16> [[X:%.*]], i64 0 1934 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i16> [[Y:%.*]], i64 0 1935 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP1]] to i32 1936 ; CHECK-NEXT: [[TMP4:%.*]] = sext i16 [[TMP2]] to i32 1937 ; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] 1938 ; CHECK-NEXT: [[TMP6:%.*]] = ashr i32 [[TMP5]], 30 1939 ; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], 1 1940 ; CHECK-NEXT: [[TMP8:%.*]] = sitofp i32 [[TMP3]] to float 1941 ; CHECK-NEXT: [[TMP9:%.*]] = sitofp i32 [[TMP4]] to float 1942 ; CHECK-NEXT: [[TMP10:%.*]] = fdiv fast float 1.000000e+00, [[TMP9]] 1943 ; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]] 1944 ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]]) 1945 ; CHECK-NEXT: [[TMP13:%.*]] = fsub fast float -0.000000e+00, [[TMP12]] 1946 ; 
CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]]) 1947 ; CHECK-NEXT: [[TMP15:%.*]] = fptosi float [[TMP12]] to i32 1948 ; CHECK-NEXT: [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]]) 1949 ; CHECK-NEXT: [[TMP17:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]]) 1950 ; CHECK-NEXT: [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]] 1951 ; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP7]], i32 0 1952 ; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP15]], [[TMP19]] 1953 ; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 1954 ; CHECK-NEXT: [[TMP22:%.*]] = sext i16 [[TMP21]] to i32 1955 ; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i16 1956 ; CHECK-NEXT: [[TMP24:%.*]] = insertelement <3 x i16> undef, i16 [[TMP23]], i64 0 1957 ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <3 x i16> [[X]], i64 1 1958 ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <3 x i16> [[Y]], i64 1 1959 ; CHECK-NEXT: [[TMP27:%.*]] = sext i16 [[TMP25]] to i32 1960 ; CHECK-NEXT: [[TMP28:%.*]] = sext i16 [[TMP26]] to i32 1961 ; CHECK-NEXT: [[TMP29:%.*]] = xor i32 [[TMP27]], [[TMP28]] 1962 ; CHECK-NEXT: [[TMP30:%.*]] = ashr i32 [[TMP29]], 30 1963 ; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP30]], 1 1964 ; CHECK-NEXT: [[TMP32:%.*]] = sitofp i32 [[TMP27]] to float 1965 ; CHECK-NEXT: [[TMP33:%.*]] = sitofp i32 [[TMP28]] to float 1966 ; CHECK-NEXT: [[TMP34:%.*]] = fdiv fast float 1.000000e+00, [[TMP33]] 1967 ; CHECK-NEXT: [[TMP35:%.*]] = fmul fast float [[TMP32]], [[TMP34]] 1968 ; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.trunc.f32(float [[TMP35]]) 1969 ; CHECK-NEXT: [[TMP37:%.*]] = fsub fast float -0.000000e+00, [[TMP36]] 1970 ; CHECK-NEXT: [[TMP38:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP37]], float [[TMP33]], float [[TMP32]]) 1971 ; CHECK-NEXT: [[TMP39:%.*]] = fptosi float [[TMP36]] to i32 1972 ; CHECK-NEXT: [[TMP40:%.*]] = call fast float @llvm.fabs.f32(float [[TMP38]]) 1973 
; CHECK-NEXT: [[TMP41:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]]) 1974 ; CHECK-NEXT: [[TMP42:%.*]] = fcmp fast oge float [[TMP40]], [[TMP41]] 1975 ; CHECK-NEXT: [[TMP43:%.*]] = select i1 [[TMP42]], i32 [[TMP31]], i32 0 1976 ; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[TMP39]], [[TMP43]] 1977 ; CHECK-NEXT: [[TMP45:%.*]] = trunc i32 [[TMP44]] to i16 1978 ; CHECK-NEXT: [[TMP46:%.*]] = sext i16 [[TMP45]] to i32 1979 ; CHECK-NEXT: [[TMP47:%.*]] = trunc i32 [[TMP46]] to i16 1980 ; CHECK-NEXT: [[TMP48:%.*]] = insertelement <3 x i16> [[TMP24]], i16 [[TMP47]], i64 1 1981 ; CHECK-NEXT: [[TMP49:%.*]] = extractelement <3 x i16> [[X]], i64 2 1982 ; CHECK-NEXT: [[TMP50:%.*]] = extractelement <3 x i16> [[Y]], i64 2 1983 ; CHECK-NEXT: [[TMP51:%.*]] = sext i16 [[TMP49]] to i32 1984 ; CHECK-NEXT: [[TMP52:%.*]] = sext i16 [[TMP50]] to i32 1985 ; CHECK-NEXT: [[TMP53:%.*]] = xor i32 [[TMP51]], [[TMP52]] 1986 ; CHECK-NEXT: [[TMP54:%.*]] = ashr i32 [[TMP53]], 30 1987 ; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP54]], 1 1988 ; CHECK-NEXT: [[TMP56:%.*]] = sitofp i32 [[TMP51]] to float 1989 ; CHECK-NEXT: [[TMP57:%.*]] = sitofp i32 [[TMP52]] to float 1990 ; CHECK-NEXT: [[TMP58:%.*]] = fdiv fast float 1.000000e+00, [[TMP57]] 1991 ; CHECK-NEXT: [[TMP59:%.*]] = fmul fast float [[TMP56]], [[TMP58]] 1992 ; CHECK-NEXT: [[TMP60:%.*]] = call fast float @llvm.trunc.f32(float [[TMP59]]) 1993 ; CHECK-NEXT: [[TMP61:%.*]] = fsub fast float -0.000000e+00, [[TMP60]] 1994 ; CHECK-NEXT: [[TMP62:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP61]], float [[TMP57]], float [[TMP56]]) 1995 ; CHECK-NEXT: [[TMP63:%.*]] = fptosi float [[TMP60]] to i32 1996 ; CHECK-NEXT: [[TMP64:%.*]] = call fast float @llvm.fabs.f32(float [[TMP62]]) 1997 ; CHECK-NEXT: [[TMP65:%.*]] = call fast float @llvm.fabs.f32(float [[TMP57]]) 1998 ; CHECK-NEXT: [[TMP66:%.*]] = fcmp fast oge float [[TMP64]], [[TMP65]] 1999 ; CHECK-NEXT: [[TMP67:%.*]] = select i1 [[TMP66]], i32 [[TMP55]], i32 0 2000 ; CHECK-NEXT: [[TMP68:%.*]] = 
add i32 [[TMP63]], [[TMP67]] 2001 ; CHECK-NEXT: [[TMP69:%.*]] = trunc i32 [[TMP68]] to i16 2002 ; CHECK-NEXT: [[TMP70:%.*]] = sext i16 [[TMP69]] to i32 2003 ; CHECK-NEXT: [[TMP71:%.*]] = trunc i32 [[TMP70]] to i16 2004 ; CHECK-NEXT: [[TMP72:%.*]] = insertelement <3 x i16> [[TMP48]], i16 [[TMP71]], i64 2 2005 ; CHECK-NEXT: store <3 x i16> [[TMP72]], <3 x i16> addrspace(1)* [[OUT:%.*]] 2006 ; CHECK-NEXT: ret void 2007 ; 2008 %r = sdiv <3 x i16> %x, %y 2009 store <3 x i16> %r, <3 x i16> addrspace(1)* %out 2010 ret void 2011 } 2012 2013 define amdgpu_kernel void @srem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x, <3 x i16> %y) { 2014 ; CHECK-LABEL: @srem_v3i16( 2015 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i16> [[X:%.*]], i64 0 2016 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i16> [[Y:%.*]], i64 0 2017 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP1]] to i32 2018 ; CHECK-NEXT: [[TMP4:%.*]] = sext i16 [[TMP2]] to i32 2019 ; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] 2020 ; CHECK-NEXT: [[TMP6:%.*]] = ashr i32 [[TMP5]], 30 2021 ; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], 1 2022 ; CHECK-NEXT: [[TMP8:%.*]] = sitofp i32 [[TMP3]] to float 2023 ; CHECK-NEXT: [[TMP9:%.*]] = sitofp i32 [[TMP4]] to float 2024 ; CHECK-NEXT: [[TMP10:%.*]] = fdiv fast float 1.000000e+00, [[TMP9]] 2025 ; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]] 2026 ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]]) 2027 ; CHECK-NEXT: [[TMP13:%.*]] = fsub fast float -0.000000e+00, [[TMP12]] 2028 ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]]) 2029 ; CHECK-NEXT: [[TMP15:%.*]] = fptosi float [[TMP12]] to i32 2030 ; CHECK-NEXT: [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]]) 2031 ; CHECK-NEXT: [[TMP17:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]]) 2032 ; CHECK-NEXT: [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]] 2033 ; CHECK-NEXT: 
[[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP7]], i32 0 2034 ; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP15]], [[TMP19]] 2035 ; CHECK-NEXT: [[TMP21:%.*]] = mul i32 [[TMP20]], [[TMP4]] 2036 ; CHECK-NEXT: [[TMP22:%.*]] = sub i32 [[TMP3]], [[TMP21]] 2037 ; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i16 2038 ; CHECK-NEXT: [[TMP24:%.*]] = sext i16 [[TMP23]] to i32 2039 ; CHECK-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16 2040 ; CHECK-NEXT: [[TMP26:%.*]] = insertelement <3 x i16> undef, i16 [[TMP25]], i64 0 2041 ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <3 x i16> [[X]], i64 1 2042 ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <3 x i16> [[Y]], i64 1 2043 ; CHECK-NEXT: [[TMP29:%.*]] = sext i16 [[TMP27]] to i32 2044 ; CHECK-NEXT: [[TMP30:%.*]] = sext i16 [[TMP28]] to i32 2045 ; CHECK-NEXT: [[TMP31:%.*]] = xor i32 [[TMP29]], [[TMP30]] 2046 ; CHECK-NEXT: [[TMP32:%.*]] = ashr i32 [[TMP31]], 30 2047 ; CHECK-NEXT: [[TMP33:%.*]] = or i32 [[TMP32]], 1 2048 ; CHECK-NEXT: [[TMP34:%.*]] = sitofp i32 [[TMP29]] to float 2049 ; CHECK-NEXT: [[TMP35:%.*]] = sitofp i32 [[TMP30]] to float 2050 ; CHECK-NEXT: [[TMP36:%.*]] = fdiv fast float 1.000000e+00, [[TMP35]] 2051 ; CHECK-NEXT: [[TMP37:%.*]] = fmul fast float [[TMP34]], [[TMP36]] 2052 ; CHECK-NEXT: [[TMP38:%.*]] = call fast float @llvm.trunc.f32(float [[TMP37]]) 2053 ; CHECK-NEXT: [[TMP39:%.*]] = fsub fast float -0.000000e+00, [[TMP38]] 2054 ; CHECK-NEXT: [[TMP40:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP39]], float [[TMP35]], float [[TMP34]]) 2055 ; CHECK-NEXT: [[TMP41:%.*]] = fptosi float [[TMP38]] to i32 2056 ; CHECK-NEXT: [[TMP42:%.*]] = call fast float @llvm.fabs.f32(float [[TMP40]]) 2057 ; CHECK-NEXT: [[TMP43:%.*]] = call fast float @llvm.fabs.f32(float [[TMP35]]) 2058 ; CHECK-NEXT: [[TMP44:%.*]] = fcmp fast oge float [[TMP42]], [[TMP43]] 2059 ; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[TMP33]], i32 0 2060 ; CHECK-NEXT: [[TMP46:%.*]] = add i32 [[TMP41]], [[TMP45]] 2061 ; CHECK-NEXT: 
[[TMP47:%.*]] = mul i32 [[TMP46]], [[TMP30]] 2062 ; CHECK-NEXT: [[TMP48:%.*]] = sub i32 [[TMP29]], [[TMP47]] 2063 ; CHECK-NEXT: [[TMP49:%.*]] = trunc i32 [[TMP48]] to i16 2064 ; CHECK-NEXT: [[TMP50:%.*]] = sext i16 [[TMP49]] to i32 2065 ; CHECK-NEXT: [[TMP51:%.*]] = trunc i32 [[TMP50]] to i16 2066 ; CHECK-NEXT: [[TMP52:%.*]] = insertelement <3 x i16> [[TMP26]], i16 [[TMP51]], i64 1 2067 ; CHECK-NEXT: [[TMP53:%.*]] = extractelement <3 x i16> [[X]], i64 2 2068 ; CHECK-NEXT: [[TMP54:%.*]] = extractelement <3 x i16> [[Y]], i64 2 2069 ; CHECK-NEXT: [[TMP55:%.*]] = sext i16 [[TMP53]] to i32 2070 ; CHECK-NEXT: [[TMP56:%.*]] = sext i16 [[TMP54]] to i32 2071 ; CHECK-NEXT: [[TMP57:%.*]] = xor i32 [[TMP55]], [[TMP56]] 2072 ; CHECK-NEXT: [[TMP58:%.*]] = ashr i32 [[TMP57]], 30 2073 ; CHECK-NEXT: [[TMP59:%.*]] = or i32 [[TMP58]], 1 2074 ; CHECK-NEXT: [[TMP60:%.*]] = sitofp i32 [[TMP55]] to float 2075 ; CHECK-NEXT: [[TMP61:%.*]] = sitofp i32 [[TMP56]] to float 2076 ; CHECK-NEXT: [[TMP62:%.*]] = fdiv fast float 1.000000e+00, [[TMP61]] 2077 ; CHECK-NEXT: [[TMP63:%.*]] = fmul fast float [[TMP60]], [[TMP62]] 2078 ; CHECK-NEXT: [[TMP64:%.*]] = call fast float @llvm.trunc.f32(float [[TMP63]]) 2079 ; CHECK-NEXT: [[TMP65:%.*]] = fsub fast float -0.000000e+00, [[TMP64]] 2080 ; CHECK-NEXT: [[TMP66:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP65]], float [[TMP61]], float [[TMP60]]) 2081 ; CHECK-NEXT: [[TMP67:%.*]] = fptosi float [[TMP64]] to i32 2082 ; CHECK-NEXT: [[TMP68:%.*]] = call fast float @llvm.fabs.f32(float [[TMP66]]) 2083 ; CHECK-NEXT: [[TMP69:%.*]] = call fast float @llvm.fabs.f32(float [[TMP61]]) 2084 ; CHECK-NEXT: [[TMP70:%.*]] = fcmp fast oge float [[TMP68]], [[TMP69]] 2085 ; CHECK-NEXT: [[TMP71:%.*]] = select i1 [[TMP70]], i32 [[TMP59]], i32 0 2086 ; CHECK-NEXT: [[TMP72:%.*]] = add i32 [[TMP67]], [[TMP71]] 2087 ; CHECK-NEXT: [[TMP73:%.*]] = mul i32 [[TMP72]], [[TMP56]] 2088 ; CHECK-NEXT: [[TMP74:%.*]] = sub i32 [[TMP55]], [[TMP73]] 2089 ; CHECK-NEXT: 
[[TMP75:%.*]] = trunc i32 [[TMP74]] to i16
; CHECK-NEXT: [[TMP76:%.*]] = sext i16 [[TMP75]] to i32
; CHECK-NEXT: [[TMP77:%.*]] = trunc i32 [[TMP76]] to i16
; CHECK-NEXT: [[TMP78:%.*]] = insertelement <3 x i16> [[TMP52]], i16 [[TMP77]], i64 2
; CHECK-NEXT: store <3 x i16> [[TMP78]], <3 x i16> addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT: ret void
;
  %r = srem <3 x i16> %x, %y
  store <3 x i16> %r, <3 x i16> addrspace(1)* %out
  ret void
}

; Unsigned division of <3 x i15>.
; The expected output handles the vector one lane at a time: each lane of %x
; and %y is extracted, zero-extended to i32 and converted to float. The
; quotient estimate is trunc(x * (1.0 / y)); the fmad.ftz result (-q * y + x)
; is compared by absolute value against |y| and, when it is >= |y|, 1 is added
; to the converted quotient. The i32 result is masked with 32767 (low 15 bits),
; truncated to i15 and inserted into the result vector.
; The assertion lines in this file are script-generated (see the NOTE on the
; first line of the file); regenerate them with utils/update_test_checks.py
; instead of editing them by hand.
define amdgpu_kernel void @udiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x, <3 x i15> %y) {
; CHECK-LABEL: @udiv_v3i15(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i15> [[X:%.*]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i15> [[Y:%.*]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = zext i15 [[TMP1]] to i32
; CHECK-NEXT: [[TMP4:%.*]] = zext i15 [[TMP2]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = uitofp i32 [[TMP3]] to float
; CHECK-NEXT: [[TMP6:%.*]] = uitofp i32 [[TMP4]] to float
; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast float 1.000000e+00, [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[TMP5]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]])
; CHECK-NEXT: [[TMP10:%.*]] = fsub fast float -0.000000e+00, [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]])
; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP9]] to i32
; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]])
; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]])
; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast oge float [[TMP13]], [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 1, i32 0
; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP12]], [[TMP16]]
; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 32767
; CHECK-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i15
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <3 x i15> undef, i15 [[TMP19]], i64 0
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <3 x i15> [[X]], i64 1
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <3 x i15> [[Y]], i64 1
; CHECK-NEXT: [[TMP23:%.*]] = zext i15 [[TMP21]] to i32
; CHECK-NEXT: [[TMP24:%.*]] = zext i15 [[TMP22]] to i32
; CHECK-NEXT: [[TMP25:%.*]] = uitofp i32 [[TMP23]] to float
; CHECK-NEXT: [[TMP26:%.*]] = uitofp i32 [[TMP24]] to float
; CHECK-NEXT: [[TMP27:%.*]] = fdiv fast float 1.000000e+00, [[TMP26]]
; CHECK-NEXT: [[TMP28:%.*]] = fmul fast float [[TMP25]], [[TMP27]]
; CHECK-NEXT: [[TMP29:%.*]] = call fast float @llvm.trunc.f32(float [[TMP28]])
; CHECK-NEXT: [[TMP30:%.*]] = fsub fast float -0.000000e+00, [[TMP29]]
; CHECK-NEXT: [[TMP31:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP30]], float [[TMP26]], float [[TMP25]])
; CHECK-NEXT: [[TMP32:%.*]] = fptoui float [[TMP29]] to i32
; CHECK-NEXT: [[TMP33:%.*]] = call fast float @llvm.fabs.f32(float [[TMP31]])
; CHECK-NEXT: [[TMP34:%.*]] = call fast float @llvm.fabs.f32(float [[TMP26]])
; CHECK-NEXT: [[TMP35:%.*]] = fcmp fast oge float [[TMP33]], [[TMP34]]
; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 1, i32 0
; CHECK-NEXT: [[TMP37:%.*]] = add i32 [[TMP32]], [[TMP36]]
; CHECK-NEXT: [[TMP38:%.*]] = and i32 [[TMP37]], 32767
; CHECK-NEXT: [[TMP39:%.*]] = trunc i32 [[TMP38]] to i15
; CHECK-NEXT: [[TMP40:%.*]] = insertelement <3 x i15> [[TMP20]], i15 [[TMP39]], i64 1
; CHECK-NEXT: [[TMP41:%.*]] = extractelement <3 x i15> [[X]], i64 2
; CHECK-NEXT: [[TMP42:%.*]] = extractelement <3 x i15> [[Y]], i64 2
; CHECK-NEXT: [[TMP43:%.*]] = zext i15 [[TMP41]] to i32
; CHECK-NEXT: [[TMP44:%.*]] = zext i15 [[TMP42]] to i32
; CHECK-NEXT: [[TMP45:%.*]] = uitofp i32 [[TMP43]] to float
; CHECK-NEXT: [[TMP46:%.*]] = uitofp i32 [[TMP44]] to float
; CHECK-NEXT: [[TMP47:%.*]] = fdiv fast float 1.000000e+00, [[TMP46]]
; CHECK-NEXT: [[TMP48:%.*]] = fmul fast float [[TMP45]], [[TMP47]]
; CHECK-NEXT: [[TMP49:%.*]] = call fast float @llvm.trunc.f32(float [[TMP48]])
; CHECK-NEXT: [[TMP50:%.*]] = fsub fast float -0.000000e+00, [[TMP49]]
; CHECK-NEXT: [[TMP51:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP50]], float [[TMP46]], float [[TMP45]])
; CHECK-NEXT: [[TMP52:%.*]] = fptoui float [[TMP49]] to i32
; CHECK-NEXT: [[TMP53:%.*]] = call fast float @llvm.fabs.f32(float [[TMP51]])
; CHECK-NEXT: [[TMP54:%.*]] = call fast float @llvm.fabs.f32(float [[TMP46]])
; CHECK-NEXT: [[TMP55:%.*]] = fcmp fast oge float [[TMP53]], [[TMP54]]
; CHECK-NEXT: [[TMP56:%.*]] = select i1 [[TMP55]], i32 1, i32 0
; CHECK-NEXT: [[TMP57:%.*]] = add i32 [[TMP52]], [[TMP56]]
; CHECK-NEXT: [[TMP58:%.*]] = and i32 [[TMP57]], 32767
; CHECK-NEXT: [[TMP59:%.*]] = trunc i32 [[TMP58]] to i15
; CHECK-NEXT: [[TMP60:%.*]] = insertelement <3 x i15> [[TMP40]], i15 [[TMP59]], i64 2
; CHECK-NEXT: store <3 x i15> [[TMP60]], <3 x i15> addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT: ret void
;
  %r = udiv <3 x i15> %x, %y
  store <3 x i15> %r, <3 x i15> addrspace(1)* %out
  ret void
}

; Unsigned remainder of <3 x i15>.
; Per lane, the expected output forms the same float-based quotient estimate
; as the udiv test (zext, uitofp, reciprocal multiply, trunc, fmad.ftz compare,
; optional +1), then computes the remainder in i32 as x - q * y. The result is
; masked with 32767, truncated to i15 and inserted into the result vector.
define amdgpu_kernel void @urem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x, <3 x i15> %y) {
; CHECK-LABEL: @urem_v3i15(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i15> [[X:%.*]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i15> [[Y:%.*]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = zext i15 [[TMP1]] to i32
; CHECK-NEXT: [[TMP4:%.*]] = zext i15 [[TMP2]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = uitofp i32 [[TMP3]] to float
; CHECK-NEXT: [[TMP6:%.*]] = uitofp i32 [[TMP4]] to float
; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast float 1.000000e+00, [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[TMP5]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]])
; CHECK-NEXT: [[TMP10:%.*]] = fsub fast float -0.000000e+00, [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]])
; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP9]] to i32
; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]])
; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]])
; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast oge float [[TMP13]], [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 1, i32 0
; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP12]], [[TMP16]]
; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], [[TMP4]]
; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP3]], [[TMP18]]
; CHECK-NEXT: [[TMP20:%.*]] = and i32 [[TMP19]], 32767
; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i15
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <3 x i15> undef, i15 [[TMP21]], i64 0
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <3 x i15> [[X]], i64 1
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <3 x i15> [[Y]], i64 1
; CHECK-NEXT: [[TMP25:%.*]] = zext i15 [[TMP23]] to i32
; CHECK-NEXT: [[TMP26:%.*]] = zext i15 [[TMP24]] to i32
; CHECK-NEXT: [[TMP27:%.*]] = uitofp i32 [[TMP25]] to float
; CHECK-NEXT: [[TMP28:%.*]] = uitofp i32 [[TMP26]] to float
; CHECK-NEXT: [[TMP29:%.*]] = fdiv fast float 1.000000e+00, [[TMP28]]
; CHECK-NEXT: [[TMP30:%.*]] = fmul fast float [[TMP27]], [[TMP29]]
; CHECK-NEXT: [[TMP31:%.*]] = call fast float @llvm.trunc.f32(float [[TMP30]])
; CHECK-NEXT: [[TMP32:%.*]] = fsub fast float -0.000000e+00, [[TMP31]]
; CHECK-NEXT: [[TMP33:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP32]], float [[TMP28]], float [[TMP27]])
; CHECK-NEXT: [[TMP34:%.*]] = fptoui float [[TMP31]] to i32
; CHECK-NEXT: [[TMP35:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]])
; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.fabs.f32(float [[TMP28]])
; CHECK-NEXT: [[TMP37:%.*]] = fcmp fast oge float [[TMP35]], [[TMP36]]
; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 1, i32 0
; CHECK-NEXT: [[TMP39:%.*]] = add i32 [[TMP34]], [[TMP38]]
; CHECK-NEXT: [[TMP40:%.*]] = mul i32 [[TMP39]], [[TMP26]]
; CHECK-NEXT: [[TMP41:%.*]] = sub i32 [[TMP25]], [[TMP40]]
; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP41]], 32767
; CHECK-NEXT: [[TMP43:%.*]] = trunc i32 [[TMP42]] to i15
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <3 x i15> [[TMP22]], i15 [[TMP43]], i64 1
; CHECK-NEXT: [[TMP45:%.*]] = extractelement <3 x i15> [[X]], i64 2
; CHECK-NEXT: [[TMP46:%.*]] = extractelement <3 x i15> [[Y]], i64 2
; CHECK-NEXT: [[TMP47:%.*]] = zext i15 [[TMP45]] to i32
; CHECK-NEXT: [[TMP48:%.*]] = zext i15 [[TMP46]] to i32
; CHECK-NEXT: [[TMP49:%.*]] = uitofp i32 [[TMP47]] to float
; CHECK-NEXT: [[TMP50:%.*]] = uitofp i32 [[TMP48]] to float
; CHECK-NEXT: [[TMP51:%.*]] = fdiv fast float 1.000000e+00, [[TMP50]]
; CHECK-NEXT: [[TMP52:%.*]] = fmul fast float [[TMP49]], [[TMP51]]
; CHECK-NEXT: [[TMP53:%.*]] = call fast float @llvm.trunc.f32(float [[TMP52]])
; CHECK-NEXT: [[TMP54:%.*]] = fsub fast float -0.000000e+00, [[TMP53]]
; CHECK-NEXT: [[TMP55:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP54]], float [[TMP50]], float [[TMP49]])
; CHECK-NEXT: [[TMP56:%.*]] = fptoui float [[TMP53]] to i32
; CHECK-NEXT: [[TMP57:%.*]] = call fast float @llvm.fabs.f32(float [[TMP55]])
; CHECK-NEXT: [[TMP58:%.*]] = call fast float @llvm.fabs.f32(float [[TMP50]])
; CHECK-NEXT: [[TMP59:%.*]] = fcmp fast oge float [[TMP57]], [[TMP58]]
; CHECK-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 1, i32 0
; CHECK-NEXT: [[TMP61:%.*]] = add i32 [[TMP56]], [[TMP60]]
; CHECK-NEXT: [[TMP62:%.*]] = mul i32 [[TMP61]], [[TMP48]]
; CHECK-NEXT: [[TMP63:%.*]] = sub i32 [[TMP47]], [[TMP62]]
; CHECK-NEXT: [[TMP64:%.*]] = and i32 [[TMP63]], 32767
; CHECK-NEXT: [[TMP65:%.*]] = trunc i32 [[TMP64]] to i15
; CHECK-NEXT: [[TMP66:%.*]] = insertelement <3 x i15> [[TMP44]], i15 [[TMP65]], i64 2
; CHECK-NEXT: store <3 x i15> [[TMP66]], <3 x i15> addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT: ret void
;
  %r = urem <3 x i15> %x, %y
  store <3 x i15> %r, <3 x i15> addrspace(1)* %out
  ret void
}

; Signed division of <3 x i15>.
; Per lane, the operands are sign-extended to i32 and converted with sitofp.
; `or (ashr (xor x, y), 30), 1` yields the correction term: 1 when the
; sign-extended operands have the same sign, -1 otherwise. The quotient
; estimate is trunc(x * (1.0 / y)) converted with fptosi; the correction term
; is added when |fmad.ftz(-q, y, x)| >= |y|. The i32 result is narrowed through
; trunc / sext / trunc to i15 and inserted into the result vector.
define amdgpu_kernel void @sdiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x, <3 x i15> %y) {
; CHECK-LABEL: @sdiv_v3i15(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i15> [[X:%.*]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i15> [[Y:%.*]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = sext i15 [[TMP1]] to i32
; CHECK-NEXT: [[TMP4:%.*]] = sext i15 [[TMP2]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = ashr i32 [[TMP5]], 30
; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], 1
; CHECK-NEXT: [[TMP8:%.*]] = sitofp i32 [[TMP3]] to float
; CHECK-NEXT: [[TMP9:%.*]] = sitofp i32 [[TMP4]] to float
; CHECK-NEXT: [[TMP10:%.*]] = fdiv fast float 1.000000e+00, [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]])
; CHECK-NEXT: [[TMP13:%.*]] = fsub fast float -0.000000e+00, [[TMP12]]
; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]])
; CHECK-NEXT: [[TMP15:%.*]] = fptosi float [[TMP12]] to i32
; CHECK-NEXT: [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]])
; CHECK-NEXT: [[TMP17:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
; CHECK-NEXT: [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]]
; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP7]], i32 0
; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP15]], [[TMP19]]
; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i15
; CHECK-NEXT: [[TMP22:%.*]] = sext i15 [[TMP21]] to i32
; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i15
; CHECK-NEXT: [[TMP24:%.*]] = insertelement <3 x i15> undef, i15 [[TMP23]], i64 0
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <3 x i15> [[X]], i64 1
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <3 x i15> [[Y]], i64 1
; CHECK-NEXT: [[TMP27:%.*]] = sext i15 [[TMP25]] to i32
; CHECK-NEXT: [[TMP28:%.*]] = sext i15 [[TMP26]] to i32
; CHECK-NEXT: [[TMP29:%.*]] = xor i32 [[TMP27]], [[TMP28]]
; CHECK-NEXT: [[TMP30:%.*]] = ashr i32 [[TMP29]], 30
; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP30]], 1
; CHECK-NEXT: [[TMP32:%.*]] = sitofp i32 [[TMP27]] to float
; CHECK-NEXT: [[TMP33:%.*]] = sitofp i32 [[TMP28]] to float
; CHECK-NEXT: [[TMP34:%.*]] = fdiv fast float 1.000000e+00, [[TMP33]]
; CHECK-NEXT: [[TMP35:%.*]] = fmul fast float [[TMP32]], [[TMP34]]
; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.trunc.f32(float [[TMP35]])
; CHECK-NEXT: [[TMP37:%.*]] = fsub fast float -0.000000e+00, [[TMP36]]
; CHECK-NEXT: [[TMP38:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP37]], float [[TMP33]], float [[TMP32]])
; CHECK-NEXT: [[TMP39:%.*]] = fptosi float [[TMP36]] to i32
; CHECK-NEXT: [[TMP40:%.*]] = call fast float @llvm.fabs.f32(float [[TMP38]])
; CHECK-NEXT: [[TMP41:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]])
; CHECK-NEXT: [[TMP42:%.*]] = fcmp fast oge float [[TMP40]], [[TMP41]]
; CHECK-NEXT: [[TMP43:%.*]] = select i1 [[TMP42]], i32 [[TMP31]], i32 0
; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[TMP39]], [[TMP43]]
; CHECK-NEXT: [[TMP45:%.*]] = trunc i32 [[TMP44]] to i15
; CHECK-NEXT: [[TMP46:%.*]] = sext i15 [[TMP45]] to i32
; CHECK-NEXT: [[TMP47:%.*]] = trunc i32 [[TMP46]] to i15
; CHECK-NEXT: [[TMP48:%.*]] = insertelement <3 x i15> [[TMP24]], i15 [[TMP47]], i64 1
; CHECK-NEXT: [[TMP49:%.*]] = extractelement <3 x i15> [[X]], i64 2
; CHECK-NEXT: [[TMP50:%.*]] = extractelement <3 x i15> [[Y]], i64 2
; CHECK-NEXT: [[TMP51:%.*]] = sext i15 [[TMP49]] to i32
; CHECK-NEXT: [[TMP52:%.*]] = sext i15 [[TMP50]] to i32
; CHECK-NEXT: [[TMP53:%.*]] = xor i32 [[TMP51]], [[TMP52]]
; CHECK-NEXT: [[TMP54:%.*]] = ashr i32 [[TMP53]], 30
; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP54]], 1
; CHECK-NEXT: [[TMP56:%.*]] = sitofp i32 [[TMP51]] to float
; CHECK-NEXT: [[TMP57:%.*]] = sitofp i32 [[TMP52]] to float
; CHECK-NEXT: [[TMP58:%.*]] = fdiv fast float 1.000000e+00, [[TMP57]]
; CHECK-NEXT: [[TMP59:%.*]] = fmul fast float [[TMP56]], [[TMP58]]
; CHECK-NEXT: [[TMP60:%.*]] = call fast float @llvm.trunc.f32(float [[TMP59]])
; CHECK-NEXT: [[TMP61:%.*]] = fsub fast float -0.000000e+00, [[TMP60]]
; CHECK-NEXT: [[TMP62:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP61]], float [[TMP57]], float [[TMP56]])
; CHECK-NEXT: [[TMP63:%.*]] = fptosi float [[TMP60]] to i32
; CHECK-NEXT: [[TMP64:%.*]] = call fast float @llvm.fabs.f32(float [[TMP62]])
; CHECK-NEXT: [[TMP65:%.*]] = call fast float @llvm.fabs.f32(float [[TMP57]])
; CHECK-NEXT: [[TMP66:%.*]] = fcmp fast oge float [[TMP64]], [[TMP65]]
; CHECK-NEXT: [[TMP67:%.*]] = select i1 [[TMP66]], i32 [[TMP55]], i32 0
; CHECK-NEXT: [[TMP68:%.*]] = add i32 [[TMP63]], [[TMP67]]
; CHECK-NEXT: [[TMP69:%.*]] = trunc i32 [[TMP68]] to i15
; CHECK-NEXT: [[TMP70:%.*]] = sext i15 [[TMP69]] to i32
; CHECK-NEXT: [[TMP71:%.*]] = trunc i32 [[TMP70]] to i15
; CHECK-NEXT: [[TMP72:%.*]] = insertelement <3 x i15> [[TMP48]], i15 [[TMP71]], i64 2
; CHECK-NEXT: store <3 x i15> [[TMP72]], <3 x i15> addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT: ret void
;
  %r = sdiv <3 x i15> %x, %y
  store <3 x i15> %r, <3 x i15> addrspace(1)* %out
  ret void
}

; Signed remainder of <3 x i15>.
; Per lane, the expected output forms the same signed quotient as the sdiv
; test (sext, sitofp, reciprocal multiply, trunc, fptosi, sign-dependent
; correction), then computes the remainder in i32 as x - q * y. The result is
; narrowed through trunc / sext / trunc to i15 and inserted into the result
; vector.
define amdgpu_kernel void @srem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x, <3 x i15> %y) {
; CHECK-LABEL: @srem_v3i15(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i15> [[X:%.*]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i15> [[Y:%.*]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = sext i15 [[TMP1]] to i32
; CHECK-NEXT: [[TMP4:%.*]] = sext i15 [[TMP2]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = ashr i32 [[TMP5]], 30
; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], 1
; CHECK-NEXT: [[TMP8:%.*]] = sitofp i32 [[TMP3]] to float
; CHECK-NEXT: [[TMP9:%.*]] = sitofp i32 [[TMP4]] to float
; CHECK-NEXT: [[TMP10:%.*]] = fdiv fast float 1.000000e+00, [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]])
; CHECK-NEXT: [[TMP13:%.*]] = fsub fast float -0.000000e+00, [[TMP12]]
; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]])
; CHECK-NEXT: [[TMP15:%.*]] = fptosi float [[TMP12]] to i32
; CHECK-NEXT: [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]])
; CHECK-NEXT: [[TMP17:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
; CHECK-NEXT: [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]]
; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP7]], i32 0
; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP15]], [[TMP19]]
; CHECK-NEXT: [[TMP21:%.*]] = mul i32 [[TMP20]], [[TMP4]]
; CHECK-NEXT: [[TMP22:%.*]] = sub i32 [[TMP3]], [[TMP21]]
; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i15
; CHECK-NEXT: [[TMP24:%.*]] = sext i15 [[TMP23]] to i32
; CHECK-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i15
; CHECK-NEXT: [[TMP26:%.*]] = insertelement <3 x i15> undef, i15 [[TMP25]], i64 0
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <3 x i15> [[X]], i64 1
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <3 x i15> [[Y]], i64 1
; CHECK-NEXT: [[TMP29:%.*]] = sext i15 [[TMP27]] to i32
; CHECK-NEXT: [[TMP30:%.*]] = sext i15 [[TMP28]] to i32
; CHECK-NEXT: [[TMP31:%.*]] = xor i32 [[TMP29]], [[TMP30]]
; CHECK-NEXT: [[TMP32:%.*]] = ashr i32 [[TMP31]], 30
; CHECK-NEXT: [[TMP33:%.*]] = or i32 [[TMP32]], 1
; CHECK-NEXT: [[TMP34:%.*]] = sitofp i32 [[TMP29]] to float
; CHECK-NEXT: [[TMP35:%.*]] = sitofp i32 [[TMP30]] to float
; CHECK-NEXT: [[TMP36:%.*]] = fdiv fast float 1.000000e+00, [[TMP35]]
; CHECK-NEXT: [[TMP37:%.*]] = fmul fast float [[TMP34]], [[TMP36]]
; CHECK-NEXT: [[TMP38:%.*]] = call fast float @llvm.trunc.f32(float [[TMP37]])
; CHECK-NEXT: [[TMP39:%.*]] = fsub fast float -0.000000e+00, [[TMP38]]
; CHECK-NEXT: [[TMP40:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP39]], float [[TMP35]], float [[TMP34]])
; CHECK-NEXT: [[TMP41:%.*]] = fptosi float [[TMP38]] to i32
; CHECK-NEXT: [[TMP42:%.*]] = call fast float @llvm.fabs.f32(float [[TMP40]])
; CHECK-NEXT: [[TMP43:%.*]] = call fast float @llvm.fabs.f32(float [[TMP35]])
; CHECK-NEXT: [[TMP44:%.*]] = fcmp fast oge float [[TMP42]], [[TMP43]]
; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[TMP33]], i32 0
; CHECK-NEXT: [[TMP46:%.*]] = add i32 [[TMP41]], [[TMP45]]
; CHECK-NEXT: [[TMP47:%.*]] = mul i32 [[TMP46]], [[TMP30]]
; CHECK-NEXT: [[TMP48:%.*]] = sub i32 [[TMP29]], [[TMP47]]
; CHECK-NEXT: [[TMP49:%.*]] = trunc i32 [[TMP48]] to i15
; CHECK-NEXT: [[TMP50:%.*]] = sext i15 [[TMP49]] to i32
; CHECK-NEXT: [[TMP51:%.*]] = trunc i32 [[TMP50]] to i15
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <3 x i15> [[TMP26]], i15 [[TMP51]], i64 1
; CHECK-NEXT: [[TMP53:%.*]] = extractelement <3 x i15> [[X]], i64 2
; CHECK-NEXT: [[TMP54:%.*]] = extractelement <3 x i15> [[Y]], i64 2
; CHECK-NEXT: [[TMP55:%.*]] = sext i15 [[TMP53]] to i32
; CHECK-NEXT: [[TMP56:%.*]] = sext i15 [[TMP54]] to i32
; CHECK-NEXT: [[TMP57:%.*]] = xor i32 [[TMP55]], [[TMP56]]
; CHECK-NEXT: [[TMP58:%.*]] = ashr i32 [[TMP57]], 30
; CHECK-NEXT: [[TMP59:%.*]] = or i32 [[TMP58]], 1
; CHECK-NEXT: [[TMP60:%.*]] = sitofp i32 [[TMP55]] to float
; CHECK-NEXT: [[TMP61:%.*]] = sitofp i32 [[TMP56]] to float
; CHECK-NEXT: [[TMP62:%.*]] = fdiv fast float 1.000000e+00, [[TMP61]]
; CHECK-NEXT: [[TMP63:%.*]] = fmul fast float [[TMP60]], [[TMP62]]
; CHECK-NEXT: [[TMP64:%.*]] = call fast float @llvm.trunc.f32(float [[TMP63]])
; CHECK-NEXT: [[TMP65:%.*]] = fsub fast float -0.000000e+00, [[TMP64]]
; CHECK-NEXT: [[TMP66:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP65]], float [[TMP61]], float [[TMP60]])
; CHECK-NEXT: [[TMP67:%.*]] = fptosi float [[TMP64]] to i32
; CHECK-NEXT: [[TMP68:%.*]] = call fast float @llvm.fabs.f32(float [[TMP66]])
; CHECK-NEXT: [[TMP69:%.*]] = call fast float @llvm.fabs.f32(float [[TMP61]])
; CHECK-NEXT: [[TMP70:%.*]] = fcmp fast oge float [[TMP68]], [[TMP69]]
; CHECK-NEXT: [[TMP71:%.*]] = select i1 [[TMP70]], i32 [[TMP59]], i32 0
; CHECK-NEXT: [[TMP72:%.*]] = add i32 [[TMP67]], [[TMP71]]
; CHECK-NEXT: [[TMP73:%.*]] = mul i32 [[TMP72]], [[TMP56]]
; CHECK-NEXT: [[TMP74:%.*]] = sub i32 [[TMP55]], [[TMP73]]
; CHECK-NEXT: [[TMP75:%.*]] = trunc i32 [[TMP74]] to i15
; CHECK-NEXT: [[TMP76:%.*]] = sext i15 [[TMP75]] to i32
; CHECK-NEXT: [[TMP77:%.*]] = trunc i32 [[TMP76]] to i15
; CHECK-NEXT: [[TMP78:%.*]] = insertelement <3 x i15> [[TMP52]], i15 [[TMP77]], i64 2
; CHECK-NEXT: store <3 x i15> [[TMP78]], <3 x i15> addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT: ret void
;
  %r = srem <3 x i15> %x, %y
  store <3 x i15> %r, <3 x i15> addrspace(1)* %out
  ret void
}