Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      3 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
      4 
      5 ; FUNC-LABEL: {{^}}udiv24_i8:
      6 ; SI: v_cvt_f32_ubyte
      7 ; SI: v_cvt_f32_ubyte
      8 ; SI: v_rcp_f32
      9 ; SI: v_cvt_u32_f32
     10 
     11 ; EG: UINT_TO_FLT
     12 ; EG-DAG: UINT_TO_FLT
     13 ; EG-DAG: RECIP_IEEE
     14 ; EG: FLT_TO_UINT
     15 define void @udiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) { ; stores the unsigned i8 quotient in[0] / in[1] to %out
     16   %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1 ; address of the i8 element at index 1
     17   %num = load i8, i8 addrspace(1) * %in ; numerator: element 0
     18   %den = load i8, i8 addrspace(1) * %den_ptr ; denominator: element 1
     19   %result = udiv i8 %num, %den ; i8 operands, so neither value exceeds 8 bits
     20   store i8 %result, i8 addrspace(1)* %out
     21   ret void
     22 }
     23 
     24 ; FUNC-LABEL: {{^}}udiv24_i16:
     25 ; SI: v_cvt_f32_u32
     26 ; SI: v_cvt_f32_u32
     27 ; SI: v_rcp_f32
     28 ; SI: v_cvt_u32_f32
     29 
     30 ; EG: UINT_TO_FLT
     31 ; EG-DAG: UINT_TO_FLT
     32 ; EG-DAG: RECIP_IEEE
     33 ; EG: FLT_TO_UINT
     34 define void @udiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) { ; stores the unsigned i16 quotient in[0] / in[1] to %out
     35   %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1 ; address of the i16 element at index 1
     36   %num = load i16, i16 addrspace(1) * %in, align 2 ; numerator: element 0
     37   %den = load i16, i16 addrspace(1) * %den_ptr, align 2 ; denominator: element 1
     38   %result = udiv i16 %num, %den ; i16 operands, so neither value exceeds 16 bits
     39   store i16 %result, i16 addrspace(1)* %out, align 2
     40   ret void
     41 }
     42 
     43 ; FUNC-LABEL: {{^}}udiv23_i32:
     44 ; SI: v_cvt_f32_u32
     45 ; SI-DAG: v_cvt_f32_u32
     46 ; SI-DAG: v_rcp_f32
     47 ; SI: v_cvt_u32_f32
     48 
     49 ; EG: UINT_TO_FLT
     50 ; EG-DAG: UINT_TO_FLT
     51 ; EG-DAG: RECIP_IEEE
     52 ; EG: FLT_TO_UINT
     53 define void @udiv23_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { ; i32 udiv where both operands are masked to 23 bits
     54   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 ; address of the i32 element at index 1
     55   %num = load i32, i32 addrspace(1) * %in, align 4 ; numerator: element 0
     56   %den = load i32, i32 addrspace(1) * %den_ptr, align 4 ; denominator: element 1
     57   %num.i23.0 = shl i32 %num, 9 ; shl 9 followed by lshr 9 clears the top 9 bits
     58   %den.i23.0 = shl i32 %den, 9
     59   %num.i23 = lshr i32 %num.i23.0, 9 ; numerator reduced to its low 23 bits
     60   %den.i23 = lshr i32 %den.i23.0, 9 ; denominator reduced to its low 23 bits
     61   %result = udiv i32 %num.i23, %den.i23
     62   store i32 %result, i32 addrspace(1)* %out, align 4
     63   ret void
     64 }
     65 
     66 ; FUNC-LABEL: {{^}}udiv24_i32:
     67 ; SI: v_rcp_iflag
     68 ; SI-NOT: v_rcp_f32
     69 ; EG-NOT: RECIP_IEEE
     70 define void @udiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { ; i32 udiv where both operands are masked to 24 bits
     71   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 ; address of the i32 element at index 1
     72   %num = load i32, i32 addrspace(1) * %in, align 4 ; numerator: element 0
     73   %den = load i32, i32 addrspace(1) * %den_ptr, align 4 ; denominator: element 1
     74   %num.i24.0 = shl i32 %num, 8 ; shl 8 followed by lshr 8 clears the top 8 bits
     75   %den.i24.0 = shl i32 %den, 8
     76   %num.i24 = lshr i32 %num.i24.0, 8 ; numerator reduced to its low 24 bits
     77   %den.i24 = lshr i32 %den.i24.0, 8 ; denominator reduced to its low 24 bits
     78   %result = udiv i32 %num.i24, %den.i24
     79   store i32 %result, i32 addrspace(1)* %out, align 4
     80   ret void
     81 }
     82 
     83 ; FUNC-LABEL: {{^}}no_udiv24_u23_u24_i32:
     84 ; SI: v_rcp_iflag
     85 ; SI-NOT: v_rcp_f32
     86 ; EG-NOT: RECIP_IEEE
     87 define void @no_udiv24_u23_u24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { ; i32 udiv: 23-bit numerator, 24-bit denominator
     88   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 ; address of the i32 element at index 1
     89   %num = load i32, i32 addrspace(1) * %in, align 4 ; numerator: element 0
     90   %den = load i32, i32 addrspace(1) * %den_ptr, align 4 ; denominator: element 1
     91   %num.i23.0 = shl i32 %num, 9 ; numerator: shl/lshr by 9 clears the top 9 bits
     92   %den.i24.0 = shl i32 %den, 8 ; denominator: shl/lshr by 8 clears the top 8 bits
     93   %num.i23 = lshr i32 %num.i23.0, 9 ; numerator reduced to its low 23 bits
     94   %den.i24 = lshr i32 %den.i24.0, 8 ; denominator reduced to its low 24 bits
     95   %result = udiv i32 %num.i23, %den.i24
     96   store i32 %result, i32 addrspace(1)* %out, align 4
     97   ret void
     98 }
     99 
    100 ; FUNC-LABEL: {{^}}no_udiv24_u24_u23_i32:
    101 ; SI: v_rcp_iflag
    102 ; SI-NOT: v_rcp_f32
    103 ; EG-NOT: RECIP_IEEE
    104 define void @no_udiv24_u24_u23_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { ; i32 udiv: 24-bit numerator, 23-bit denominator
    105   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 ; address of the i32 element at index 1
    106   %num = load i32, i32 addrspace(1) * %in, align 4 ; numerator: element 0
    107   %den = load i32, i32 addrspace(1) * %den_ptr, align 4 ; denominator: element 1
    108   %num.i24.0 = shl i32 %num, 8 ; numerator: shl/lshr by 8 clears the top 8 bits
    109   %den.i23.0 = shl i32 %den, 9 ; denominator: shl/lshr by 9 clears the top 9 bits
    110   %num.i24 = lshr i32 %num.i24.0, 8 ; numerator reduced to its low 24 bits
    111   %den.i23 = lshr i32 %den.i23.0, 9 ; denominator reduced to its low 23 bits
    112   %result = udiv i32 %num.i24, %den.i23
    113   store i32 %result, i32 addrspace(1)* %out, align 4
    114   ret void
    115 }
    116 
    117 ; FUNC-LABEL: {{^}}udiv25_i32:
    118 ; RCP_IFLAG is for URECIP in the full 32b alg
    119 ; SI: v_rcp_iflag
    120 ; SI-NOT: v_rcp_f32
    121 
    122 ; EG-NOT: UINT_TO_FLT
    123 ; EG-NOT: RECIP_IEEE
    124 define void @udiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { ; i32 udiv where both operands are masked to 25 bits
    125   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 ; address of the i32 element at index 1
    126   %num = load i32, i32 addrspace(1) * %in, align 4 ; numerator: element 0
    127   %den = load i32, i32 addrspace(1) * %den_ptr, align 4 ; denominator: element 1
    128   %num.i25.0 = shl i32 %num, 7 ; shl 7 followed by lshr 7 clears the top 7 bits
    129   %den.i25.0 = shl i32 %den, 7
    130   %num.i25 = lshr i32 %num.i25.0, 7 ; numerator reduced to its low 25 bits
    131   %den.i25 = lshr i32 %den.i25.0, 7 ; denominator reduced to its low 25 bits
    132   %result = udiv i32 %num.i25, %den.i25
    133   store i32 %result, i32 addrspace(1)* %out, align 4
    134   ret void
    135 }
    136 
    137 ; FUNC-LABEL: {{^}}test_no_udiv24_i32_1:
    138 ; RCP_IFLAG is for URECIP in the full 32b alg
    139 ; SI: v_rcp_iflag
    140 ; SI-NOT: v_rcp_f32
    141 
    142 ; EG-NOT: UINT_TO_FLT
    143 ; EG-NOT: RECIP_IEEE
    144 define void @test_no_udiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { ; i32 udiv: 24-bit numerator, 25-bit denominator
    145   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 ; address of the i32 element at index 1
    146   %num = load i32, i32 addrspace(1) * %in, align 4 ; numerator: element 0
    147   %den = load i32, i32 addrspace(1) * %den_ptr, align 4 ; denominator: element 1
    148   %num.i24.0 = shl i32 %num, 8 ; numerator: shl/lshr by 8 clears the top 8 bits
    149   %den.i24.0 = shl i32 %den, 7 ; denominator: shift is 7, not 8, so despite the .i24 name 25 bits survive
    150   %num.i24 = lshr i32 %num.i24.0, 8 ; numerator reduced to its low 24 bits
    151   %den.i24 = lshr i32 %den.i24.0, 7 ; denominator reduced to its low 25 bits
    152   %result = udiv i32 %num.i24, %den.i24
    153   store i32 %result, i32 addrspace(1)* %out, align 4
    154   ret void
    155 }
    156 
    157 ; FUNC-LABEL: {{^}}test_no_udiv24_i32_2:
    158 ; RCP_IFLAG is for URECIP in the full 32b alg
    159 ; SI: v_rcp_iflag
    160 ; SI-NOT: v_rcp_f32
    161 
    162 ; EG-NOT: UINT_TO_FLT
    163 ; EG-NOT: RECIP_IEEE
    164 define void @test_no_udiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { ; i32 udiv: 25-bit numerator, 24-bit denominator
    165   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 ; address of the i32 element at index 1
    166   %num = load i32, i32 addrspace(1) * %in, align 4 ; numerator: element 0
    167   %den = load i32, i32 addrspace(1) * %den_ptr, align 4 ; denominator: element 1
    168   %num.i24.0 = shl i32 %num, 7 ; numerator: shift is 7, not 8, so despite the .i24 name 25 bits survive
    169   %den.i24.0 = shl i32 %den, 8 ; denominator: shl/lshr by 8 clears the top 8 bits
    170   %num.i24 = lshr i32 %num.i24.0, 7 ; numerator reduced to its low 25 bits
    171   %den.i24 = lshr i32 %den.i24.0, 8 ; denominator reduced to its low 24 bits
    172   %result = udiv i32 %num.i24, %den.i24
    173   store i32 %result, i32 addrspace(1)* %out, align 4
    174   ret void
    175 }
    176 
    177 ; FUNC-LABEL: {{^}}urem24_i8:
    178 ; SI: v_cvt_f32_ubyte
    179 ; SI: v_cvt_f32_ubyte
    180 ; SI: v_rcp_f32
    181 ; SI: v_cvt_u32_f32
    182 
    183 ; EG: UINT_TO_FLT
    184 ; EG-DAG: UINT_TO_FLT
    185 ; EG-DAG: RECIP_IEEE
    186 ; EG: FLT_TO_UINT
    187 define void @urem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) { ; stores the unsigned i8 remainder in[0] % in[1] to %out
    188   %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1 ; address of the i8 element at index 1
    189   %num = load i8, i8 addrspace(1) * %in ; numerator: element 0
    190   %den = load i8, i8 addrspace(1) * %den_ptr ; denominator: element 1
    191   %result = urem i8 %num, %den ; i8 operands, so neither value exceeds 8 bits
    192   store i8 %result, i8 addrspace(1)* %out
    193   ret void
    194 }
    195 
    196 ; FUNC-LABEL: {{^}}urem24_i16:
    197 ; SI: v_cvt_f32_u32
    198 ; SI: v_cvt_f32_u32
    199 ; SI: v_rcp_f32
    200 ; SI: v_cvt_u32_f32
    201 
    202 ; EG: UINT_TO_FLT
    203 ; EG-DAG: UINT_TO_FLT
    204 ; EG-DAG: RECIP_IEEE
    205 ; EG: FLT_TO_UINT
    206 define void @urem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) { ; stores the unsigned i16 remainder in[0] % in[1] to %out
    207   %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1 ; address of the i16 element at index 1
    208   %num = load i16, i16 addrspace(1) * %in, align 2 ; numerator: element 0
    209   %den = load i16, i16 addrspace(1) * %den_ptr, align 2 ; denominator: element 1
    210   %result = urem i16 %num, %den ; i16 operands, so neither value exceeds 16 bits
    211   store i16 %result, i16 addrspace(1)* %out, align 2
    212   ret void
    213 }
    214 
    215 ; FUNC-LABEL: {{^}}urem24_i32:
    216 ; SI-NOT: v_rcp_f32
    217 ; EG-NOT: RECIP_IEEE
    218 define void @urem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { ; i32 urem where both operands are masked to 24 bits
    219   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 ; address of the i32 element at index 1
    220   %num = load i32, i32 addrspace(1) * %in, align 4 ; numerator: element 0
    221   %den = load i32, i32 addrspace(1) * %den_ptr, align 4 ; denominator: element 1
    222   %num.i24.0 = shl i32 %num, 8 ; shl 8 followed by lshr 8 clears the top 8 bits
    223   %den.i24.0 = shl i32 %den, 8
    224   %num.i24 = lshr i32 %num.i24.0, 8 ; numerator reduced to its low 24 bits
    225   %den.i24 = lshr i32 %den.i24.0, 8 ; denominator reduced to its low 24 bits
    226   %result = urem i32 %num.i24, %den.i24
    227   store i32 %result, i32 addrspace(1)* %out, align 4
    228   ret void
    229 }
    230 
    231 ; FUNC-LABEL: {{^}}urem25_i32:
    232 ; RCP_IFLAG is for URECIP in the full 32b alg
    233 ; SI: v_rcp_iflag
    234 ; SI-NOT: v_rcp_f32
    235 
    236 ; EG-NOT: UINT_TO_FLT
    237 ; EG-NOT: RECIP_IEEE
    238 define void @urem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { ; i32 urem where both operands are masked to 25 bits
    239   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 ; address of the i32 element at index 1
    240   %num = load i32, i32 addrspace(1) * %in, align 4 ; numerator: element 0
    241   %den = load i32, i32 addrspace(1) * %den_ptr, align 4 ; denominator: element 1
    242   %num.i24.0 = shl i32 %num, 7 ; values are named .i24, but shl/lshr by 7 clears only the top 7 bits
    243   %den.i24.0 = shl i32 %den, 7
    244   %num.i24 = lshr i32 %num.i24.0, 7 ; numerator reduced to its low 25 bits
    245   %den.i24 = lshr i32 %den.i24.0, 7 ; denominator reduced to its low 25 bits
    246   %result = urem i32 %num.i24, %den.i24
    247   store i32 %result, i32 addrspace(1)* %out, align 4
    248   ret void
    249 }
    250 
    251 ; FUNC-LABEL: {{^}}test_no_urem24_i32_1:
    252 ; RCP_IFLAG is for URECIP in the full 32b alg
    253 ; SI: v_rcp_iflag
    254 ; SI-NOT: v_rcp_f32
    255 
    256 ; EG-NOT: UINT_TO_FLT
    257 ; EG-NOT: RECIP_IEEE
    258 define void @test_no_urem24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { ; i32 urem: 24-bit numerator, 25-bit denominator
    259   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 ; address of the i32 element at index 1
    260   %num = load i32, i32 addrspace(1) * %in, align 4 ; numerator: element 0
    261   %den = load i32, i32 addrspace(1) * %den_ptr, align 4 ; denominator: element 1
    262   %num.i24.0 = shl i32 %num, 8 ; numerator: shl/lshr by 8 clears the top 8 bits
    263   %den.i24.0 = shl i32 %den, 7 ; denominator: shift is 7, not 8, so despite the .i24 name 25 bits survive
    264   %num.i24 = lshr i32 %num.i24.0, 8 ; numerator reduced to its low 24 bits
    265   %den.i24 = lshr i32 %den.i24.0, 7 ; denominator reduced to its low 25 bits
    266   %result = urem i32 %num.i24, %den.i24
    267   store i32 %result, i32 addrspace(1)* %out, align 4
    268   ret void
    269 }
    270 
    271 ; FUNC-LABEL: {{^}}test_no_urem24_i32_2:
    272 ; RCP_IFLAG is for URECIP in the full 32b alg
    273 ; SI: v_rcp_iflag
    274 ; SI-NOT: v_rcp_f32
    275 
    276 ; EG-NOT: UINT_TO_FLT
    277 ; EG-NOT: RECIP_IEEE
    278 define void @test_no_urem24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { ; i32 urem: 25-bit numerator, 24-bit denominator
    279   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 ; address of the i32 element at index 1
    280   %num = load i32, i32 addrspace(1) * %in, align 4 ; numerator: element 0
    281   %den = load i32, i32 addrspace(1) * %den_ptr, align 4 ; denominator: element 1
    282   %num.i24.0 = shl i32 %num, 7 ; numerator: shift is 7, not 8, so despite the .i24 name 25 bits survive
    283   %den.i24.0 = shl i32 %den, 8 ; denominator: shl/lshr by 8 clears the top 8 bits
    284   %num.i24 = lshr i32 %num.i24.0, 7 ; numerator reduced to its low 25 bits
    285   %den.i24 = lshr i32 %den.i24.0, 8 ; denominator reduced to its low 24 bits
    286   %result = urem i32 %num.i24, %den.i24
    287   store i32 %result, i32 addrspace(1)* %out, align 4
    288   ret void
    289 }
    290 
    291 ; FUNC-LABEL: {{^}}test_udiv24_u16_u23_i32:
    292 ; SI-DAG: v_rcp_f32
    293 ; SI-DAG: s_mov_b32 [[MASK:s[0-9]+]], 0x7fffff{{$}}
    294 ; SI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]],
    295 
    296 ; EG: RECIP_IEEE
    297 define void @test_udiv24_u16_u23_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { ; i32 udiv: 16-bit numerator, 23-bit denominator
    298   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 ; address of the i32 element at index 1
    299   %num = load i32, i32 addrspace(1) * %in, align 4 ; numerator: element 0
    300   %den = load i32, i32 addrspace(1) * %den_ptr, align 4 ; denominator: element 1
    301   %num.i16.0 = shl i32 %num, 16 ; numerator: shl/lshr by 16 clears the top 16 bits
    302   %den.i23.0 = shl i32 %den, 9 ; denominator: shl/lshr by 9 clears the top 9 bits
    303   %num.i16 = lshr i32 %num.i16.0, 16 ; numerator reduced to its low 16 bits
    304   %den.i23 = lshr i32 %den.i23.0, 9 ; denominator reduced to its low 23 bits (mask 0x7fffff)
    305   %result = udiv i32 %num.i16, %den.i23
    306   store i32 %result, i32 addrspace(1)* %out, align 4
    307   ret void
    308 }
    309 
    310 ; FUNC-LABEL: {{^}}test_udiv24_u23_u16_i32:
    311 ; SI-DAG: v_rcp_f32
    312 ; SI-DAG: s_mov_b32 [[MASK:s[0-9]+]], 0x7fffff{{$}}
    313 ; SI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]],
    314 
    315 ; EG: RECIP_IEEE
    316 define void @test_udiv24_u23_u16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { ; i32 udiv: 23-bit numerator, 16-bit denominator
    317   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 ; address of the i32 element at index 1
    318   %num = load i32, i32 addrspace(1) * %in, align 4 ; numerator: element 0
    319   %den = load i32, i32 addrspace(1) * %den_ptr, align 4 ; denominator: element 1
    320   %num.i23.0 = shl i32 %num, 9 ; numerator: shl/lshr by 9 clears the top 9 bits
    321   %den.i16.0 = shl i32 %den, 16 ; denominator: shl/lshr by 16 clears the top 16 bits
    322   %num.i23 = lshr i32 %num.i23.0, 9 ; numerator reduced to its low 23 bits (mask 0x7fffff)
    323   %den.i16 = lshr i32 %den.i16.0, 16 ; denominator reduced to its low 16 bits
    324   %result = udiv i32 %num.i23, %den.i16
    325   store i32 %result, i32 addrspace(1)* %out, align 4
    326   ret void
    327 }
    328