Home | History | Annotate | Download | only in R600
      1 ; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      3 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
      4 
      5 ; FUNC-LABEL: {{^}}udiv24_i8:
      6 ; SI: v_cvt_f32_ubyte
      7 ; SI: v_cvt_f32_ubyte
      8 ; SI: v_rcp_f32
      9 ; SI: v_cvt_u32_f32
     10 
     11 ; EG: UINT_TO_FLT
     12 ; EG-DAG: UINT_TO_FLT
     13 ; EG-DAG: RECIP_IEEE
     14 ; EG: FLT_TO_UINT
     15 define void @udiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
        ; Unsigned i8 division: the dividend is the byte at %in, the divisor is the
        ; byte immediately after it, and the quotient is written to %out.
     16   %divisor.addr = getelementptr i8, i8 addrspace(1)* %in, i8 1
     17   %dividend = load i8, i8 addrspace(1)* %in
     18   %divisor = load i8, i8 addrspace(1)* %divisor.addr
     19   %quotient = udiv i8 %dividend, %divisor
     20   store i8 %quotient, i8 addrspace(1)* %out
     21   ret void
     22 }
     23 
     24 ; FUNC-LABEL: {{^}}udiv24_i16:
     25 ; SI: v_cvt_f32_u32
     26 ; SI: v_cvt_f32_u32
     27 ; SI: v_rcp_f32
     28 ; SI: v_cvt_u32_f32
     29 
     30 ; EG: UINT_TO_FLT
     31 ; EG-DAG: UINT_TO_FLT
     32 ; EG-DAG: RECIP_IEEE
     33 ; EG: FLT_TO_UINT
     34 define void @udiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
        ; Unsigned i16 division: the dividend is the element at %in, the divisor is
        ; the next i16 element, and the quotient is written to %out.
     35   %divisor.addr = getelementptr i16, i16 addrspace(1)* %in, i16 1
     36   %dividend = load i16, i16 addrspace(1)* %in, align 2
     37   %divisor = load i16, i16 addrspace(1)* %divisor.addr, align 2
     38   %quotient = udiv i16 %dividend, %divisor
     39   store i16 %quotient, i16 addrspace(1)* %out, align 2
     40   ret void
     41 }
     42 
     43 ; FUNC-LABEL: {{^}}udiv24_i32:
     44 ; SI: v_cvt_f32_u32
     45 ; SI-DAG: v_cvt_f32_u32
     46 ; SI-DAG: v_rcp_f32
     47 ; SI: v_cvt_u32_f32
     48 
     49 ; EG: UINT_TO_FLT
     50 ; EG-DAG: UINT_TO_FLT
     51 ; EG-DAG: RECIP_IEEE
     52 ; EG: FLT_TO_UINT
     53 define void @udiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
        ; i32 udiv whose operands are both reduced to their low 24 bits first:
        ; shl by 8 followed by lshr by 8 clears the top 8 bits of each value.
     54   %divisor.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
     55   %dividend.raw = load i32, i32 addrspace(1)* %in, align 4
     56   %divisor.raw = load i32, i32 addrspace(1)* %divisor.addr, align 4
     57   %dividend.hi = shl i32 %dividend.raw, 8
     58   %divisor.hi = shl i32 %divisor.raw, 8
     59   %dividend.u24 = lshr i32 %dividend.hi, 8
     60   %divisor.u24 = lshr i32 %divisor.hi, 8
     61   %quotient = udiv i32 %dividend.u24, %divisor.u24
     62   store i32 %quotient, i32 addrspace(1)* %out, align 4
     63   ret void
     64 }
     65 
     66 ; FUNC-LABEL: {{^}}udiv25_i32:
     67 ; RCP_IFLAG is emitted for URECIP in the full 32-bit algorithm
     68 ; SI: v_rcp_iflag
     69 ; SI-NOT: v_rcp_f32
     70 
     71 ; EG-NOT: UINT_TO_FLT
     72 ; EG-NOT: RECIP_IEEE
     73 define void @udiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
        ; i32 udiv whose operands keep their low 25 bits: shl by 7 followed by
        ; lshr by 7 clears only the top 7 bits, one bit more than a 24-bit value.
     74   %divisor.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
     75   %dividend.raw = load i32, i32 addrspace(1)* %in, align 4
     76   %divisor.raw = load i32, i32 addrspace(1)* %divisor.addr, align 4
     77   %dividend.hi = shl i32 %dividend.raw, 7
     78   %divisor.hi = shl i32 %divisor.raw, 7
     79   %dividend.u25 = lshr i32 %dividend.hi, 7
     80   %divisor.u25 = lshr i32 %divisor.hi, 7
     81   %quotient = udiv i32 %dividend.u25, %divisor.u25
     82   store i32 %quotient, i32 addrspace(1)* %out, align 4
     83   ret void
     84 }
     85 
     86 ; FUNC-LABEL: {{^}}test_no_udiv24_i32_1:
     87 ; RCP_IFLAG is emitted for URECIP in the full 32-bit algorithm
     88 ; SI: v_rcp_iflag
     89 ; SI-NOT: v_rcp_f32
     90 
     91 ; EG-NOT: UINT_TO_FLT
     92 ; EG-NOT: RECIP_IEEE
     93 define void @test_no_udiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
        ; Mixed widths: the dividend is cut to 24 bits (shift by 8) while the
        ; divisor keeps 25 bits (shift by 7), so only one operand fits in 24 bits.
     94   %divisor.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
     95   %dividend.raw = load i32, i32 addrspace(1)* %in, align 4
     96   %divisor.raw = load i32, i32 addrspace(1)* %divisor.addr, align 4
     97   %dividend.hi = shl i32 %dividend.raw, 8
     98   %divisor.hi = shl i32 %divisor.raw, 7
     99   %dividend.u24 = lshr i32 %dividend.hi, 8
    100   %divisor.u25 = lshr i32 %divisor.hi, 7
    101   %quotient = udiv i32 %dividend.u24, %divisor.u25
    102   store i32 %quotient, i32 addrspace(1)* %out, align 4
    103   ret void
    104 }
    105 
    106 ; FUNC-LABEL: {{^}}test_no_udiv24_i32_2:
    107 ; RCP_IFLAG is emitted for URECIP in the full 32-bit algorithm
    108 ; SI: v_rcp_iflag
    109 ; SI-NOT: v_rcp_f32
    110 
    111 ; EG-NOT: UINT_TO_FLT
    112 ; EG-NOT: RECIP_IEEE
    113 define void @test_no_udiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
        ; Mixed widths, mirrored: the dividend keeps 25 bits (shift by 7) while
        ; the divisor is cut to 24 bits (shift by 8).
    114   %divisor.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
    115   %dividend.raw = load i32, i32 addrspace(1)* %in, align 4
    116   %divisor.raw = load i32, i32 addrspace(1)* %divisor.addr, align 4
    117   %dividend.hi = shl i32 %dividend.raw, 7
    118   %divisor.hi = shl i32 %divisor.raw, 8
    119   %dividend.u25 = lshr i32 %dividend.hi, 7
    120   %divisor.u24 = lshr i32 %divisor.hi, 8
    121   %quotient = udiv i32 %dividend.u25, %divisor.u24
    122   store i32 %quotient, i32 addrspace(1)* %out, align 4
    123   ret void
    124 }
    125 
    126 ; FUNC-LABEL: {{^}}urem24_i8:
    127 ; SI: v_cvt_f32_ubyte
    128 ; SI: v_cvt_f32_ubyte
    129 ; SI: v_rcp_f32
    130 ; SI: v_cvt_u32_f32
    131 
    132 ; EG: UINT_TO_FLT
    133 ; EG-DAG: UINT_TO_FLT
    134 ; EG-DAG: RECIP_IEEE
    135 ; EG: FLT_TO_UINT
    136 define void @urem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
        ; Unsigned i8 remainder: the dividend is the byte at %in, the divisor is
        ; the byte immediately after it, and the remainder is written to %out.
    137   %divisor.addr = getelementptr i8, i8 addrspace(1)* %in, i8 1
    138   %dividend = load i8, i8 addrspace(1)* %in
    139   %divisor = load i8, i8 addrspace(1)* %divisor.addr
    140   %remainder = urem i8 %dividend, %divisor
    141   store i8 %remainder, i8 addrspace(1)* %out
    142   ret void
    143 }
    144 
    145 ; FUNC-LABEL: {{^}}urem24_i16:
    146 ; SI: v_cvt_f32_u32
    147 ; SI: v_cvt_f32_u32
    148 ; SI: v_rcp_f32
    149 ; SI: v_cvt_u32_f32
    150 
    151 ; EG: UINT_TO_FLT
    152 ; EG-DAG: UINT_TO_FLT
    153 ; EG-DAG: RECIP_IEEE
    154 ; EG: FLT_TO_UINT
    155 define void @urem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
        ; Unsigned i16 remainder: the dividend is the element at %in, the divisor
        ; is the next i16 element, and the remainder is written to %out.
    156   %divisor.addr = getelementptr i16, i16 addrspace(1)* %in, i16 1
    157   %dividend = load i16, i16 addrspace(1)* %in, align 2
    158   %divisor = load i16, i16 addrspace(1)* %divisor.addr, align 2
    159   %remainder = urem i16 %dividend, %divisor
    160   store i16 %remainder, i16 addrspace(1)* %out, align 2
    161   ret void
    162 }
    163 
    164 ; FUNC-LABEL: {{^}}urem24_i32:
    165 ; SI: v_cvt_f32_u32
    166 ; SI: v_cvt_f32_u32
    167 ; SI: v_rcp_f32
    168 ; SI: v_cvt_u32_f32
    169 
    170 ; EG: UINT_TO_FLT
    171 ; EG-DAG: UINT_TO_FLT
    172 ; EG-DAG: RECIP_IEEE
    173 ; EG: FLT_TO_UINT
    174 define void @urem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
        ; i32 urem whose operands are both reduced to their low 24 bits first:
        ; shl by 8 followed by lshr by 8 clears the top 8 bits of each value.
    175   %divisor.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
    176   %dividend.raw = load i32, i32 addrspace(1)* %in, align 4
    177   %divisor.raw = load i32, i32 addrspace(1)* %divisor.addr, align 4
    178   %dividend.hi = shl i32 %dividend.raw, 8
    179   %divisor.hi = shl i32 %divisor.raw, 8
    180   %dividend.u24 = lshr i32 %dividend.hi, 8
    181   %divisor.u24 = lshr i32 %divisor.hi, 8
    182   %remainder = urem i32 %dividend.u24, %divisor.u24
    183   store i32 %remainder, i32 addrspace(1)* %out, align 4
    184   ret void
    185 }
    186 
    187 ; FUNC-LABEL: {{^}}urem25_i32:
    188 ; RCP_IFLAG is emitted for URECIP in the full 32-bit algorithm
    189 ; SI: v_rcp_iflag
    190 ; SI-NOT: v_rcp_f32
    191 
    192 ; EG-NOT: UINT_TO_FLT
    193 ; EG-NOT: RECIP_IEEE
    194 define void @urem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
        ; i32 urem whose operands keep their low 25 bits: shl by 7 followed by
        ; lshr by 7 clears only the top 7 bits, one bit more than a 24-bit value.
    195   %divisor.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
    196   %dividend.raw = load i32, i32 addrspace(1)* %in, align 4
    197   %divisor.raw = load i32, i32 addrspace(1)* %divisor.addr, align 4
    198   %dividend.hi = shl i32 %dividend.raw, 7
    199   %divisor.hi = shl i32 %divisor.raw, 7
    200   %dividend.u25 = lshr i32 %dividend.hi, 7
    201   %divisor.u25 = lshr i32 %divisor.hi, 7
    202   %remainder = urem i32 %dividend.u25, %divisor.u25
    203   store i32 %remainder, i32 addrspace(1)* %out, align 4
    204   ret void
    205 }
    206 
    207 ; FUNC-LABEL: {{^}}test_no_urem24_i32_1:
    208 ; RCP_IFLAG is emitted for URECIP in the full 32-bit algorithm
    209 ; SI: v_rcp_iflag
    210 ; SI-NOT: v_rcp_f32
    211 
    212 ; EG-NOT: UINT_TO_FLT
    213 ; EG-NOT: RECIP_IEEE
    214 define void @test_no_urem24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
        ; Mixed widths: the dividend is cut to 24 bits (shift by 8) while the
        ; divisor keeps 25 bits (shift by 7), so only one operand fits in 24 bits.
    215   %divisor.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
    216   %dividend.raw = load i32, i32 addrspace(1)* %in, align 4
    217   %divisor.raw = load i32, i32 addrspace(1)* %divisor.addr, align 4
    218   %dividend.hi = shl i32 %dividend.raw, 8
    219   %divisor.hi = shl i32 %divisor.raw, 7
    220   %dividend.u24 = lshr i32 %dividend.hi, 8
    221   %divisor.u25 = lshr i32 %divisor.hi, 7
    222   %remainder = urem i32 %dividend.u24, %divisor.u25
    223   store i32 %remainder, i32 addrspace(1)* %out, align 4
    224   ret void
    225 }
    226 
    227 ; FUNC-LABEL: {{^}}test_no_urem24_i32_2:
    228 ; RCP_IFLAG is emitted for URECIP in the full 32-bit algorithm
    229 ; SI: v_rcp_iflag
    230 ; SI-NOT: v_rcp_f32
    231 
    232 ; EG-NOT: UINT_TO_FLT
    233 ; EG-NOT: RECIP_IEEE
    234 define void @test_no_urem24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
        ; Mixed widths, mirrored: the dividend keeps 25 bits (shift by 7) while
        ; the divisor is cut to 24 bits (shift by 8).
    235   %divisor.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
    236   %dividend.raw = load i32, i32 addrspace(1)* %in, align 4
    237   %divisor.raw = load i32, i32 addrspace(1)* %divisor.addr, align 4
    238   %dividend.hi = shl i32 %dividend.raw, 7
    239   %divisor.hi = shl i32 %divisor.raw, 8
    240   %dividend.u25 = lshr i32 %dividend.hi, 7
    241   %divisor.u24 = lshr i32 %divisor.hi, 8
    242   %remainder = urem i32 %dividend.u25, %divisor.u24
    243   store i32 %remainder, i32 addrspace(1)* %out, align 4
    244   ret void
    245 }
    246