Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      3 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
      4 
      5 ; FUNC-LABEL: {{^}}sdiv24_i8:
      6 ; SI: v_cvt_f32_i32
      7 ; SI: v_cvt_f32_i32
      8 ; SI: v_rcp_f32
      9 ; SI: v_cvt_i32_f32
     10 
     11 ; EG: INT_TO_FLT
     12 ; EG-DAG: INT_TO_FLT
     13 ; EG-DAG: RECIP_IEEE
     14 ; EG: FLT_TO_INT
        ; Signed division of two adjacent i8 values read from %in; the quotient is
        ; stored to %out. An i8 operand always fits in 24 bits, and the checks above
        ; expect the int-to-float / reciprocal / float-to-int sequence on both targets.
     15 define void @sdiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
     16   %divisor.addr = getelementptr i8, i8 addrspace(1)* %in, i8 1
     17   %dividend = load i8, i8 addrspace(1)* %in
     18   %divisor = load i8, i8 addrspace(1)* %divisor.addr
     19   %quot = sdiv i8 %dividend, %divisor
     20   store i8 %quot, i8 addrspace(1)* %out
     21   ret void
     22 }
     23 
     24 ; FUNC-LABEL: {{^}}sdiv24_i16:
     25 ; SI: v_cvt_f32_i32
     26 ; SI: v_cvt_f32_i32
     27 ; SI: v_rcp_f32
     28 ; SI: v_cvt_i32_f32
     29 
     30 ; EG: INT_TO_FLT
     31 ; EG-DAG: INT_TO_FLT
     32 ; EG-DAG: RECIP_IEEE
     33 ; EG: FLT_TO_INT
        ; Signed division of two adjacent i16 values read from %in; the quotient is
        ; stored to %out. An i16 operand always fits in 24 bits, and the checks above
        ; expect the int-to-float / reciprocal / float-to-int sequence on both targets.
     34 define void @sdiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
     35   %divisor.addr = getelementptr i16, i16 addrspace(1)* %in, i16 1
     36   %dividend = load i16, i16 addrspace(1)* %in, align 2
     37   %divisor = load i16, i16 addrspace(1)* %divisor.addr, align 2
     38   %quot = sdiv i16 %dividend, %divisor
     39   store i16 %quot, i16 addrspace(1)* %out, align 2
     40   ret void
     41 }
     42 
     43 ; FUNC-LABEL: {{^}}sdiv24_i32:
     44 ; SI: v_cvt_f32_i32
     45 ; SI: v_cvt_f32_i32
     46 ; SI: v_rcp_f32
     47 ; SI: v_cvt_i32_f32
     48 
     49 ; EG: INT_TO_FLT
     50 ; EG-DAG: INT_TO_FLT
     51 ; EG-DAG: RECIP_IEEE
     52 ; EG: FLT_TO_INT
        ; Signed i32 division where both operands are first narrowed to 24 significant
        ; bits (shl 8 followed by ashr 8 sign-extends from bit 23). The checks above
        ; expect the int-to-float / reciprocal / float-to-int sequence on both targets.
     53 define void @sdiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
     54   %divisor.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
     55   %dividend = load i32, i32 addrspace(1)* %in, align 4
     56   %divisor = load i32, i32 addrspace(1)* %divisor.addr, align 4
     57   %dividend.hi = shl i32 %dividend, 8
     58   %divisor.hi = shl i32 %divisor, 8
     59   %dividend.s24 = ashr i32 %dividend.hi, 8
     60   %divisor.s24 = ashr i32 %divisor.hi, 8
     61   %quot = sdiv i32 %dividend.s24, %divisor.s24
     62   store i32 %quot, i32 addrspace(1)* %out, align 4
     63   ret void
     64 }
     65 
     66 ; FUNC-LABEL: {{^}}sdiv25_i32:
     67 ; SI-NOT: v_cvt_f32_i32
     68 ; SI-NOT: v_rcp_f32
     69 
     70 ; EG-NOT: INT_TO_FLT
     71 ; EG-NOT: RECIP_IEEE
        ; Negative test: shl 7 followed by ashr 7 leaves 25 significant bits in each
        ; operand, one more than the 24-bit cases. The checks above require that no
        ; signed int-to-float conversion or reciprocal appears in the output.
     72 define void @sdiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
     73   %divisor.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
     74   %dividend = load i32, i32 addrspace(1)* %in, align 4
     75   %divisor = load i32, i32 addrspace(1)* %divisor.addr, align 4
     76   %dividend.hi = shl i32 %dividend, 7
     77   %divisor.hi = shl i32 %divisor, 7
     78   %dividend.s25 = ashr i32 %dividend.hi, 7
     79   %divisor.s25 = ashr i32 %divisor.hi, 7
     80   %quot = sdiv i32 %dividend.s25, %divisor.s25
     81   store i32 %quot, i32 addrspace(1)* %out, align 4
     82   ret void
     83 }
     84 
     85 ; FUNC-LABEL: {{^}}test_no_sdiv24_i32_1:
     86 ; SI-NOT: v_cvt_f32_i32
     87 ; SI-NOT: v_rcp_f32
     88 
     89 ; EG-NOT: INT_TO_FLT
     90 ; EG-NOT: RECIP_IEEE
        ; Negative test with mixed widths: the dividend is sign-extended from 24 bits
        ; (shift by 8) but the divisor keeps 25 significant bits (shift by 7). The
        ; checks above require that no conversion or reciprocal instruction is emitted.
     91 define void @test_no_sdiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
     92   %divisor.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
     93   %dividend = load i32, i32 addrspace(1)* %in, align 4
     94   %divisor = load i32, i32 addrspace(1)* %divisor.addr, align 4
     95   %dividend.hi = shl i32 %dividend, 8
     96   %divisor.hi = shl i32 %divisor, 7
     97   %dividend.s24 = ashr i32 %dividend.hi, 8
     98   %divisor.s25 = ashr i32 %divisor.hi, 7
     99   %quot = sdiv i32 %dividend.s24, %divisor.s25
    100   store i32 %quot, i32 addrspace(1)* %out, align 4
    101   ret void
    102 }
    103 
    104 ; FUNC-LABEL: {{^}}test_no_sdiv24_i32_2:
    105 ; SI-NOT: v_cvt_f32_i32
    106 ; SI-NOT: v_rcp_f32
    107 
    108 ; EG-NOT: INT_TO_FLT
    109 ; EG-NOT: RECIP_IEEE
        ; Negative test with mixed widths, mirrored: the dividend keeps 25 significant
        ; bits (shift by 7) while the divisor is sign-extended from 24 bits (shift by
        ; 8). The checks above require that no conversion or reciprocal is emitted.
    110 define void @test_no_sdiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
    111   %divisor.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
    112   %dividend = load i32, i32 addrspace(1)* %in, align 4
    113   %divisor = load i32, i32 addrspace(1)* %divisor.addr, align 4
    114   %dividend.hi = shl i32 %dividend, 7
    115   %divisor.hi = shl i32 %divisor, 8
    116   %dividend.s25 = ashr i32 %dividend.hi, 7
    117   %divisor.s24 = ashr i32 %divisor.hi, 8
    118   %quot = sdiv i32 %dividend.s25, %divisor.s24
    119   store i32 %quot, i32 addrspace(1)* %out, align 4
    120   ret void
    121 }
    122 
    123 ; FUNC-LABEL: {{^}}srem24_i8:
    124 ; SI: v_cvt_f32_i32
    125 ; SI: v_cvt_f32_i32
    126 ; SI: v_rcp_f32
    127 ; SI: v_cvt_i32_f32
    128 
    129 ; EG: INT_TO_FLT
    130 ; EG-DAG: INT_TO_FLT
    131 ; EG-DAG: RECIP_IEEE
    132 ; EG: FLT_TO_INT
        ; Signed remainder of two adjacent i8 values read from %in; the result is
        ; stored to %out. An i8 operand always fits in 24 bits, and the checks above
        ; expect the int-to-float / reciprocal / float-to-int sequence on both targets.
    133 define void @srem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
    134   %divisor.addr = getelementptr i8, i8 addrspace(1)* %in, i8 1
    135   %dividend = load i8, i8 addrspace(1)* %in
    136   %divisor = load i8, i8 addrspace(1)* %divisor.addr
    137   %rem = srem i8 %dividend, %divisor
    138   store i8 %rem, i8 addrspace(1)* %out
    139   ret void
    140 }
    141 
    142 ; FUNC-LABEL: {{^}}srem24_i16:
    143 ; SI: v_cvt_f32_i32
    144 ; SI: v_cvt_f32_i32
    145 ; SI: v_rcp_f32
    146 ; SI: v_cvt_i32_f32
    147 
    148 ; EG: INT_TO_FLT
    149 ; EG-DAG: INT_TO_FLT
    150 ; EG-DAG: RECIP_IEEE
    151 ; EG: FLT_TO_INT
        ; Signed remainder of two adjacent i16 values read from %in; the result is
        ; stored to %out. An i16 operand always fits in 24 bits, and the checks above
        ; expect the int-to-float / reciprocal / float-to-int sequence on both targets.
    152 define void @srem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
    153   %divisor.addr = getelementptr i16, i16 addrspace(1)* %in, i16 1
    154   %dividend = load i16, i16 addrspace(1)* %in, align 2
    155   %divisor = load i16, i16 addrspace(1)* %divisor.addr, align 2
    156   %rem = srem i16 %dividend, %divisor
    157   store i16 %rem, i16 addrspace(1)* %out, align 2
    158   ret void
    159 }
    160 
    161 ; FUNC-LABEL: {{^}}srem24_i32:
    162 ; SI: v_cvt_f32_i32
    163 ; SI: v_cvt_f32_i32
    164 ; SI: v_rcp_f32
    165 ; SI: v_cvt_i32_f32
    166 
    167 ; EG: INT_TO_FLT
    168 ; EG-DAG: INT_TO_FLT
    169 ; EG-DAG: RECIP_IEEE
    170 ; EG: FLT_TO_INT
        ; Signed i32 remainder where both operands are first narrowed to 24 significant
        ; bits (shl 8 followed by ashr 8 sign-extends from bit 23). The checks above
        ; expect the int-to-float / reciprocal / float-to-int sequence on both targets.
    171 define void @srem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
    172   %divisor.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
    173   %dividend = load i32, i32 addrspace(1)* %in, align 4
    174   %divisor = load i32, i32 addrspace(1)* %divisor.addr, align 4
    175   %dividend.hi = shl i32 %dividend, 8
    176   %divisor.hi = shl i32 %divisor, 8
    177   %dividend.s24 = ashr i32 %dividend.hi, 8
    178   %divisor.s24 = ashr i32 %divisor.hi, 8
    179   %rem = srem i32 %dividend.s24, %divisor.s24
    180   store i32 %rem, i32 addrspace(1)* %out, align 4
    181   ret void
    182 }
    183 
    184 ; FUNC-LABEL: {{^}}srem25_i32:
    185 ; SI-NOT: v_cvt_f32_i32
    186 ; SI-NOT: v_rcp_f32
    187 
    188 ; EG-NOT: INT_TO_FLT
    189 ; EG-NOT: RECIP_IEEE
        ; Negative test: shl 7 followed by ashr 7 leaves 25 significant bits in each
        ; operand, one more than the 24-bit cases. The checks above require that no
        ; signed int-to-float conversion or reciprocal appears in the output.
    190 define void @srem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
    191   %divisor.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
    192   %dividend = load i32, i32 addrspace(1)* %in, align 4
    193   %divisor = load i32, i32 addrspace(1)* %divisor.addr, align 4
    194   %dividend.hi = shl i32 %dividend, 7
    195   %divisor.hi = shl i32 %divisor, 7
    196   %dividend.s25 = ashr i32 %dividend.hi, 7
    197   %divisor.s25 = ashr i32 %divisor.hi, 7
    198   %rem = srem i32 %dividend.s25, %divisor.s25
    199   store i32 %rem, i32 addrspace(1)* %out, align 4
    200   ret void
    201 }
    202 
    203 ; FUNC-LABEL: {{^}}test_no_srem24_i32_1:
    204 ; SI-NOT: v_cvt_f32_i32
    205 ; SI-NOT: v_rcp_f32
    206 
    207 ; EG-NOT: INT_TO_FLT
    208 ; EG-NOT: RECIP_IEEE
        ; Negative test with mixed widths: the dividend is sign-extended from 24 bits
        ; (shift by 8) but the divisor keeps 25 significant bits (shift by 7). The
        ; checks above require that no conversion or reciprocal instruction is emitted.
    209 define void @test_no_srem24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
    210   %divisor.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
    211   %dividend = load i32, i32 addrspace(1)* %in, align 4
    212   %divisor = load i32, i32 addrspace(1)* %divisor.addr, align 4
    213   %dividend.hi = shl i32 %dividend, 8
    214   %divisor.hi = shl i32 %divisor, 7
    215   %dividend.s24 = ashr i32 %dividend.hi, 8
    216   %divisor.s25 = ashr i32 %divisor.hi, 7
    217   %rem = srem i32 %dividend.s24, %divisor.s25
    218   store i32 %rem, i32 addrspace(1)* %out, align 4
    219   ret void
    220 }
    221 
    222 ; FUNC-LABEL: {{^}}test_no_srem24_i32_2:
    223 ; SI-NOT: v_cvt_f32_i32
    224 ; SI-NOT: v_rcp_f32
    225 
    226 ; EG-NOT: INT_TO_FLT
    227 ; EG-NOT: RECIP_IEEE
        ; Negative test with mixed widths, mirrored: the dividend keeps 25 significant
        ; bits (shift by 7) while the divisor is sign-extended from 24 bits (shift by
        ; 8). The checks above require that no conversion or reciprocal is emitted.
    228 define void @test_no_srem24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
    229   %divisor.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
    230   %dividend = load i32, i32 addrspace(1)* %in, align 4
    231   %divisor = load i32, i32 addrspace(1)* %divisor.addr, align 4
    232   %dividend.hi = shl i32 %dividend, 7
    233   %divisor.hi = shl i32 %divisor, 8
    234   %dividend.s25 = ashr i32 %dividend.hi, 7
    235   %divisor.s24 = ashr i32 %divisor.hi, 8
    236   %rem = srem i32 %dividend.s25, %divisor.s24
    237   store i32 %rem, i32 addrspace(1)* %out, align 4
    238   ret void
    239 }
    240