Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
      3 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
      4 
      5 ; FUNC-LABEL: {{^}}sext_bool_icmp_eq_0:
      6 ; GCN-NOT: v_cmp
      7 ; GCN: v_cmp_ne_u32_e32 vcc,
      8 ; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
      9 ; GCN-NEXT: buffer_store_byte [[RESULT]]
     10 ; GCN-NEXT: s_endpgm
     11 
     12 ; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
     13 ; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1
        ; Kernel under test: stores the i1 value of (sext (%a == %b)) == 0 to %out.
        ; A sign-extended i1 is either 0 or -1, so the outer compare holds exactly
        ; when %a != %b. The amdgcn checks above allow no other v_cmp before the
        ; v_cmp_ne and require v_cndmask, the byte store and s_endpgm to follow
        ; back to back; the r600 checks look for SETNE_INT and then AND_INT with 1.
     14 define amdgpu_kernel void @sext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
     15   %icmp0 = icmp eq i32 %a, %b
     16   %ext = sext i1 %icmp0 to i32
     17   %icmp1 = icmp eq i32 %ext, 0
     18   store i1 %icmp1, i1 addrspace(1)* %out
     19   ret void
     20 }
     21 
     22 ; FUNC-LABEL: {{^}}sext_bool_icmp_ne_0:
     23 ; GCN-NOT: v_cmp
     24 ; GCN: v_cmp_ne_u32_e32 vcc,
     25 ; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
     26 ; GCN-NEXT: buffer_store_byte [[RESULT]]
     27 ; GCN-NEXT: s_endpgm
     28 
     29 ; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
     30 ; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1
        ; Kernel under test: stores the i1 value of (sext (%a != %b)) != 0 to %out.
        ; The sign-extended i1 is non-zero exactly when the inner compare is true,
        ; so the whole expression is just %a != %b; the amdgcn checks above expect
        ; it to be emitted as one v_cmp_ne feeding v_cndmask, and the r600 checks
        ; expect SETNE_INT followed by AND_INT with 1.
     31 define amdgpu_kernel void @sext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
     32   %icmp0 = icmp ne i32 %a, %b
     33   %ext = sext i1 %icmp0 to i32
     34   %icmp1 = icmp ne i32 %ext, 0
     35   store i1 %icmp1, i1 addrspace(1)* %out
     36   ret void
     37 }
     38 
     39 ; FUNC-LABEL: {{^}}sext_bool_icmp_eq_neg1:
     40 ; GCN-NOT: v_cmp
     41 ; GCN: v_cmp_eq_u32_e32 vcc,
     42 ; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
     43 ; GCN-NEXT: buffer_store_byte [[RESULT]]
     44 ; GCN-NEXT: s_endpgm
        ; Kernel under test: stores the i1 value of (sext (%a == %b)) == -1 to %out.
        ; Sign-extending a true i1 yields -1, so the outer compare is true exactly
        ; when %a == %b; the checks above expect a single v_cmp_eq whose result is
        ; materialised with v_cndmask and written with a byte store.
     45 define amdgpu_kernel void @sext_bool_icmp_eq_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
     46   %icmp0 = icmp eq i32 %a, %b
     47   %ext = sext i1 %icmp0 to i32
     48   %icmp1 = icmp eq i32 %ext, -1
     49   store i1 %icmp1, i1 addrspace(1)* %out
     50   ret void
     51 }
     52 
     53 ; FUNC-LABEL: {{^}}sext_bool_icmp_ne_neg1:
     54 ; GCN-NOT: v_cmp
     55 ; GCN: v_cmp_eq_u32_e32 vcc,
     56 ; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
     57 ; GCN-NEXT: buffer_store_byte [[RESULT]]
     58 ; GCN-NEXT: s_endpgm
        ; Kernel under test: stores the i1 value of (sext (%a != %b)) != -1 to %out.
        ; The extended value differs from -1 only when the inner "ne" is false,
        ; i.e. when %a == %b, which is why the checks above expect v_cmp_eq rather
        ; than v_cmp_ne.
     59 define amdgpu_kernel void @sext_bool_icmp_ne_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
     60   %icmp0 = icmp ne i32 %a, %b
     61   %ext = sext i1 %icmp0 to i32
     62   %icmp1 = icmp ne i32 %ext, -1
     63   store i1 %icmp1, i1 addrspace(1)* %out
     64   ret void
     65 }
     66 
     67 ; FUNC-LABEL: {{^}}zext_bool_icmp_eq_0:
     68 ; GCN-NOT: v_cmp
     69 ; GCN: v_cmp_ne_u32_e32 vcc,
     70 ; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
     71 ; GCN-NEXT: buffer_store_byte [[RESULT]]
     72 ; GCN-NEXT: s_endpgm
        ; Kernel under test: stores the i1 value of (zext (%a == %b)) == 0 to %out.
        ; A zero-extended i1 is either 0 or 1, so it equals 0 exactly when
        ; %a != %b; the checks above expect the inverted compare (v_cmp_ne) and
        ; no other v_cmp before it.
     73 define amdgpu_kernel void @zext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
     74   %icmp0 = icmp eq i32 %a, %b
     75   %ext = zext i1 %icmp0 to i32
     76   %icmp1 = icmp eq i32 %ext, 0
     77   store i1 %icmp1, i1 addrspace(1)* %out
     78   ret void
     79 }
     80 
     81 ; FUNC-LABEL: {{^}}zext_bool_icmp_ne_0:
     82 ; GCN-NOT: v_cmp
     83 ; GCN: v_cmp_ne_u32_e32 vcc,
     84 ; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
     85 ; GCN-NEXT: buffer_store_byte [[RESULT]]
     86 ; GCN-NEXT: s_endpgm
        ; Kernel under test: stores the i1 value of (zext (%a != %b)) != 0 to %out.
        ; The zero-extended i1 is non-zero exactly when %a != %b, so the outer
        ; compare is redundant; the checks above expect only the v_cmp_ne itself.
     87 define amdgpu_kernel void @zext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
     88   %icmp0 = icmp ne i32 %a, %b
     89   %ext = zext i1 %icmp0 to i32
     90   %icmp1 = icmp ne i32 %ext, 0
     91   store i1 %icmp1, i1 addrspace(1)* %out
     92   ret void
     93 }
     94 
     95 ; FUNC-LABEL: {{^}}zext_bool_icmp_eq_1:
     96 ; GCN-NOT: v_cmp
     97 ; GCN: v_cmp_eq_u32_e32 vcc,
     98 ; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
     99 ; GCN-NEXT: buffer_store_byte [[RESULT]]
    100 ; GCN-NEXT: s_endpgm
        ; Kernel under test: stores the i1 value of (zext (%a == %b)) == 1 to %out.
        ; Zero-extending a true i1 yields 1, so the outer compare is true exactly
        ; when %a == %b; the checks above expect a single v_cmp_eq.
    101 define amdgpu_kernel void @zext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
    102   %icmp0 = icmp eq i32 %a, %b
    103   %ext = zext i1 %icmp0 to i32
    104   %icmp1 = icmp eq i32 %ext, 1
    105   store i1 %icmp1, i1 addrspace(1)* %out
    106   ret void
    107 }
    108 
    109 ; FUNC-LABEL: {{^}}zext_bool_icmp_ne_1:
    110 ; GCN-NOT: v_cmp
    111 ; GCN: v_cmp_eq_u32_e32 vcc,
    112 ; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
    113 ; GCN-NEXT: buffer_store_byte [[RESULT]]
        ; GCN-NEXT: s_endpgm
        ; Kernel under test: stores the i1 value of (zext (%a != %b)) != 1 to %out.
        ; The zero-extended value differs from 1 only when the inner "ne" is
        ; false, i.e. when %a == %b, so the checks above expect v_cmp_eq. The
        ; s_endpgm check mirrors the sibling zext_bool_icmp_eq_1 test, whose IR
        ; has the same shape, so nothing may be emitted between the store and the
        ; end of the program.
    114 define amdgpu_kernel void @zext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
    115   %icmp0 = icmp ne i32 %a, %b
    116   %ext = zext i1 %icmp0 to i32
    117   %icmp1 = icmp ne i32 %ext, 1
    118   store i1 %icmp1, i1 addrspace(1)* %out
    119   ret void
    120 }
    121 
    122 ; Reduces to false:
    123 ; FUNC-LABEL: {{^}}zext_bool_icmp_eq_neg1:
    124 ; GCN: v_mov_b32_e32 [[TMP:v[0-9]+]], 0{{$}}
    125 ; GCN: buffer_store_byte [[TMP]]
    126 ; GCN-NEXT: s_endpgm
        ; Kernel under test: stores the i1 value of (zext (%a == %b)) == -1 to %out.
        ; A zero-extended i1 can only be 0 or 1 and therefore never equals -1, so
        ; the checks above expect a constant 0 to be moved into a VGPR and stored,
        ; with s_endpgm directly after the store.
    127 define amdgpu_kernel void @zext_bool_icmp_eq_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
    128   %icmp0 = icmp eq i32 %a, %b
    129   %ext = zext i1 %icmp0 to i32
    130   %icmp1 = icmp eq i32 %ext, -1
    131   store i1 %icmp1, i1 addrspace(1)* %out
    132   ret void
    133 }
    134 
    135 ; Reduces to true:
    136 ; FUNC-LABEL: {{^}}zext_bool_icmp_ne_neg1:
    137 ; GCN: v_mov_b32_e32 [[TMP:v[0-9]+]], 1{{$}}
    138 ; GCN: buffer_store_byte [[TMP]]
    139 ; GCN-NEXT: s_endpgm
        ; Kernel under test: stores the i1 value of (zext (%a != %b)) != -1 to %out.
        ; A zero-extended i1 is 0 or 1 and so always differs from -1; the checks
        ; above expect a constant 1 to be moved into a VGPR and stored, with
        ; s_endpgm directly after the store.
    140 define amdgpu_kernel void @zext_bool_icmp_ne_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
    141   %icmp0 = icmp ne i32 %a, %b
    142   %ext = zext i1 %icmp0 to i32
    143   %icmp1 = icmp ne i32 %ext, -1
    144   store i1 %icmp1, i1 addrspace(1)* %out
    145   ret void
    146 }
    147 
    148 ; FUNC-LABEL: {{^}}cmp_zext_k_i8max:
    149 ; SI: s_load_dword [[VALUE:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
    150 ; VI: s_load_dword [[VALUE:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
    151 ; GCN: s_movk_i32 [[K255:s[0-9]+]], 0xff
    152 ; GCN-DAG: v_mov_b32_e32 [[VK255:v[0-9]+]], [[K255]]
    153 ; SI-DAG: s_and_b32 [[B:s[0-9]+]], [[VALUE]], [[K255]]
    154 ; SI: v_cmp_ne_u32_e32 vcc, [[B]], [[VK255]]
    155 
    156 ; VI-DAG: v_and_b32_e32 [[B:v[0-9]+]], [[VALUE]], [[VK255]]
    157 ; VI: v_cmp_ne_u16_e32 vcc, [[K255]], [[B]]
    158 
    159 ; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
    160 ; GCN: buffer_store_byte [[RESULT]]
    161 ; GCN: s_endpgm
        ; Kernel under test: zero-extends the i8 argument %b to i32 and stores
        ; whether it differs from 255 to %out.
        ; The two amdgcn runs are checked separately where they diverge: the
        ; default-CPU run expects the argument dword at offset 0xb, a scalar
        ; s_and_b32 with 0xff and a 32-bit v_cmp_ne_u32, while the tonga run
        ; expects offset 0x2c, a vector v_and_b32 and a 16-bit v_cmp_ne_u16.
        ; Both runs share the 0xff constant setup and the final
        ; v_cndmask / byte store / s_endpgm sequence.
    162 define amdgpu_kernel void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind {
    163   %b.ext = zext i8 %b to i32
    164   %icmp0 = icmp ne i32 %b.ext, 255
    165   store i1 %icmp0, i1 addrspace(1)* %out
    166   ret void
    167 }
    168 
    169 ; FUNC-LABEL: {{^}}cmp_sext_k_neg1:
    170 ; GCN: buffer_load_sbyte [[B:v[0-9]+]]
    171 ; GCN: v_cmp_ne_u32_e32 vcc, -1, [[B]]{{$}}
    172 ; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
    173 ; GCN: buffer_store_byte [[RESULT]]
    174 ; GCN: s_endpgm
        ; Kernel under test: loads an i8 through %b.ptr, sign-extends it to i32
        ; and stores whether the result differs from -1 to %out.
        ; The checks above expect the sign extension to be folded into the load
        ; (buffer_load_sbyte) and the loaded register to be compared directly
        ; against the literal -1 operand, with v_cndmask immediately after.
    175 define amdgpu_kernel void @cmp_sext_k_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %b.ptr) nounwind {
    176   %b = load i8, i8 addrspace(1)* %b.ptr
    177   %b.ext = sext i8 %b to i32
    178   %icmp0 = icmp ne i32 %b.ext, -1
    179   store i1 %icmp0, i1 addrspace(1)* %out
    180   ret void
    181 }
    182 
    183 ; FUNC-LABEL: {{^}}v_cmp_sext_k_neg1_i8_sext_arg:
    184 ; GCN: v_cmp_ne_u32_e32 vcc, -1, v0
    185 ; GCN-NEXT: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 0, 1, vcc
    186 ; GCN: buffer_store_byte [[SELECT]]
        ; Function under test (declared without the amdgpu_kernel calling
        ; convention): takes an i8 marked signext, sign-extends it to i32 and
        ; stores whether it differs from -1.
        ; The checks above expect the incoming register v0 to be compared
        ; directly against -1, followed by v_cndmask and a byte store of the
        ; selected value. The store address in the IR is undef, and no
        ; end-of-program instruction is checked for this function.
    187 define void @v_cmp_sext_k_neg1_i8_sext_arg(i8 signext %b) nounwind {
    188   %b.ext = sext i8 %b to i32
    189   %icmp0 = icmp ne i32 %b.ext, -1
    190   store i1 %icmp0, i1 addrspace(1)* undef
    191   ret void
    192 }
    193 
    194 ; FIXME: This ends up doing a buffer_load_ubyte and a compare to
    195 ; 255. Seems to be because of ordering problems when not allowing load widths to be reduced.
    196 ; Should do a buffer_load_sbyte and compare with -1
    197 
    198 ; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_arg:
    199 ; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
    200 ; VI: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
    201 ; GCN: s_movk_i32 [[K:s[0-9]+]], 0xff
    202 ; GCN-DAG: s_and_b32 [[B:s[0-9]+]], [[VAL]], [[K]]
    203 ; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], [[K]]
    204 ; GCN: v_cmp_ne_u32_e32 vcc, [[B]], [[VK]]{{$}}
    205 ; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
    206 ; GCN: buffer_store_byte [[RESULT]]
    207 ; GCN: s_endpgm
        ; Kernel under test: sign-extends the i8 kernel argument %b to i32 and
        ; stores whether it differs from -1 to %out.
        ; The checks above describe the code currently expected: the argument
        ; dword is fetched with s_load_dword (offset 0xb for the default-CPU
        ; run, 0x2c for the tonga run), masked with 0xff via s_and_b32, and
        ; compared against a VGPR copy of the same 0xff constant; no compare
        ; against -1 is checked here.
    208 define amdgpu_kernel void @cmp_sext_k_neg1_i8_arg(i1 addrspace(1)* %out, i8 %b) nounwind {
    209   %b.ext = sext i8 %b to i32
    210   %icmp0 = icmp ne i32 %b.ext, -1
    211   store i1 %icmp0, i1 addrspace(1)* %out
    212   ret void
    213 }
    214 
    215 ; FUNC-LABEL: {{^}}cmp_zext_k_neg1:
    216 ; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
    217 ; GCN: buffer_store_byte [[RESULT]]
    218 ; GCN: s_endpgm
        ; Kernel under test: zero-extends the i8 kernel argument %b to i32 and
        ; stores whether it differs from -1 to %out.
        ; A zero-extended i8 lies in 0..255 and so can never equal -1; the
        ; checks above expect the compare to disappear and a constant 1 to be
        ; stored.
    219 define amdgpu_kernel void @cmp_zext_k_neg1(i1 addrspace(1)* %out, i8 %b) nounwind {
    220   %b.ext = zext i8 %b to i32
    221   %icmp0 = icmp ne i32 %b.ext, -1
    222   store i1 %icmp0, i1 addrspace(1)* %out
    223   ret void
    224 }
    225 
    226 ; FUNC-LABEL: {{^}}zext_bool_icmp_ne_k:
    227 ; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
    228 ; GCN: buffer_store_byte [[RESULT]]
    229 ; GCN-NEXT: s_endpgm
        ; Kernel under test: stores the i1 value of (zext (%a != %b)) != 2 to %out.
        ; The zero-extended i1 is 0 or 1 and never 2, so the result is always
        ; true; the checks above expect a constant 1 to be stored with s_endpgm
        ; directly after the store.
    230 define amdgpu_kernel void @zext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
    231   %icmp0 = icmp ne i32 %a, %b
    232   %ext = zext i1 %icmp0 to i32
    233   %icmp1 = icmp ne i32 %ext, 2
    234   store i1 %icmp1, i1 addrspace(1)* %out
    235   ret void
    236 }
    237 
    238 ; FUNC-LABEL: {{^}}zext_bool_icmp_eq_k:
    239 ; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
    240 ; GCN: buffer_store_byte [[RESULT]]
    241 ; GCN-NEXT: s_endpgm
        ; Kernel under test: stores the i1 value of (zext (%a != %b)) == 2 to %out.
        ; The zero-extended i1 is 0 or 1 and never 2, so the result is always
        ; false; the checks above expect a constant 0 to be stored.
        ; Note that only the outer compare is "eq": the inner compare is "ne",
        ; which does not affect the outcome because the fold is independent of
        ; the inner predicate.
    242 define amdgpu_kernel void @zext_bool_icmp_eq_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
    243   %icmp0 = icmp ne i32 %a, %b
    244   %ext = zext i1 %icmp0 to i32
    245   %icmp1 = icmp eq i32 %ext, 2
    246   store i1 %icmp1, i1 addrspace(1)* %out
    247   ret void
    248 }
    249 
    250 ; FIXME: These cases should really be able to fold to true/false in
    251 ; DAGCombiner
    252 
    253 ; This really folds away to false
    254 ; FUNC-LABEL: {{^}}sext_bool_icmp_eq_1:
    255 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0{{$}}
    256 ; GCN: buffer_store_byte [[K]]
        ; Kernel under test: stores the i1 value of (sext (%a == %b)) == 1 to %out.
        ; A sign-extended i1 is 0 or -1 and never 1, so the result is always
        ; false; the checks above expect a constant 0 to be moved into a VGPR
        ; and stored.
    257 define amdgpu_kernel void @sext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
    258   %icmp0 = icmp eq i32 %a, %b
    259   %ext = sext i1 %icmp0 to i32
    260   %icmp1 = icmp eq i32 %ext, 1
    261   store i1 %icmp1, i1 addrspace(1)* %out
    262   ret void
    263 }
    264 
    265 ; FUNC-LABEL: {{^}}sext_bool_icmp_ne_1:
    266 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 1{{$}}
    267 ; GCN: buffer_store_byte [[K]]
        ; Kernel under test: stores the i1 value of (sext (%a != %b)) != 1 to %out.
        ; A sign-extended i1 is 0 or -1 and so always differs from 1; the
        ; checks above expect a constant 1 to be moved into a VGPR and stored.
    268 define amdgpu_kernel void @sext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
    269   %icmp0 = icmp ne i32 %a, %b
    270   %ext = sext i1 %icmp0 to i32
    271   %icmp1 = icmp ne i32 %ext, 1
    272   store i1 %icmp1, i1 addrspace(1)* %out
    273   ret void
    274 }
    275 
    276 ; FUNC-LABEL: {{^}}sext_bool_icmp_ne_k:
    277 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 1{{$}}
    278 ; GCN: buffer_store_byte [[K]]
        ; Kernel under test: stores the i1 value of (sext (%a != %b)) != 2 to %out.
        ; A sign-extended i1 is 0 or -1 and never 2, so the result is always
        ; true; the checks above expect a constant 1 to be moved into a VGPR
        ; and stored.
    279 define amdgpu_kernel void @sext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
    280   %icmp0 = icmp ne i32 %a, %b
    281   %ext = sext i1 %icmp0 to i32
    282   %icmp1 = icmp ne i32 %ext, 2
    283   store i1 %icmp1, i1 addrspace(1)* %out
    284   ret void
    285 }
    286