Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
      3 
      4 declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
      5 
      6 ; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_eq_0:
      7 ; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
      8 ; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
      9 ; SI: v_cmp_eq_u32_e32 vcc, 0, [[TMP]]{{$}}
     10 ; SI: v_cndmask_b32_e64
     11 ; SI: buffer_store_byte
     12 define amdgpu_kernel void @sextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { ; *out = (sext(*in) == 0)
     13   %load = load i1, i1 addrspace(1)* %in
     14   %ext = sext i1 %load to i32              ; sign extension of an i1 gives 0 or -1
     15   %cmp = icmp eq i32 %ext, 0               ; true exactly when the loaded bit is clear
     16   store i1 %cmp, i1 addrspace(1)* %out
     17   ret void
     18 }
     19 
     20 ; FIXME: The s_xor negate below should be folded into an inverted compare.
     21 ; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_eq_0:
     22 ; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
     23 ; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
     24 ; SI: v_cmp_eq_u32_e32 vcc, 1, [[TMP]]{{$}}
     25 ; SI-NEXT: s_xor_b64 [[NEG:s\[[0-9]+:[0-9]+\]]], vcc, -1
     26 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[NEG]]
     27 ; SI: buffer_store_byte [[RESULT]]
     28 define amdgpu_kernel void @zextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { ; *out = (zext(*in) == 0)
     29   %bit = load i1, i1 addrspace(1)* %in
     30   %wide = zext i1 %bit to i32              ; zero extension of an i1 gives 0 or 1
     31   %is_zero = icmp eq i32 %wide, 0          ; true exactly when the loaded bit is clear
     32   store i1 %is_zero, i1 addrspace(1)* %out
     33   ret void
     34 }
     35 
     36 ; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_eq_1:
     37 ; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
     38 ; SI: buffer_store_byte [[RESULT]]
     39 define amdgpu_kernel void @sextload_i1_to_i32_trunc_cmp_eq_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { ; *out = (sext(*in) == 1)
     40   %bit = load i1, i1 addrspace(1)* %in
     41   %wide = sext i1 %bit to i32              ; only 0 or -1 can result
     42   %is_one = icmp eq i32 %wide, 1           ; so this compare is never true
     43   store i1 %is_one, i1 addrspace(1)* %out
     44   ret void
     45 }
     46 
     47 ; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_eq_1:
     48 ; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
     49 ; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
     50 ; SI: buffer_store_byte [[RESULT]]
     51 define amdgpu_kernel void @zextload_i1_to_i32_trunc_cmp_eq_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { ; *out = (zext(*in) == 1)
     52   %bit = load i1, i1 addrspace(1)* %in
     53   %wide = zext i1 %bit to i32              ; only 0 or 1 can result
     54   %is_one = icmp eq i32 %wide, 1           ; equivalent to the loaded bit itself
     55   store i1 %is_one, i1 addrspace(1)* %out
     56   ret void
     57 }
     58 
     59 ; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_eq_neg1:
     60 ; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
     61 ; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
     62 ; SI: buffer_store_byte [[RESULT]]
     63 define amdgpu_kernel void @sextload_i1_to_i32_trunc_cmp_eq_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { ; *out = (sext(*in) == -1)
     64   %bit = load i1, i1 addrspace(1)* %in
     65   %wide = sext i1 %bit to i32              ; only 0 or -1 can result
     66   %is_neg1 = icmp eq i32 %wide, -1         ; equivalent to the loaded bit itself
     67   store i1 %is_neg1, i1 addrspace(1)* %out
     68   ret void
     69 }
     70 
     71 ; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_eq_neg1:
     72 ; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
     73 ; SI: buffer_store_byte [[RESULT]]
     74 define amdgpu_kernel void @zextload_i1_to_i32_trunc_cmp_eq_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { ; *out = (zext(*in) == -1)
     75   %bit = load i1, i1 addrspace(1)* %in
     76   %wide = zext i1 %bit to i32              ; only 0 or 1 can result
     77   %is_neg1 = icmp eq i32 %wide, -1         ; so this compare is never true
     78   store i1 %is_neg1, i1 addrspace(1)* %out
     79   ret void
     80 }
     81 
     82 
     83 ; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_ne_0:
     84 ; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
     85 ; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
     86 ; SI: buffer_store_byte [[RESULT]]
     87 define amdgpu_kernel void @sextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { ; *out = (sext(*in) != 0)
     88   %load = load i1, i1 addrspace(1)* %in
     89   %ext = sext i1 %load to i32              ; sign extension of an i1 gives 0 or -1
     90   %cmp = icmp ne i32 %ext, 0               ; equivalent to the loaded bit itself
     91   store i1 %cmp, i1 addrspace(1)* %out
     92   ret void
     93 }
     94 
     95 ; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_0:
     96 ; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
     97 ; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
     98 ; SI: buffer_store_byte [[RESULT]]
     99 define amdgpu_kernel void @zextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { ; *out = (zext(*in) != 0)
    100   %bit = load i1, i1 addrspace(1)* %in
    101   %wide = zext i1 %bit to i32              ; only 0 or 1 can result
    102   %nonzero = icmp ne i32 %wide, 0          ; equivalent to the loaded bit itself
    103   store i1 %nonzero, i1 addrspace(1)* %out
    104   ret void
    105 }
    106 
    107 ; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_ne_1:
    108 ; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
    109 ; SI: buffer_store_byte [[RESULT]]
    110 define amdgpu_kernel void @sextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { ; *out = (sext(*in) != 1)
    111   %bit = load i1, i1 addrspace(1)* %in
    112   %wide = sext i1 %bit to i32              ; only 0 or -1 can result
    113   %not_one = icmp ne i32 %wide, 1          ; so this compare is always true
    114   store i1 %not_one, i1 addrspace(1)* %out
    115   ret void
    116 }
    117 
    118 ; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_1:
    119 ; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
    120 ; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
    121 ; SI: v_cmp_eq_u32_e32 vcc, 1, [[TMP]]{{$}}
    122 ; SI-NEXT: s_xor_b64 [[NEG:s\[[0-9]+:[0-9]+\]]], vcc, -1
    123 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[NEG]]
    124 ; SI: buffer_store_byte [[RESULT]]
    125 define amdgpu_kernel void @zextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { ; *out = (zext(*in) != 1)
    126   %bit = load i1, i1 addrspace(1)* %in
    127   %wide = zext i1 %bit to i32              ; only 0 or 1 can result
    128   %not_one = icmp ne i32 %wide, 1          ; true exactly when the loaded bit is clear
    129   store i1 %not_one, i1 addrspace(1)* %out
    130   ret void
    131 }
    132 
    133 ; FIXME: A single compare should be enough here.
    134 ; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_ne_neg1:
    135 ; XSI: buffer_load_ubyte [[LOAD:v[0-9]+]]
    136 ; XSI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
    137 ; XSI: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], [[TMP]], 0{{$}}
    138 ; XSI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP0]]
    139 ; XSI-NEXT: buffer_store_byte [[RESULT]]
    140 define amdgpu_kernel void @sextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { ; *out = (sext(*in) != -1)
    141   %bit = load i1, i1 addrspace(1)* %in
    142   %wide = sext i1 %bit to i32              ; only 0 or -1 can result
    143   %not_neg1 = icmp ne i32 %wide, -1        ; true exactly when the loaded bit is clear
    144   store i1 %not_neg1, i1 addrspace(1)* %out
    145   ret void
    146 }
    147 
    148 ; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_neg1:
    149 ; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
    150 ; SI: buffer_store_byte [[RESULT]]
    151 define amdgpu_kernel void @zextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { ; *out = (zext(*in) != -1)
    152   %bit = load i1, i1 addrspace(1)* %in
    153   %wide = zext i1 %bit to i32              ; only 0 or 1 can result
    154   %not_neg1 = icmp ne i32 %wide, -1        ; so this compare is always true
    155   store i1 %not_neg1, i1 addrspace(1)* %out
    156   ret void
    157 }
    158 
    159 ; FIXME: The non-uniform case (load without gep) for the function below still needs handling.
    160 ; FUNC-LABEL: {{^}}masked_load_i1_to_i32_trunc_cmp_ne_neg1:
    161 ; SI: {{buffer|flat}}_load_sbyte [[LOAD:v[0-9]+]]
    162 ; SI: v_cmp_ne_u32_e32 vcc, -1, [[LOAD]]{{$}}
    163 ; SI-NEXT: v_cndmask_b32_e64
    164 ; SI: {{buffer|flat}}_store_byte
    165 define amdgpu_kernel void @masked_load_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind { ; *out = (sext(in[tid.x] & 255) != -1)
    166   %lane = call i32 @llvm.amdgcn.workitem.id.x()
    167   %elt.ptr = getelementptr i8, i8 addrspace(1)* %in, i32 %lane   ; one byte per work-item id
    168   %byte = load i8, i8 addrspace(1)* %elt.ptr
    169   %kept = and i8 %byte, 255                ; mask covers all eight bits of the i8
    170   %wide = sext i8 %kept to i32
    171   %not_neg1 = icmp ne i32 %wide, -1
    172   store i1 %not_neg1, i1 addrspace(1)* %out
    173   ret void
    174 }
    175