; Tests for folding of (icmp (trunc/ext i1), constant) patterns on AMDGPU:
; sext i1 produces 0/-1 and zext i1 produces 0/1, so many comparisons fold
; to a constant or to a simple mask of the loaded bit.
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s

declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone

; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_eq_0:
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
; SI: v_cmp_eq_u32_e32 vcc, 0, [[TMP]]{{$}}
; SI: v_cndmask_b32_e64
; SI: buffer_store_byte
define amdgpu_kernel void @sextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
  %load = load i1, i1 addrspace(1)* %in
  %ext = sext i1 %load to i32
  %cmp = icmp eq i32 %ext, 0
  store i1 %cmp, i1 addrspace(1)* %out
  ret void
}

; FIXME: The negate should be inverting the compare.
; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_eq_0:
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
; SI: v_cmp_eq_u32_e32 vcc, 1, [[TMP]]{{$}}
; SI-NEXT: s_xor_b64 [[NEG:s\[[0-9]+:[0-9]+\]]], vcc, -1
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[NEG]]
; SI: buffer_store_byte [[RESULT]]
define amdgpu_kernel void @zextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
  %load = load i1, i1 addrspace(1)* %in
  %ext = zext i1 %load to i32
  %cmp = icmp eq i32 %ext, 0
  store i1 %cmp, i1 addrspace(1)* %out
  ret void
}

; sext i1 is 0 or -1, never 1, so the compare folds to false (0).
; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_eq_1:
; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
; SI: buffer_store_byte [[RESULT]]
define amdgpu_kernel void @sextload_i1_to_i32_trunc_cmp_eq_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
  %load = load i1, i1 addrspace(1)* %in
  %ext = sext i1 %load to i32
  %cmp = icmp eq i32 %ext, 1
  store i1 %cmp, i1 addrspace(1)* %out
  ret void
}

; zext i1 == 1 is just the loaded bit itself.
; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_eq_1:
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
; SI: buffer_store_byte [[RESULT]]
define amdgpu_kernel void @zextload_i1_to_i32_trunc_cmp_eq_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
  %load = load i1, i1 addrspace(1)* %in
  %ext = zext i1 %load to i32
  %cmp = icmp eq i32 %ext, 1
  store i1 %cmp, i1 addrspace(1)* %out
  ret void
}

; sext i1 == -1 is just the loaded bit itself.
; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_eq_neg1:
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
; SI: buffer_store_byte [[RESULT]]
define amdgpu_kernel void @sextload_i1_to_i32_trunc_cmp_eq_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
  %load = load i1, i1 addrspace(1)* %in
  %ext = sext i1 %load to i32
  %cmp = icmp eq i32 %ext, -1
  store i1 %cmp, i1 addrspace(1)* %out
  ret void
}

; zext i1 is 0 or 1, never -1, so the compare folds to false (0).
; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_eq_neg1:
; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
; SI: buffer_store_byte [[RESULT]]
define amdgpu_kernel void @zextload_i1_to_i32_trunc_cmp_eq_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
  %load = load i1, i1 addrspace(1)* %in
  %ext = zext i1 %load to i32
  %cmp = icmp eq i32 %ext, -1
  store i1 %cmp, i1 addrspace(1)* %out
  ret void
}


; sext i1 != 0 is just the loaded bit itself.
; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_ne_0:
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
; SI: buffer_store_byte [[RESULT]]
define amdgpu_kernel void @sextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
  %load = load i1, i1 addrspace(1)* %in
  %ext = sext i1 %load to i32
  %cmp = icmp ne i32 %ext, 0
  store i1 %cmp, i1 addrspace(1)* %out
  ret void
}

; zext i1 != 0 is just the loaded bit itself.
; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_0:
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
; SI: buffer_store_byte [[RESULT]]
define amdgpu_kernel void @zextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
  %load = load i1, i1 addrspace(1)* %in
  %ext = zext i1 %load to i32
  %cmp = icmp ne i32 %ext, 0
  store i1 %cmp, i1 addrspace(1)* %out
  ret void
}

; sext i1 is 0 or -1, never 1, so the compare folds to true (1).
; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_ne_1:
; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
; SI: buffer_store_byte [[RESULT]]
define amdgpu_kernel void @sextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
  %load = load i1, i1 addrspace(1)* %in
  %ext = sext i1 %load to i32
  %cmp = icmp ne i32 %ext, 1
  store i1 %cmp, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_1:
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
; SI: v_cmp_eq_u32_e32 vcc, 1, [[TMP]]{{$}}
; SI-NEXT: s_xor_b64 [[NEG:s\[[0-9]+:[0-9]+\]]], vcc, -1
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[NEG]]
; SI: buffer_store_byte [[RESULT]]
define amdgpu_kernel void @zextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
  %load = load i1, i1 addrspace(1)* %in
  %ext = zext i1 %load to i32
  %cmp = icmp ne i32 %ext, 1
  store i1 %cmp, i1 addrspace(1)* %out
  ret void
}

; FIXME: This should be one compare.
; (XSI checks are intentionally disabled until the combine is implemented.)
; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_ne_neg1:
; XSI: buffer_load_ubyte [[LOAD:v[0-9]+]]
; XSI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
; XSI: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], [[TMP]], 0{{$}}
; XSI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP0]]
; XSI-NEXT: buffer_store_byte [[RESULT]]
define amdgpu_kernel void @sextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
  %load = load i1, i1 addrspace(1)* %in
  %ext = sext i1 %load to i32
  %cmp = icmp ne i32 %ext, -1
  store i1 %cmp, i1 addrspace(1)* %out
  ret void
}

; zext i1 is 0 or 1, never -1, so the compare folds to true (1).
; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_neg1:
; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
; SI: buffer_store_byte [[RESULT]]
define amdgpu_kernel void @zextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
  %load = load i1, i1 addrspace(1)* %in
  %ext = zext i1 %load to i32
  %cmp = icmp ne i32 %ext, -1
  store i1 %cmp, i1 addrspace(1)* %out
  ret void
}

; FIXME: Need to handle non-uniform case for function below (load without gep).
; FUNC-LABEL: {{^}}masked_load_i1_to_i32_trunc_cmp_ne_neg1:
; SI: {{buffer|flat}}_load_sbyte [[LOAD:v[0-9]+]]
; SI: v_cmp_ne_u32_e32 vcc, -1, [[LOAD]]{{$}}
; SI-NEXT: v_cndmask_b32_e64
; SI: {{buffer|flat}}_store_byte
define amdgpu_kernel void @masked_load_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
  %tid.x = call i32 @llvm.amdgcn.workitem.id.x()
  %in.ptr = getelementptr i8, i8 addrspace(1)* %in, i32 %tid.x
  %load = load i8, i8 addrspace(1)* %in.ptr
  %masked = and i8 %load, 255
  %ext = sext i8 %masked to i32
  %cmp = icmp ne i32 %ext, -1
  store i1 %cmp, i1 addrspace(1)* %out
  ret void
}