; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Each function below compares an extended value (a sign/zero-extended i1
; compare result, or a sign/zero-extended i8) against a constant and stores
; the i1 result. The checks pin the instruction sequence expected for that
; compare-of-extend pattern.

; (sext (a == b)) == 0 is true exactly when a != b; a single not-equal
; compare is expected.
; FUNC-LABEL: {{^}}sext_bool_icmp_eq_0:
; GCN-NOT: v_cmp
; GCN: v_cmp_ne_u32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm

; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
; EG: AND_INT T{{[0-9]+\.[XYZW]}}, PS, 1
define amdgpu_kernel void @sext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 0
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; (sext (a != b)) != 0 is true exactly when a != b.
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_0:
; GCN-NOT: v_cmp
; GCN: v_cmp_ne_u32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm

; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
; EG: AND_INT T{{[0-9]+\.[XYZW]}}, PS, 1
define amdgpu_kernel void @sext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 0
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; (sext (a == b)) == -1 is true exactly when a == b.
; FUNC-LABEL: {{^}}sext_bool_icmp_eq_neg1:
; GCN-NOT: v_cmp
; GCN: v_cmp_eq_u32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @sext_bool_icmp_eq_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, -1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; (sext (a != b)) != -1 is true exactly when a == b.
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_neg1:
; GCN-NOT: v_cmp
; GCN: v_cmp_eq_u32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @sext_bool_icmp_ne_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, -1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; (zext (a == b)) == 0 is true exactly when a != b.
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_0:
; GCN-NOT: v_cmp
; GCN: v_cmp_ne_u32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 0
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; (zext (a != b)) != 0 is true exactly when a != b.
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_0:
; GCN-NOT: v_cmp
; GCN: v_cmp_ne_u32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 0
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; (zext (a == b)) == 1 is true exactly when a == b.
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_1:
; GCN-NOT: v_cmp
; GCN: v_cmp_eq_u32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; (zext (a != b)) != 1 is true exactly when a == b.
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_1:
; GCN-NOT: v_cmp
; GCN: v_cmp_eq_u32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; A zero-extended i1 is 0 or 1 and can never equal -1.
; Reduces to false:
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_neg1:
; GCN: v_mov_b32_e32 [[TMP:v[0-9]+]], 0{{$}}
; GCN: buffer_store_byte [[TMP]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_eq_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, -1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; A zero-extended i1 is 0 or 1 and therefore always differs from -1.
; Reduces to true:
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_neg1:
; GCN: v_mov_b32_e32 [[TMP:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[TMP]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_ne_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, -1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; Zero-extended i8 kernel argument compared against 255, the largest value
; a zero-extended i8 can take.
; FUNC-LABEL: {{^}}cmp_zext_k_i8max:
; SI: s_load_dword [[VALUE:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; VI: s_load_dword [[VALUE:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN: s_movk_i32 [[K255:s[0-9]+]], 0xff
; GCN-DAG: v_mov_b32_e32 [[VK255:v[0-9]+]], [[K255]]
; SI-DAG: s_and_b32 [[B:s[0-9]+]], [[VALUE]], [[K255]]
; SI: v_cmp_ne_u32_e32 vcc, [[B]], [[VK255]]

; VI-DAG: v_and_b32_e32 [[B:v[0-9]+]], [[VALUE]], [[VK255]]
; VI: v_cmp_ne_u16_e32 vcc, [[K255]], [[B]]

; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define amdgpu_kernel void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind {
  %b.ext = zext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, 255
  store i1 %icmp0, i1 addrspace(1)* %out
  ret void
}

; Sign-extended i8 loaded from memory and compared against -1; a signed byte
; load followed by a compare with the inline constant -1 is expected.
; FUNC-LABEL: {{^}}cmp_sext_k_neg1:
; GCN: buffer_load_sbyte [[B:v[0-9]+]]
; GCN: v_cmp_ne_u32_e32 vcc, -1, [[B]]{{$}}
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define amdgpu_kernel void @cmp_sext_k_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %b.ptr) nounwind {
  %b = load i8, i8 addrspace(1)* %b.ptr
  %b.ext = sext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, -1
  store i1 %icmp0, i1 addrspace(1)* %out
  ret void
}

; Non-kernel function whose i8 argument is marked signext; the compare is
; expected to use v0 directly.
; FUNC-LABEL: {{^}}v_cmp_sext_k_neg1_i8_sext_arg:
; GCN: v_cmp_ne_u32_e32 vcc, -1, v0
; GCN-NEXT: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 0, 1, vcc
; GCN: buffer_store_byte [[SELECT]]
define void @v_cmp_sext_k_neg1_i8_sext_arg(i8 signext %b) nounwind {
  %b.ext = sext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, -1
  store i1 %icmp0, i1 addrspace(1)* undef
  ret void
}

; FIXME: This ends up doing a buffer_load_ubyte, and a compare to
; 255. Seems to be because of ordering problems when not allowing load widths to be reduced.
; Should do a buffer_load_sbyte and compare with -1

; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_arg:
; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; VI: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN: s_movk_i32 [[K:s[0-9]+]], 0xff
; GCN-DAG: s_and_b32 [[B:s[0-9]+]], [[VAL]], [[K]]
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], [[K]]
; GCN: v_cmp_ne_u32_e32 vcc, [[B]], [[VK]]{{$}}
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define amdgpu_kernel void @cmp_sext_k_neg1_i8_arg(i1 addrspace(1)* %out, i8 %b) nounwind {
  %b.ext = sext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, -1
  store i1 %icmp0, i1 addrspace(1)* %out
  ret void
}

; A zero-extended i8 is in [0, 255] and always differs from -1, so the
; stored value is the constant 1.
; FUNC-LABEL: {{^}}cmp_zext_k_neg1:
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define amdgpu_kernel void @cmp_zext_k_neg1(i1 addrspace(1)* %out, i8 %b) nounwind {
  %b.ext = zext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, -1
  store i1 %icmp0, i1 addrspace(1)* %out
  ret void
}

; A zero-extended i1 is 0 or 1 and always differs from 2, so the stored
; value is the constant 1.
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_k:
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 2
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; A zero-extended i1 is 0 or 1 and can never equal 2, so the stored value is
; the constant 0.
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_k:
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
; GCN: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_eq_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 2
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; FIXME: These cases should really be able to fold to true/false in
; DAGCombiner

; A sign-extended i1 is 0 or -1 and can never equal 1.
; This really folds away to false
; FUNC-LABEL: {{^}}sext_bool_icmp_eq_1:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0{{$}}
; GCN: buffer_store_byte [[K]]
define amdgpu_kernel void @sext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; A sign-extended i1 is 0 or -1 and always differs from 1, so the stored
; value is the constant 1.
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_1:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[K]]
define amdgpu_kernel void @sext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; A sign-extended i1 is 0 or -1 and always differs from 2, so the stored
; value is the constant 1.
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_k:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[K]]
define amdgpu_kernel void @sext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 2
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}