; Codegen test: narrow (i1/i16/i17/half/<2 x i8>) loads with `align 4` from
; kernel-argument pointers. For every kernel except the divergent-address one,
; the checks expect the load to be selected as a scalar s_load_dword.
;
; The test is run twice, for tahiti (check prefix SI) and tonga (check prefix
; VI); expectations shared by both use the common prefix.
; NOTE(review): both invocations pass
; -amdgpu-codegenprepare-widen-constant-loads=0, presumably so the widening is
; not performed by the IR-level codegen-prepare pass - confirm.
; RUN: llc -amdgpu-codegenprepare-widen-constant-loads=0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
; RUN: llc -amdgpu-codegenprepare-widen-constant-loads=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s

; i16 load from addrspace(4), then add 999 (0x3e7) and or 4, stored as i16.
; The checks expect the add/or to operate directly on the loaded dword.
; GCN-LABEL: {{^}}widen_i16_constant_load:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: s_addk_i32 [[VAL]], 0x3e7
; GCN: s_or_b32 [[OR:s[0-9]+]], [[VAL]], 4
define amdgpu_kernel void @widen_i16_constant_load(i16 addrspace(4)* %arg) {
  %load = load i16, i16 addrspace(4)* %arg, align 4
  %add = add i16 %load, 999
  %or = or i16 %add, 4
  store i16 %or, i16 addrspace(1)* null
  ret void
}

; Same i16 load, but zero-extended to i32 before the add/or and stored as i32.
; The checks expect the zext to appear as a mask of the loaded dword with 0xffff.
; GCN-LABEL: {{^}}widen_i16_constant_load_zext_i32:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: s_and_b32 [[TRUNC:s[0-9]+]], [[VAL]], 0xffff{{$}}
; GCN: s_addk_i32 [[TRUNC]], 0x3e7
; GCN: s_or_b32 [[OR:s[0-9]+]], [[TRUNC]], 4
define amdgpu_kernel void @widen_i16_constant_load_zext_i32(i16 addrspace(4)* %arg) {
  %load = load i16, i16 addrspace(4)* %arg, align 4
  %ext = zext i16 %load to i32
  %add = add i32 %ext, 999
  %or = or i32 %add, 4
  store i32 %or, i32 addrspace(1)* null
  ret void
}

; Same i16 load, but sign-extended to i32 before the add/or and stored as i32.
; The checks expect the sext to appear as s_sext_i32_i16 on the loaded dword.
; GCN-LABEL: {{^}}widen_i16_constant_load_sext_i32:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: s_sext_i32_i16 [[EXT:s[0-9]+]], [[VAL]]
; GCN: s_addk_i32 [[EXT]], 0x3e7
; GCN: s_or_b32 [[OR:s[0-9]+]], [[EXT]], 4
define amdgpu_kernel void @widen_i16_constant_load_sext_i32(i16 addrspace(4)* %arg) {
  %load = load i16, i16 addrspace(4)* %arg, align 4
  %ext = sext i16 %load to i32
  %add = add i32 %ext, 999
  %or = or i32 %add, 4
  store i32 %or, i32 addrspace(1)* null
  ret void
}

; Non-power-of-two width: i17 load, add 34, or 4, stored as i17.
; NOTE(review): the s_bfe_u32 operand 0x10010 appears to encode offset 16,
; width 1 (i.e. bit 16 of the result, the part of the i17 that does not fit in
; 16 bits) - confirm against the ISA documentation.
; GCN-LABEL: {{^}}widen_i17_constant_load:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], 34
; GCN: s_or_b32 [[OR:s[0-9]+]], [[ADD]], 4
; GCN: s_bfe_u32 s{{[0-9]+}}, [[OR]], 0x10010
define amdgpu_kernel void @widen_i17_constant_load(i17 addrspace(4)* %arg) {
  %load = load i17, i17 addrspace(4)* %arg, align 4
  %add = add i17 %load, 34
  %or = or i17 %add, 4
  store i17 %or, i17 addrspace(1)* null
  ret void
}

; Floating-point variant: half load, fadd 4.0, stored as half.
; The tahiti checks expect a conversion to f32 followed by an f32 add; the
; tonga checks expect a single f16 add taking the loaded scalar directly.
; GCN-LABEL: {{^}}widen_f16_constant_load:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; SI: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[VAL]]
; SI: v_add_f32_e32 [[ADD:v[0-9]+]], 4.0, [[CVT]]

; VI: v_add_f16_e64 [[ADD:v[0-9]+]], [[VAL]], 4.0
define amdgpu_kernel void @widen_f16_constant_load(half addrspace(4)* %arg) {
  %load = load half, half addrspace(4)* %arg, align 4
  %add = fadd half %load, 4.0
  store half %add, half addrspace(1)* null
  ret void
}

; Vector variant: <2 x i8> load, per-element add <12, 44> and or <4, 3>.
; The tahiti checks list only scalar (s_*) instructions, while the tonga checks
; include vector (v_*_sdwa) instructions, which is what the FIXME refers to.
; FIXME: valu usage on VI
; GCN-LABEL: {{^}}widen_v2i8_constant_load:
; GCN: s_load_dword [[VAL:s[0-9]+]]

; SI: s_add_i32
; SI: s_or_b32
; SI: s_addk_i32
; SI: s_and_b32
; SI: s_or_b32
; SI: s_or_b32

; VI: s_add_i32
; VI: v_add_u32_sdwa
; VI: v_or_b32_sdwa
; VI: v_or_b32_e32
define amdgpu_kernel void @widen_v2i8_constant_load(<2 x i8> addrspace(4)* %arg) {
  %load = load <2 x i8>, <2 x i8> addrspace(4)* %arg, align 4
  %add = add <2 x i8> %load, <i8 12, i8 44>
  %or = or <2 x i8> %add, <i8 4, i8 3>
  store <2 x i8> %or, <2 x i8> addrspace(1)* null
  ret void
}

; Negative case: the load address is indexed by the work-item id, so it differs
; per work item. The checks expect a 16-bit buffer/flat load (load_ushort)
; instead of a scalar dword load.
; GCN-LABEL: {{^}}no_widen_i16_constant_divergent_load:
; GCN: {{buffer|flat}}_load_ushort
define amdgpu_kernel void @no_widen_i16_constant_divergent_load(i16 addrspace(4)* %arg) {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = zext i32 %tid to i64
  ; Address = %arg + tid * sizeof(i16).
  %gep.arg = getelementptr inbounds i16, i16 addrspace(4)* %arg, i64 %tid.ext
  %load = load i16, i16 addrspace(4)* %gep.arg, align 4
  %add = add i16 %load, 999
  %or = or i16 %add, 4
  store i16 %or, i16 addrspace(1)* null
  ret void
}

; i1 load and'ed with true and stored as i1.
; The checks expect the loaded dword to be masked with 1.
; GCN-LABEL: {{^}}widen_i1_constant_load:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: s_and_b32 {{s[0-9]+}}, [[VAL]], 1{{$}}
define amdgpu_kernel void @widen_i1_constant_load(i1 addrspace(4)* %arg) {
  %load = load i1, i1 addrspace(4)* %arg, align 4
  %and = and i1 %load, true
  store i1 %and, i1 addrspace(1)* null
  ret void
}

; i16 load zero-extended, then add 999 and or 4.
; NOTE(review): despite the "i64" in the name, the IR below zero-extends to
; i32, which makes this body identical to widen_i16_constant_load_zext_i32 -
; confirm whether a zext to i64 was intended.
; GCN-LABEL: {{^}}widen_i16_zextload_i64_constant_load:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: s_and_b32 [[TRUNC:s[0-9]+]], [[VAL]], 0xffff{{$}}
; GCN: s_addk_i32 [[TRUNC]], 0x3e7
; GCN: s_or_b32 [[OR:s[0-9]+]], [[TRUNC]], 4
define amdgpu_kernel void @widen_i16_zextload_i64_constant_load(i16 addrspace(4)* %arg) {
  %load = load i16, i16 addrspace(4)* %arg, align 4
  %zext = zext i16 %load to i32
  %add = add i32 %zext, 999
  %or = or i32 %add, 4
  store i32 %or, i32 addrspace(1)* null
  ret void
}

; i1 load zero-extended to i64, then a 64-bit add of 999 stored as i64.
; The checks expect a mask with 1 followed by an add / add-with-carry pair
; (s_add_u32 + s_addc_u32) forming the 64-bit addition.
; GCN-LABEL: {{^}}widen_i1_zext_to_i64_constant_load:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: s_and_b32 [[AND:s[0-9]+]], [[VAL]], 1
; GCN: s_add_u32 [[ADD:s[0-9]+]], [[AND]], 0x3e7
; GCN: s_addc_u32 s{{[0-9]+}}, 0, 0
define amdgpu_kernel void @widen_i1_zext_to_i64_constant_load(i1 addrspace(4)* %arg) {
  %load = load i1, i1 addrspace(4)* %arg, align 4
  %zext = zext i1 %load to i64
  %add = add i64 %zext, 999
  store i64 %add, i64 addrspace(1)* null
  ret void
}

; Same as widen_i16_constant_load, but the pointer is in addrspace(6) instead
; of addrspace(4). The expected instructions are the same.
; GCN-LABEL: {{^}}widen_i16_constant32_load:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: s_addk_i32 [[VAL]], 0x3e7
; GCN: s_or_b32 [[OR:s[0-9]+]], [[VAL]], 4
define amdgpu_kernel void @widen_i16_constant32_load(i16 addrspace(6)* %arg) {
  %load = load i16, i16 addrspace(6)* %arg, align 4
  %add = add i16 %load, 999
  %or = or i16 %add, 4
  store i16 %or, i16 addrspace(1)* null
  ret void
}

; i16 load from addrspace(1) tagged with !invariant.load, then add 999 and
; or 1 (note: 1 here, not 4 as in the other i16 kernels). The checks expect the
; same scalar dword load as for the addrspace(4) kernels.
; GCN-LABEL: {{^}}widen_i16_global_invariant_load:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: s_addk_i32 [[VAL]], 0x3e7
; GCN: s_or_b32 [[OR:s[0-9]+]], [[VAL]], 1
define amdgpu_kernel void @widen_i16_global_invariant_load(i16 addrspace(1)* %arg) {
  %load = load i16, i16 addrspace(1)* %arg, align 4, !invariant.load !0
  %add = add i16 %load, 999
  %or = or i16 %add, 1
  store i16 %or, i16 addrspace(1)* null
  ret void
}

; Work-item id intrinsic, called only by no_widen_i16_constant_divergent_load.
declare i32 @llvm.amdgcn.workitem.id.x()

; Empty metadata node used as the operand of !invariant.load above.
!0 = !{}