; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; Each test truncates (or shifts and truncates) either a kernel argument or a
; value loaded through a per-work-item pointer, stores the result, and checks
; which load and store instructions are selected.

declare i32 @llvm.r600.read.tidig.x() nounwind readnone

; Make sure we don't turn the 32-bit argument load into a 16-bit
; load. There aren't extending scalar loads, so that would require
; using a buffer_load instruction.

; FUNC-LABEL: {{^}}truncate_kernarg_i32_to_i16:
; SI: s_load_dword s
; SI: buffer_store_short v
define void @truncate_kernarg_i32_to_i16(i16 addrspace(1)* %out, i32 %arg) nounwind {
  %trunc = trunc i32 %arg to i16
  store i16 %trunc, i16 addrspace(1)* %out
  ret void
}

; It should be OK (and probably performance neutral) to reduce this,
; but we don't know if the load is uniform yet.

; FUNC-LABEL: {{^}}truncate_buffer_load_i32_to_i16:
; SI: buffer_load_dword v
; SI: buffer_store_short v
define void @truncate_buffer_load_i32_to_i16(i16 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
  %load = load i32, i32 addrspace(1)* %gep.in
  %trunc = trunc i32 %load to i16
  store i16 %trunc, i16 addrspace(1)* %gep.out
  ret void
}

; Same as the i16 kernarg case, but truncating to i8: the argument is still
; expected to be loaded as a dword and written with a byte store.

; FUNC-LABEL: {{^}}truncate_kernarg_i32_to_i8:
; SI: s_load_dword s
; SI: buffer_store_byte v
define void @truncate_kernarg_i32_to_i8(i8 addrspace(1)* %out, i32 %arg) nounwind {
  %trunc = trunc i32 %arg to i8
  store i8 %trunc, i8 addrspace(1)* %out
  ret void
}

; Per-work-item i32 load truncated to i8: the load is expected to stay a
; dword load.

; FUNC-LABEL: {{^}}truncate_buffer_load_i32_to_i8:
; SI: buffer_load_dword v
; SI: buffer_store_byte v
define void @truncate_buffer_load_i32_to_i8(i8 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
  %load = load i32, i32 addrspace(1)* %gep.in
  %trunc = trunc i32 %load to i8
  store i8 %trunc, i8 addrspace(1)* %gep.out
  ret void
}

; Truncation to i1: the result is expected to be written with a byte store.

; FUNC-LABEL: {{^}}truncate_kernarg_i32_to_i1:
; SI: s_load_dword s
; SI: buffer_store_byte v
define void @truncate_kernarg_i32_to_i1(i1 addrspace(1)* %out, i32 %arg) nounwind {
  %trunc = trunc i32 %arg to i1
  store i1 %trunc, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}truncate_buffer_load_i32_to_i1:
; SI: buffer_load_dword v
; SI: buffer_store_byte v
define void @truncate_buffer_load_i32_to_i1(i1 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i1, i1 addrspace(1)* %out, i32 %tid
  %load = load i32, i32 addrspace(1)* %gep.in
  %trunc = trunc i32 %load to i1
  store i1 %trunc, i1 addrspace(1)* %gep.out
  ret void
}

; Only the low half of the i64 is used; the checks look for dword-sized
; load and store instructions.

; FUNC-LABEL: {{^}}truncate_kernarg_i64_to_i32:
; SI: s_load_dword s
; SI: buffer_store_dword v
define void @truncate_kernarg_i64_to_i32(i32 addrspace(1)* %out, i64 %arg) nounwind {
  %trunc = trunc i64 %arg to i32
  store i32 %trunc, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}truncate_buffer_load_i64_to_i32:
; SI: buffer_load_dword v
; SI: buffer_store_dword v
define void @truncate_buffer_load_i64_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %load = load i64, i64 addrspace(1)* %gep.in
  %trunc = trunc i64 %load to i32
  store i32 %trunc, i32 addrspace(1)* %gep.out
  ret void
}

; Shifting right by 32 before truncating keeps only the high half of the
; i64; the checks again look for dword-sized load and store instructions.

; FUNC-LABEL: {{^}}srl_kernarg_i64_to_i32:
; SI: s_load_dword s
; SI: buffer_store_dword v
define void @srl_kernarg_i64_to_i32(i32 addrspace(1)* %out, i64 %arg) nounwind {
  %srl = lshr i64 %arg, 32
  %trunc = trunc i64 %srl to i32
  store i32 %trunc, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}srl_buffer_load_i64_to_i32:
; SI: buffer_load_dword v
; SI: buffer_store_dword v
define void @srl_buffer_load_i64_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %load = load i64, i64 addrspace(1)* %gep.in
  %srl = lshr i64 %load, 32
  %trunc = trunc i64 %srl to i32
  store i32 %trunc, i32 addrspace(1)* %gep.out
  ret void
}

; i16 -> i8 truncation. The kernel argument is still expected to be read
; with a scalar dword load.

; FUNC-LABEL: {{^}}truncate_kernarg_i16_to_i8:
; SI: s_load_dword s
; SI: buffer_store_byte v
define void @truncate_kernarg_i16_to_i8(i8 addrspace(1)* %out, i16 %arg) nounwind {
  %trunc = trunc i16 %arg to i8
  store i8 %trunc, i8 addrspace(1)* %out
  ret void
}

; Might as well reduce to 8-bit loads: unlike the wider cases above, the
; i16 load here is expected to be narrowed to a byte load.

; FUNC-LABEL: {{^}}truncate_buffer_load_i16_to_i8:
; SI: buffer_load_ubyte v
; SI: buffer_store_byte v
define void @truncate_buffer_load_i16_to_i8(i8 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.in = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
  %load = load i16, i16 addrspace(1)* %gep.in
  %trunc = trunc i16 %load to i8
  store i8 %trunc, i8 addrspace(1)* %gep.out
  ret void
}

; High half of an i64 truncated to i8: a dword load and a byte store are
; expected.

; FUNC-LABEL: {{^}}srl_kernarg_i64_to_i8:
; SI: s_load_dword s
; SI: buffer_store_byte v
define void @srl_kernarg_i64_to_i8(i8 addrspace(1)* %out, i64 %arg) nounwind {
  %srl = lshr i64 %arg, 32
  %trunc = trunc i64 %srl to i8
  store i8 %trunc, i8 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}srl_buffer_load_i64_to_i8:
; SI: buffer_load_dword v
; SI: buffer_store_byte v
define void @srl_buffer_load_i64_to_i8(i8 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
  %load = load i64, i64 addrspace(1)* %gep.in
  %srl = lshr i64 %load, 32
  %trunc = trunc i64 %srl to i8
  store i8 %trunc, i8 addrspace(1)* %gep.out
  ret void
}

; Low byte of an i64: a dword load and a byte store are expected.

; FUNC-LABEL: {{^}}truncate_kernarg_i64_to_i8:
; SI: s_load_dword s
; SI: buffer_store_byte v
define void @truncate_kernarg_i64_to_i8(i8 addrspace(1)* %out, i64 %arg) nounwind {
  %trunc = trunc i64 %arg to i8
  store i8 %trunc, i8 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}truncate_buffer_load_i64_to_i8:
; SI: buffer_load_dword v
; SI: buffer_store_byte v
define void @truncate_buffer_load_i64_to_i8(i8 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
  %load = load i64, i64 addrspace(1)* %gep.in
  %trunc = trunc i64 %load to i8
  store i8 %trunc, i8 addrspace(1)* %gep.out
  ret void
}

; Masking a loaded i32 down to its low 16 bits: the checks expect a scalar
; dword load followed by an explicit s_and_b32 with 0xffff.

; FUNC-LABEL: {{^}}smrd_mask_i32_to_i16:
; SI: s_load_dword [[LOAD:s[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0x0
; SI: s_waitcnt lgkmcnt(0)
; SI: s_and_b32 s{{[0-9]+}}, [[LOAD]], 0xffff
define void @smrd_mask_i32_to_i16(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %val = load i32, i32 addrspace(2)* %in
  %mask = and i32 %val, 65535
  store i32 %mask, i32 addrspace(1)* %out
  ret void
}