1 /************************************************************************** 2 * 3 * Copyright 2015 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 #include "lp_bld_format.h" 29 #include "lp_bld_type.h" 30 #include "lp_bld_struct.h" 31 #include "lp_bld_const.h" 32 #include "lp_bld_flow.h" 33 #include "lp_bld_swizzle.h" 34 35 #include "util/u_math.h" 36 37 38 /** 39 * @file 40 * Complex block-compression based formats are handled here by using a cache, 41 * so re-decoding of every pixel is not required. 42 * Especially for bilinear filtering, texel reuse is very high hence even 43 * a small cache helps. 44 * The elements in the cache are the decoded blocks - currently things 45 * are restricted to formats which are 4x4 block based, and the decoded 46 * texels must fit into 4x8 bits. 47 * The cache is direct mapped so hitrates aren't all that great and cache 48 * thrashing could happen. 49 * 50 * @author Roland Scheidegger <sroland (at) vmware.com> 51 */ 52 53 54 #if LP_BUILD_FORMAT_CACHE_DEBUG 55 static void 56 update_cache_access(struct gallivm_state *gallivm, 57 LLVMValueRef ptr, 58 unsigned count, 59 unsigned index) 60 { 61 LLVMBuilderRef builder = gallivm->builder; 62 LLVMValueRef member_ptr, cache_access; 63 64 assert(index == LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL || 65 index == LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS); 66 67 member_ptr = lp_build_struct_get_ptr(gallivm, ptr, index, ""); 68 cache_access = LLVMBuildLoad(builder, member_ptr, "cache_access"); 69 cache_access = LLVMBuildAdd(builder, cache_access, 70 LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 71 count, 0), ""); 72 LLVMBuildStore(builder, cache_access, member_ptr); 73 } 74 #endif 75 76 77 static void 78 store_cached_block(struct gallivm_state *gallivm, 79 LLVMValueRef *col, 80 LLVMValueRef tag_value, 81 LLVMValueRef hash_index, 82 LLVMValueRef cache) 83 { 84 LLVMBuilderRef builder = gallivm->builder; 85 LLVMValueRef ptr, indices[3]; 86 LLVMTypeRef type_ptr4x32; 87 unsigned count; 88 89 type_ptr4x32 = LLVMPointerType(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), 0); 90 indices[0] = lp_build_const_int32(gallivm, 0); 91 indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS); 92 indices[2] = hash_index; 93 ptr = LLVMBuildGEP(builder, cache, indices, ARRAY_SIZE(indices), ""); 94 LLVMBuildStore(builder, tag_value, ptr); 95 96 indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_DATA); 97 hash_index = LLVMBuildMul(builder, hash_index, 98 lp_build_const_int32(gallivm, 16), ""); 99 for (count = 0; count < 4; count++) { 100 indices[2] = hash_index; 101 ptr = LLVMBuildGEP(builder, cache, indices, ARRAY_SIZE(indices), ""); 102 ptr = LLVMBuildBitCast(builder, ptr, type_ptr4x32, ""); 103 LLVMBuildStore(builder, col[count], ptr); 104 hash_index = LLVMBuildAdd(builder, hash_index, 105 lp_build_const_int32(gallivm, 4), ""); 106 } 107 } 108 109 110 static LLVMValueRef 111 lookup_cached_pixel(struct gallivm_state *gallivm, 112 LLVMValueRef ptr, 113 LLVMValueRef index) 114 { 115 LLVMBuilderRef builder = gallivm->builder; 116 LLVMValueRef member_ptr, indices[3]; 117 118 indices[0] = lp_build_const_int32(gallivm, 0); 119 indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_DATA); 120 indices[2] = index; 121 member_ptr = LLVMBuildGEP(builder, ptr, indices, ARRAY_SIZE(indices), ""); 122 return LLVMBuildLoad(builder, member_ptr, "cache_data"); 123 } 124 125 126 static LLVMValueRef 127 lookup_tag_data(struct gallivm_state *gallivm, 128 LLVMValueRef ptr, 129 LLVMValueRef index) 130 { 131 LLVMBuilderRef builder = gallivm->builder; 132 LLVMValueRef member_ptr, indices[3]; 133 134 indices[0] = lp_build_const_int32(gallivm, 0); 135 indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS); 136 indices[2] = index; 137 member_ptr = LLVMBuildGEP(builder, ptr, indices, ARRAY_SIZE(indices), ""); 138 return LLVMBuildLoad(builder, member_ptr, "tag_data"); 139 } 140 141 142 static void 143 update_cached_block(struct gallivm_state *gallivm, 144 const struct util_format_description *format_desc, 145 LLVMValueRef ptr_addr, 146 LLVMValueRef hash_index, 147 LLVMValueRef cache) 148 149 { 150 LLVMBuilderRef builder = gallivm->builder; 151 LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context); 152 LLVMTypeRef pi8t = LLVMPointerType(i8t, 0); 153 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); 154 LLVMTypeRef i32x4 = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4); 155 LLVMValueRef function; 156 LLVMValueRef tag_value, tmp_ptr; 157 LLVMValueRef col[4]; 158 unsigned i, j; 159 160 /* 161 * Use format_desc->fetch_rgba_8unorm() for each pixel in the block. 162 * This doesn't actually make any sense whatsoever, someone would need 163 * to write a function doing this for all pixels in a block (either as 164 * an external c function or with generated code). Don't ask. 165 */ 166 167 { 168 /* 169 * Function to call looks like: 170 * fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) 171 */ 172 LLVMTypeRef ret_type; 173 LLVMTypeRef arg_types[4]; 174 LLVMTypeRef function_type; 175 176 assert(format_desc->fetch_rgba_8unorm); 177 178 ret_type = LLVMVoidTypeInContext(gallivm->context); 179 arg_types[0] = pi8t; 180 arg_types[1] = pi8t; 181 arg_types[2] = i32t; 182 arg_types[3] = i32t; 183 function_type = LLVMFunctionType(ret_type, arg_types, 184 ARRAY_SIZE(arg_types), 0); 185 186 /* make const pointer for the C fetch_rgba_8unorm function */ 187 function = lp_build_const_int_pointer(gallivm, 188 func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm)); 189 190 /* cast the callee pointer to the function's type */ 191 function = LLVMBuildBitCast(builder, function, 192 LLVMPointerType(function_type, 0), 193 "cast callee"); 194 } 195 196 tmp_ptr = lp_build_array_alloca(gallivm, i32x4, 197 lp_build_const_int32(gallivm, 16), 198 "tmp_decode_store"); 199 tmp_ptr = LLVMBuildBitCast(builder, tmp_ptr, pi8t, ""); 200 201 /* 202 * Invoke format_desc->fetch_rgba_8unorm() for each pixel. 203 * This is going to be really really slow. 204 * Note: the block store format is actually 205 * x0y0x0y1x0y2x0y3 x1y0x1y1x1y2x1y3 ... 206 */ 207 for (i = 0; i < 4; ++i) { 208 for (j = 0; j < 4; ++j) { 209 LLVMValueRef args[4]; 210 LLVMValueRef dst_offset = lp_build_const_int32(gallivm, (i * 4 + j) * 4); 211 212 /* 213 * Note we actually supply a pointer to the start of the block, 214 * not the start of the texture. 215 */ 216 args[0] = LLVMBuildGEP(gallivm->builder, tmp_ptr, &dst_offset, 1, ""); 217 args[1] = ptr_addr; 218 args[2] = LLVMConstInt(i32t, i, 0); 219 args[3] = LLVMConstInt(i32t, j, 0); 220 LLVMBuildCall(builder, function, args, ARRAY_SIZE(args), ""); 221 } 222 } 223 224 /* Finally store the block - pointless mem copy + update tag. */ 225 tmp_ptr = LLVMBuildBitCast(builder, tmp_ptr, LLVMPointerType(i32x4, 0), ""); 226 for (i = 0; i < 4; ++i) { 227 LLVMValueRef tmp_offset = lp_build_const_int32(gallivm, i); 228 LLVMValueRef ptr = LLVMBuildGEP(gallivm->builder, tmp_ptr, &tmp_offset, 1, ""); 229 col[i] = LLVMBuildLoad(builder, ptr, ""); 230 } 231 232 tag_value = LLVMBuildPtrToInt(gallivm->builder, ptr_addr, 233 LLVMInt64TypeInContext(gallivm->context), ""); 234 store_cached_block(gallivm, col, tag_value, hash_index, cache); 235 } 236 237 238 /* 239 * Do a cached lookup. 240 * 241 * Returns (vectors of) 4x8 rgba aos value 242 */ 243 LLVMValueRef 244 lp_build_fetch_cached_texels(struct gallivm_state *gallivm, 245 const struct util_format_description *format_desc, 246 unsigned n, 247 LLVMValueRef base_ptr, 248 LLVMValueRef offset, 249 LLVMValueRef i, 250 LLVMValueRef j, 251 LLVMValueRef cache) 252 253 { 254 LLVMBuilderRef builder = gallivm->builder; 255 unsigned count, low_bit, log2size; 256 LLVMValueRef color, offset_stored, addr, ptr_addrtrunc, tmp; 257 LLVMValueRef ij_index, hash_index, hash_mask, block_index; 258 LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context); 259 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); 260 LLVMTypeRef i64t = LLVMInt64TypeInContext(gallivm->context); 261 struct lp_type type; 262 struct lp_build_context bld32; 263 memset(&type, 0, sizeof type); 264 type.width = 32; 265 type.length = n; 266 267 assert(format_desc->block.width == 4); 268 assert(format_desc->block.height == 4); 269 270 lp_build_context_init(&bld32, gallivm, type); 271 272 /* 273 * compute hash - we use direct mapped cache, the hash function could 274 * be better but it needs to be simple 275 * per-element: 276 * compare offset with offset stored at tag (hash) 277 * if not equal decode/store block, update tag 278 * extract color from cache 279 * assemble result vector 280 */ 281 282 /* TODO: not ideal with 32bit pointers... */ 283 284 low_bit = util_logbase2(format_desc->block.bits / 8); 285 log2size = util_logbase2(LP_BUILD_FORMAT_CACHE_SIZE); 286 addr = LLVMBuildPtrToInt(builder, base_ptr, i64t, ""); 287 ptr_addrtrunc = LLVMBuildPtrToInt(builder, base_ptr, i32t, ""); 288 ptr_addrtrunc = lp_build_broadcast_scalar(&bld32, ptr_addrtrunc); 289 /* For the hash function, first mask off the unused lowest bits. Then just 290 do some xor with address bits - only use lower 32bits */ 291 ptr_addrtrunc = LLVMBuildAdd(builder, offset, ptr_addrtrunc, ""); 292 ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc, 293 lp_build_const_int_vec(gallivm, type, low_bit), ""); 294 /* This only really makes sense for size 64,128,256 */ 295 hash_index = ptr_addrtrunc; 296 ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc, 297 lp_build_const_int_vec(gallivm, type, 2*log2size), ""); 298 hash_index = LLVMBuildXor(builder, ptr_addrtrunc, hash_index, ""); 299 tmp = LLVMBuildLShr(builder, hash_index, 300 lp_build_const_int_vec(gallivm, type, log2size), ""); 301 hash_index = LLVMBuildXor(builder, hash_index, tmp, ""); 302 303 hash_mask = lp_build_const_int_vec(gallivm, type, LP_BUILD_FORMAT_CACHE_SIZE - 1); 304 hash_index = LLVMBuildAnd(builder, hash_index, hash_mask, ""); 305 ij_index = LLVMBuildShl(builder, i, lp_build_const_int_vec(gallivm, type, 2), ""); 306 ij_index = LLVMBuildAdd(builder, ij_index, j, ""); 307 block_index = LLVMBuildShl(builder, hash_index, 308 lp_build_const_int_vec(gallivm, type, 4), ""); 309 block_index = LLVMBuildAdd(builder, ij_index, block_index, ""); 310 311 if (n > 1) { 312 color = LLVMGetUndef(LLVMVectorType(i32t, n)); 313 for (count = 0; count < n; count++) { 314 LLVMValueRef index, cond, colorx; 315 LLVMValueRef block_indexx, hash_indexx, addrx, offsetx, ptr_addrx; 316 struct lp_build_if_state if_ctx; 317 318 index = lp_build_const_int32(gallivm, count); 319 offsetx = LLVMBuildExtractElement(builder, offset, index, ""); 320 addrx = LLVMBuildZExt(builder, offsetx, i64t, ""); 321 addrx = LLVMBuildAdd(builder, addrx, addr, ""); 322 block_indexx = LLVMBuildExtractElement(builder, block_index, index, ""); 323 hash_indexx = LLVMBuildLShr(builder, block_indexx, 324 lp_build_const_int32(gallivm, 4), ""); 325 offset_stored = lookup_tag_data(gallivm, cache, hash_indexx); 326 cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addrx, ""); 327 328 lp_build_if(&if_ctx, gallivm, cond); 329 { 330 ptr_addrx = LLVMBuildIntToPtr(builder, addrx, 331 LLVMPointerType(i8t, 0), ""); 332 update_cached_block(gallivm, format_desc, ptr_addrx, hash_indexx, cache); 333 #if LP_BUILD_FORMAT_CACHE_DEBUG 334 update_cache_access(gallivm, cache, 1, 335 LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS); 336 #endif 337 } 338 lp_build_endif(&if_ctx); 339 340 colorx = lookup_cached_pixel(gallivm, cache, block_indexx); 341 342 color = LLVMBuildInsertElement(builder, color, colorx, 343 lp_build_const_int32(gallivm, count), ""); 344 } 345 } 346 else { 347 LLVMValueRef cond; 348 struct lp_build_if_state if_ctx; 349 350 tmp = LLVMBuildZExt(builder, offset, i64t, ""); 351 addr = LLVMBuildAdd(builder, tmp, addr, ""); 352 offset_stored = lookup_tag_data(gallivm, cache, hash_index); 353 cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addr, ""); 354 355 lp_build_if(&if_ctx, gallivm, cond); 356 { 357 tmp = LLVMBuildIntToPtr(builder, addr, LLVMPointerType(i8t, 0), ""); 358 update_cached_block(gallivm, format_desc, tmp, hash_index, cache); 359 #if LP_BUILD_FORMAT_CACHE_DEBUG 360 update_cache_access(gallivm, cache, 1, 361 LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS); 362 #endif 363 } 364 lp_build_endif(&if_ctx); 365 366 color = lookup_cached_pixel(gallivm, cache, block_index); 367 } 368 #if LP_BUILD_FORMAT_CACHE_DEBUG 369 update_cache_access(gallivm, cache, n, 370 LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL); 371 #endif 372 return LLVMBuildBitCast(builder, color, LLVMVectorType(i8t, n * 4), ""); 373 } 374 375