Home | History | Annotate | Download | only in gallivm
      1 /**************************************************************************
      2  *
      3  * Copyright 2015 VMware, Inc.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial portions
     16  * of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
     22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  **************************************************************************/
     27 
     28 #include "lp_bld_format.h"
     29 #include "lp_bld_type.h"
     30 #include "lp_bld_struct.h"
     31 #include "lp_bld_const.h"
     32 #include "lp_bld_flow.h"
     33 #include "lp_bld_swizzle.h"
     34 
     35 #include "util/u_math.h"
     36 
     37 
     38 /**
     39  * @file
     40  * Complex block-compression based formats are handled here by using a cache,
     41  * so re-decoding of every pixel is not required.
     42  * Especially for bilinear filtering, texel reuse is very high hence even
     43  * a small cache helps.
     44  * The elements in the cache are the decoded blocks - currently things
     45  * are restricted to formats which are 4x4 block based, and the decoded
     46  * texels must fit into 4x8 bits.
     47  * The cache is direct mapped so hitrates aren't all that great and cache
     48  * thrashing could happen.
     49  *
     50  * @author Roland Scheidegger <sroland (at) vmware.com>
     51  */
     52 
     53 
     54 #if LP_BUILD_FORMAT_CACHE_DEBUG
     55 static void
     56 update_cache_access(struct gallivm_state *gallivm,
     57                     LLVMValueRef ptr,
     58                     unsigned count,
     59                     unsigned index)
     60 {
     61    LLVMBuilderRef builder = gallivm->builder;
     62    LLVMValueRef member_ptr, cache_access;
     63 
     64    assert(index == LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL ||
     65           index == LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
     66 
     67    member_ptr = lp_build_struct_get_ptr(gallivm, ptr, index, "");
     68    cache_access = LLVMBuildLoad(builder, member_ptr, "cache_access");
     69    cache_access = LLVMBuildAdd(builder, cache_access,
     70                                LLVMConstInt(LLVMInt64TypeInContext(gallivm->context),
     71                                                                    count, 0), "");
     72    LLVMBuildStore(builder, cache_access, member_ptr);
     73 }
     74 #endif
     75 
     76 
     77 static void
     78 store_cached_block(struct gallivm_state *gallivm,
     79                    LLVMValueRef *col,
     80                    LLVMValueRef tag_value,
     81                    LLVMValueRef hash_index,
     82                    LLVMValueRef cache)
     83 {
     84    LLVMBuilderRef builder = gallivm->builder;
     85    LLVMValueRef ptr, indices[3];
     86    LLVMTypeRef type_ptr4x32;
     87    unsigned count;
     88 
     89    type_ptr4x32 = LLVMPointerType(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), 0);
     90    indices[0] = lp_build_const_int32(gallivm, 0);
     91    indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS);
     92    indices[2] = hash_index;
     93    ptr = LLVMBuildGEP(builder, cache, indices, ARRAY_SIZE(indices), "");
     94    LLVMBuildStore(builder, tag_value, ptr);
     95 
     96    indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_DATA);
     97    hash_index = LLVMBuildMul(builder, hash_index,
     98                              lp_build_const_int32(gallivm, 16), "");
     99    for (count = 0; count < 4; count++) {
    100       indices[2] = hash_index;
    101       ptr = LLVMBuildGEP(builder, cache, indices, ARRAY_SIZE(indices), "");
    102       ptr = LLVMBuildBitCast(builder, ptr, type_ptr4x32, "");
    103       LLVMBuildStore(builder, col[count], ptr);
    104       hash_index = LLVMBuildAdd(builder, hash_index,
    105                                 lp_build_const_int32(gallivm, 4), "");
    106    }
    107 }
    108 
    109 
    110 static LLVMValueRef
    111 lookup_cached_pixel(struct gallivm_state *gallivm,
    112                     LLVMValueRef ptr,
    113                     LLVMValueRef index)
    114 {
    115    LLVMBuilderRef builder = gallivm->builder;
    116    LLVMValueRef member_ptr, indices[3];
    117 
    118    indices[0] = lp_build_const_int32(gallivm, 0);
    119    indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_DATA);
    120    indices[2] = index;
    121    member_ptr = LLVMBuildGEP(builder, ptr, indices, ARRAY_SIZE(indices), "");
    122    return LLVMBuildLoad(builder, member_ptr, "cache_data");
    123 }
    124 
    125 
    126 static LLVMValueRef
    127 lookup_tag_data(struct gallivm_state *gallivm,
    128                 LLVMValueRef ptr,
    129                 LLVMValueRef index)
    130 {
    131    LLVMBuilderRef builder = gallivm->builder;
    132    LLVMValueRef member_ptr, indices[3];
    133 
    134    indices[0] = lp_build_const_int32(gallivm, 0);
    135    indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS);
    136    indices[2] = index;
    137    member_ptr = LLVMBuildGEP(builder, ptr, indices, ARRAY_SIZE(indices), "");
    138    return LLVMBuildLoad(builder, member_ptr, "tag_data");
    139 }
    140 
    141 
    142 static void
    143 update_cached_block(struct gallivm_state *gallivm,
    144                     const struct util_format_description *format_desc,
    145                     LLVMValueRef ptr_addr,
    146                     LLVMValueRef hash_index,
    147                     LLVMValueRef cache)
    148 
    149 {
    150    LLVMBuilderRef builder = gallivm->builder;
    151    LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
    152    LLVMTypeRef pi8t = LLVMPointerType(i8t, 0);
    153    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
    154    LLVMTypeRef i32x4 = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4);
    155    LLVMValueRef function;
    156    LLVMValueRef tag_value, tmp_ptr;
    157    LLVMValueRef col[4];
    158    unsigned i, j;
    159 
    160    /*
    161     * Use format_desc->fetch_rgba_8unorm() for each pixel in the block.
    162     * This doesn't actually make any sense whatsoever, someone would need
    163     * to write a function doing this for all pixels in a block (either as
    164     * an external c function or with generated code). Don't ask.
    165     */
    166 
    167    {
    168       /*
    169        * Function to call looks like:
    170        *   fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
    171        */
    172       LLVMTypeRef ret_type;
    173       LLVMTypeRef arg_types[4];
    174       LLVMTypeRef function_type;
    175 
    176       assert(format_desc->fetch_rgba_8unorm);
    177 
    178       ret_type = LLVMVoidTypeInContext(gallivm->context);
    179       arg_types[0] = pi8t;
    180       arg_types[1] = pi8t;
    181       arg_types[2] = i32t;
    182       arg_types[3] = i32t;
    183       function_type = LLVMFunctionType(ret_type, arg_types,
    184                                        ARRAY_SIZE(arg_types), 0);
    185 
    186       /* make const pointer for the C fetch_rgba_8unorm function */
    187       function = lp_build_const_int_pointer(gallivm,
    188          func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm));
    189 
    190       /* cast the callee pointer to the function's type */
    191       function = LLVMBuildBitCast(builder, function,
    192                                   LLVMPointerType(function_type, 0),
    193                                   "cast callee");
    194    }
    195 
    196    tmp_ptr = lp_build_array_alloca(gallivm, i32x4,
    197                                    lp_build_const_int32(gallivm, 16),
    198                                    "tmp_decode_store");
    199    tmp_ptr = LLVMBuildBitCast(builder, tmp_ptr, pi8t, "");
    200 
    201    /*
    202     * Invoke format_desc->fetch_rgba_8unorm() for each pixel.
    203     * This is going to be really really slow.
    204     * Note: the block store format is actually
    205     * x0y0x0y1x0y2x0y3 x1y0x1y1x1y2x1y3 ...
    206     */
    207    for (i = 0; i < 4; ++i) {
    208       for (j = 0; j < 4; ++j) {
    209          LLVMValueRef args[4];
    210          LLVMValueRef dst_offset = lp_build_const_int32(gallivm, (i * 4 + j) * 4);
    211 
    212          /*
    213           * Note we actually supply a pointer to the start of the block,
    214           * not the start of the texture.
    215           */
    216          args[0] = LLVMBuildGEP(gallivm->builder, tmp_ptr, &dst_offset, 1, "");
    217          args[1] = ptr_addr;
    218          args[2] = LLVMConstInt(i32t, i, 0);
    219          args[3] = LLVMConstInt(i32t, j, 0);
    220          LLVMBuildCall(builder, function, args, ARRAY_SIZE(args), "");
    221       }
    222    }
    223 
    224    /* Finally store the block - pointless mem copy + update tag. */
    225    tmp_ptr = LLVMBuildBitCast(builder, tmp_ptr, LLVMPointerType(i32x4, 0), "");
    226    for (i = 0; i < 4; ++i) {
    227       LLVMValueRef tmp_offset = lp_build_const_int32(gallivm, i);
    228       LLVMValueRef ptr = LLVMBuildGEP(gallivm->builder, tmp_ptr, &tmp_offset, 1, "");
    229       col[i] = LLVMBuildLoad(builder, ptr, "");
    230    }
    231 
    232    tag_value = LLVMBuildPtrToInt(gallivm->builder, ptr_addr,
    233                                  LLVMInt64TypeInContext(gallivm->context), "");
    234    store_cached_block(gallivm, col, tag_value, hash_index, cache);
    235 }
    236 
    237 
    238 /*
    239  * Do a cached lookup.
    240  *
    241  * Returns (vectors of) 4x8 rgba aos value
    242  */
    243 LLVMValueRef
    244 lp_build_fetch_cached_texels(struct gallivm_state *gallivm,
    245                              const struct util_format_description *format_desc,
    246                              unsigned n,
    247                              LLVMValueRef base_ptr,
    248                              LLVMValueRef offset,
    249                              LLVMValueRef i,
    250                              LLVMValueRef j,
    251                              LLVMValueRef cache)
    252 
    253 {
    254    LLVMBuilderRef builder = gallivm->builder;
    255    unsigned count, low_bit, log2size;
    256    LLVMValueRef color, offset_stored, addr, ptr_addrtrunc, tmp;
    257    LLVMValueRef ij_index, hash_index, hash_mask, block_index;
    258    LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
    259    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
    260    LLVMTypeRef i64t = LLVMInt64TypeInContext(gallivm->context);
    261    struct lp_type type;
    262    struct lp_build_context bld32;
    263    memset(&type, 0, sizeof type);
    264    type.width = 32;
    265    type.length = n;
    266 
    267    assert(format_desc->block.width == 4);
    268    assert(format_desc->block.height == 4);
    269 
    270    lp_build_context_init(&bld32, gallivm, type);
    271 
    272    /*
    273     * compute hash - we use direct mapped cache, the hash function could
    274     *                be better but it needs to be simple
    275     * per-element:
    276     *    compare offset with offset stored at tag (hash)
    277     *    if not equal decode/store block, update tag
    278     *    extract color from cache
    279     *    assemble result vector
    280     */
    281 
    282    /* TODO: not ideal with 32bit pointers... */
    283 
    284    low_bit = util_logbase2(format_desc->block.bits / 8);
    285    log2size = util_logbase2(LP_BUILD_FORMAT_CACHE_SIZE);
    286    addr = LLVMBuildPtrToInt(builder, base_ptr, i64t, "");
    287    ptr_addrtrunc = LLVMBuildPtrToInt(builder, base_ptr, i32t, "");
    288    ptr_addrtrunc = lp_build_broadcast_scalar(&bld32, ptr_addrtrunc);
    289    /* For the hash function, first mask off the unused lowest bits. Then just
    290       do some xor with address bits - only use lower 32bits */
    291    ptr_addrtrunc = LLVMBuildAdd(builder, offset, ptr_addrtrunc, "");
    292    ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc,
    293                                  lp_build_const_int_vec(gallivm, type, low_bit), "");
    294    /* This only really makes sense for size 64,128,256 */
    295    hash_index = ptr_addrtrunc;
    296    ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc,
    297                                  lp_build_const_int_vec(gallivm, type, 2*log2size), "");
    298    hash_index = LLVMBuildXor(builder, ptr_addrtrunc, hash_index, "");
    299    tmp = LLVMBuildLShr(builder, hash_index,
    300                        lp_build_const_int_vec(gallivm, type, log2size), "");
    301    hash_index = LLVMBuildXor(builder, hash_index, tmp, "");
    302 
    303    hash_mask = lp_build_const_int_vec(gallivm, type, LP_BUILD_FORMAT_CACHE_SIZE - 1);
    304    hash_index = LLVMBuildAnd(builder, hash_index, hash_mask, "");
    305    ij_index = LLVMBuildShl(builder, i, lp_build_const_int_vec(gallivm, type, 2), "");
    306    ij_index = LLVMBuildAdd(builder, ij_index, j, "");
    307    block_index = LLVMBuildShl(builder, hash_index,
    308                               lp_build_const_int_vec(gallivm, type, 4), "");
    309    block_index = LLVMBuildAdd(builder, ij_index, block_index, "");
    310 
    311    if (n > 1) {
    312       color = LLVMGetUndef(LLVMVectorType(i32t, n));
    313       for (count = 0; count < n; count++) {
    314          LLVMValueRef index, cond, colorx;
    315          LLVMValueRef block_indexx, hash_indexx, addrx, offsetx, ptr_addrx;
    316          struct lp_build_if_state if_ctx;
    317 
    318          index = lp_build_const_int32(gallivm, count);
    319          offsetx = LLVMBuildExtractElement(builder, offset, index, "");
    320          addrx = LLVMBuildZExt(builder, offsetx, i64t, "");
    321          addrx = LLVMBuildAdd(builder, addrx, addr, "");
    322          block_indexx = LLVMBuildExtractElement(builder, block_index, index, "");
    323          hash_indexx = LLVMBuildLShr(builder, block_indexx,
    324                                      lp_build_const_int32(gallivm, 4), "");
    325          offset_stored = lookup_tag_data(gallivm, cache, hash_indexx);
    326          cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addrx, "");
    327 
    328          lp_build_if(&if_ctx, gallivm, cond);
    329          {
    330             ptr_addrx = LLVMBuildIntToPtr(builder, addrx,
    331                                           LLVMPointerType(i8t, 0), "");
    332             update_cached_block(gallivm, format_desc, ptr_addrx, hash_indexx, cache);
    333 #if LP_BUILD_FORMAT_CACHE_DEBUG
    334             update_cache_access(gallivm, cache, 1,
    335                                 LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
    336 #endif
    337          }
    338          lp_build_endif(&if_ctx);
    339 
    340          colorx = lookup_cached_pixel(gallivm, cache, block_indexx);
    341 
    342          color = LLVMBuildInsertElement(builder, color, colorx,
    343                                         lp_build_const_int32(gallivm, count), "");
    344       }
    345    }
    346    else {
    347       LLVMValueRef cond;
    348       struct lp_build_if_state if_ctx;
    349 
    350       tmp = LLVMBuildZExt(builder, offset, i64t, "");
    351       addr = LLVMBuildAdd(builder, tmp, addr, "");
    352       offset_stored = lookup_tag_data(gallivm, cache, hash_index);
    353       cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addr, "");
    354 
    355       lp_build_if(&if_ctx, gallivm, cond);
    356       {
    357          tmp = LLVMBuildIntToPtr(builder, addr, LLVMPointerType(i8t, 0), "");
    358          update_cached_block(gallivm, format_desc, tmp, hash_index, cache);
    359 #if LP_BUILD_FORMAT_CACHE_DEBUG
    360          update_cache_access(gallivm, cache, 1,
    361                              LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
    362 #endif
    363       }
    364       lp_build_endif(&if_ctx);
    365 
    366       color = lookup_cached_pixel(gallivm, cache, block_index);
    367    }
    368 #if LP_BUILD_FORMAT_CACHE_DEBUG
    369    update_cache_access(gallivm, cache, n,
    370                        LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL);
    371 #endif
    372    return LLVMBuildBitCast(builder, color, LLVMVectorType(i8t, n * 4), "");
    373 }
    374 
    375