Home | History | Annotate | Download | only in gallivm
      1 /**************************************************************************
      2  *
      3  * Copyright 2009 VMware, Inc.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial portions
     16  * of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
     22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  **************************************************************************/
     27 
     28 
     29 #include "pipe/p_defines.h"
     30 
     31 #include "util/u_format.h"
     32 #include "util/u_memory.h"
     33 #include "util/u_string.h"
     34 
     35 #include "lp_bld_type.h"
     36 #include "lp_bld_const.h"
     37 #include "lp_bld_conv.h"
     38 #include "lp_bld_swizzle.h"
     39 #include "lp_bld_gather.h"
     40 #include "lp_bld_debug.h"
     41 #include "lp_bld_format.h"
     42 
     43 
     44 void
     45 lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
     46                             struct lp_build_context *bld,
     47                             const LLVMValueRef *unswizzled,
     48                             LLVMValueRef swizzled_out[4])
     49 {
     50    assert(UTIL_FORMAT_SWIZZLE_0 == PIPE_SWIZZLE_ZERO);
     51    assert(UTIL_FORMAT_SWIZZLE_1 == PIPE_SWIZZLE_ONE);
     52 
     53    if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
     54       /*
     55        * Return zzz1 for depth-stencil formats.
     56        *
     57        * XXX: Allow to control the depth swizzle with an additional parameter,
     58        * as the caller may wish another depth swizzle, or retain the stencil
     59        * value.
     60        */
     61       enum util_format_swizzle swizzle = format_desc->swizzle[0];
     62       LLVMValueRef depth = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
     63       swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth;
     64       swizzled_out[3] = bld->one;
     65    }
     66    else {
     67       unsigned chan;
     68       for (chan = 0; chan < 4; ++chan) {
     69          enum util_format_swizzle swizzle = format_desc->swizzle[chan];
     70          swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
     71       }
     72    }
     73 }
     74 
     75 
     76 /**
     77  * Unpack several pixels in SoA.
     78  *
     79  * It takes a vector of packed pixels:
     80  *
     81  *   packed = {P0, P1, P2, P3, ..., Pn}
     82  *
     83  * And will produce four vectors:
     84  *
     85  *   red    = {R0, R1, R2, R3, ..., Rn}
     86  *   green  = {G0, G1, G2, G3, ..., Gn}
     87  *   blue   = {B0, B1, B2, B3, ..., Bn}
     88  *   alpha  = {A0, A1, A2, A3, ..., An}
     89  *
     90  * It requires that a packed pixel fits into an element of the output
     91  * channels. The common case is when converting pixel with a depth of 32 bit or
     92  * less into floats.
     93  *
     94  * \param format_desc  the format of the 'packed' incoming pixel vector
     95  * \param type  the desired type for rgba_out (type.length = n, above)
     96  * \param packed  the incoming vector of packed pixels
     97  * \param rgba_out  returns the SoA R,G,B,A vectors
     98  */
     99 void
    100 lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
    101                          const struct util_format_description *format_desc,
    102                          struct lp_type type,
    103                          LLVMValueRef packed,
    104                          LLVMValueRef rgba_out[4])
    105 {
    106    LLVMBuilderRef builder = gallivm->builder;
    107    struct lp_build_context bld;
    108    LLVMValueRef inputs[4];
    109    unsigned start;
    110    unsigned chan;
    111 
    112    assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
    113    assert(format_desc->block.width == 1);
    114    assert(format_desc->block.height == 1);
    115    assert(format_desc->block.bits <= type.width);
    116    /* FIXME: Support more output types */
    117    assert(type.floating);
    118    assert(type.width == 32);
    119 
    120    lp_build_context_init(&bld, gallivm, type);
    121 
    122    /* Decode the input vector components */
    123    start = 0;
    124    for (chan = 0; chan < format_desc->nr_channels; ++chan) {
    125       const unsigned width = format_desc->channel[chan].size;
    126       const unsigned stop = start + width;
    127       LLVMValueRef input;
    128 
    129       input = packed;
    130 
    131       switch(format_desc->channel[chan].type) {
    132       case UTIL_FORMAT_TYPE_VOID:
    133          input = lp_build_undef(gallivm, type);
    134          break;
    135 
    136       case UTIL_FORMAT_TYPE_UNSIGNED:
    137          /*
    138           * Align the LSB
    139           */
    140 
    141          if (start) {
    142             input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), "");
    143          }
    144 
    145          /*
    146           * Zero the MSBs
    147           */
    148 
    149          if (stop < format_desc->block.bits) {
    150             unsigned mask = ((unsigned long long)1 << width) - 1;
    151             input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), "");
    152          }
    153 
    154          /*
    155           * Type conversion
    156           */
    157 
    158          if (type.floating) {
    159             if(format_desc->channel[chan].normalized)
    160                input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
    161             else
    162                input = LLVMBuildSIToFP(builder, input,
    163                                        lp_build_vec_type(gallivm, type), "");
    164          }
    165          else {
    166             /* FIXME */
    167             assert(0);
    168             input = lp_build_undef(gallivm, type);
    169          }
    170 
    171          break;
    172 
    173       case UTIL_FORMAT_TYPE_SIGNED:
    174          /*
    175           * Align the sign bit first.
    176           */
    177 
    178          if (stop < type.width) {
    179             unsigned bits = type.width - stop;
    180             LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
    181             input = LLVMBuildShl(builder, input, bits_val, "");
    182          }
    183 
    184          /*
    185           * Align the LSB (with an arithmetic shift to preserve the sign)
    186           */
    187 
    188          if (format_desc->channel[chan].size < type.width) {
    189             unsigned bits = type.width - format_desc->channel[chan].size;
    190             LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
    191             input = LLVMBuildAShr(builder, input, bits_val, "");
    192          }
    193 
    194          /*
    195           * Type conversion
    196           */
    197 
    198          if (type.floating) {
    199             input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
    200             if (format_desc->channel[chan].normalized) {
    201                double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
    202                LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
    203                input = LLVMBuildFMul(builder, input, scale_val, "");
    204             }
    205          }
    206          else {
    207             /* FIXME */
    208             assert(0);
    209             input = lp_build_undef(gallivm, type);
    210          }
    211 
    212          break;
    213 
    214       case UTIL_FORMAT_TYPE_FLOAT:
    215          if (type.floating) {
    216             assert(start == 0);
    217             assert(stop == 32);
    218             assert(type.width == 32);
    219             input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), "");
    220          }
    221          else {
    222             /* FIXME */
    223             assert(0);
    224             input = lp_build_undef(gallivm, type);
    225          }
    226          break;
    227 
    228       case UTIL_FORMAT_TYPE_FIXED:
    229          if (type.floating) {
    230             double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
    231             LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
    232             input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
    233             input = LLVMBuildFMul(builder, input, scale_val, "");
    234          }
    235          else {
    236             /* FIXME */
    237             assert(0);
    238             input = lp_build_undef(gallivm, type);
    239          }
    240          break;
    241 
    242       default:
    243          assert(0);
    244          input = lp_build_undef(gallivm, type);
    245          break;
    246       }
    247 
    248       inputs[chan] = input;
    249 
    250       start = stop;
    251    }
    252 
    253    lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
    254 }
    255 
    256 
    257 void
    258 lp_build_rgba8_to_f32_soa(struct gallivm_state *gallivm,
    259                           struct lp_type dst_type,
    260                           LLVMValueRef packed,
    261                           LLVMValueRef *rgba)
    262 {
    263    LLVMBuilderRef builder = gallivm->builder;
    264    LLVMValueRef mask = lp_build_const_int_vec(gallivm, dst_type, 0xff);
    265    unsigned chan;
    266 
    267    packed = LLVMBuildBitCast(builder, packed,
    268                              lp_build_int_vec_type(gallivm, dst_type), "");
    269 
    270    /* Decode the input vector components */
    271    for (chan = 0; chan < 4; ++chan) {
    272       unsigned start = chan*8;
    273       unsigned stop = start + 8;
    274       LLVMValueRef input;
    275 
    276       input = packed;
    277 
    278       if (start)
    279          input = LLVMBuildLShr(builder, input,
    280                                lp_build_const_int_vec(gallivm, dst_type, start), "");
    281 
    282       if (stop < 32)
    283          input = LLVMBuildAnd(builder, input, mask, "");
    284 
    285       input = lp_build_unsigned_norm_to_float(gallivm, 8, dst_type, input);
    286 
    287       rgba[chan] = input;
    288    }
    289 }
    290 
    291 
    292 
    293 /**
    294  * Fetch a texels from a texture, returning them in SoA layout.
    295  *
    296  * \param type  the desired return type for 'rgba'.  The vector length
    297  *              is the number of texels to fetch
    298  *
    299  * \param base_ptr  points to start of the texture image block.  For non-
    300  *                  compressed formats, this simply points to the texel.
    301  *                  For compressed formats, it points to the start of the
    302  *                  compressed data block.
    303  *
    304  * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
    305  *              these will always be (0,0).  For compressed formats, i will
    306  *              be in [0, block_width-1] and j will be in [0, block_height-1].
    307  */
    308 void
    309 lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
    310                         const struct util_format_description *format_desc,
    311                         struct lp_type type,
    312                         LLVMValueRef base_ptr,
    313                         LLVMValueRef offset,
    314                         LLVMValueRef i,
    315                         LLVMValueRef j,
    316                         LLVMValueRef rgba_out[4])
    317 {
    318    LLVMBuilderRef builder = gallivm->builder;
    319 
    320    if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
    321        (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
    322         format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
    323        format_desc->block.width == 1 &&
    324        format_desc->block.height == 1 &&
    325        format_desc->block.bits <= type.width &&
    326        (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
    327         format_desc->channel[0].size == 32))
    328    {
    329       /*
    330        * The packed pixel fits into an element of the destination format. Put
    331        * the packed pixels into a vector and extract each component for all
    332        * vector elements in parallel.
    333        */
    334 
    335       LLVMValueRef packed;
    336 
    337       /*
    338        * gather the texels from the texture
    339        * Ex: packed = {BGRA, BGRA, BGRA, BGRA}.
    340        */
    341       packed = lp_build_gather(gallivm,
    342                                type.length,
    343                                format_desc->block.bits,
    344                                type.width,
    345                                base_ptr, offset);
    346 
    347       /*
    348        * convert texels to float rgba
    349        */
    350       lp_build_unpack_rgba_soa(gallivm,
    351                                format_desc,
    352                                type,
    353                                packed, rgba_out);
    354       return;
    355    }
    356 
    357    /*
    358     * Try calling lp_build_fetch_rgba_aos for all pixels.
    359     */
    360 
    361    if (util_format_fits_8unorm(format_desc) &&
    362        type.floating && type.width == 32 &&
    363        (type.length == 1 || (type.length % 4 == 0))) {
    364       struct lp_type tmp_type;
    365       LLVMValueRef tmp;
    366 
    367       memset(&tmp_type, 0, sizeof tmp_type);
    368       tmp_type.width = 8;
    369       tmp_type.length = type.length * 4;
    370       tmp_type.norm = TRUE;
    371 
    372       tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
    373                                     base_ptr, offset, i, j);
    374 
    375       lp_build_rgba8_to_f32_soa(gallivm,
    376                                 type,
    377                                 tmp,
    378                                 rgba_out);
    379 
    380       return;
    381    }
    382 
    383    /*
    384     * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
    385     *
    386     * This is not the most efficient way of fetching pixels, as we
    387     * miss some opportunities to do vectorization, but this is
    388     * convenient for formats or scenarios for which there was no
    389     * opportunity or incentive to optimize.
    390     */
    391 
    392    {
    393       unsigned k, chan;
    394       struct lp_type tmp_type;
    395 
    396       if (gallivm_debug & GALLIVM_DEBUG_PERF) {
    397          debug_printf("%s: scalar unpacking of %s\n",
    398                       __FUNCTION__, format_desc->short_name);
    399       }
    400 
    401       tmp_type = type;
    402       tmp_type.length = 4;
    403 
    404       for (chan = 0; chan < 4; ++chan) {
    405          rgba_out[chan] = lp_build_undef(gallivm, type);
    406       }
    407 
    408       /* loop over number of pixels */
    409       for(k = 0; k < type.length; ++k) {
    410          LLVMValueRef index = lp_build_const_int32(gallivm, k);
    411          LLVMValueRef offset_elem;
    412          LLVMValueRef i_elem, j_elem;
    413          LLVMValueRef tmp;
    414 
    415          offset_elem = LLVMBuildExtractElement(builder, offset,
    416                                                index, "");
    417 
    418          i_elem = LLVMBuildExtractElement(builder, i, index, "");
    419          j_elem = LLVMBuildExtractElement(builder, j, index, "");
    420 
    421          /* Get a single float[4]={R,G,B,A} pixel */
    422          tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
    423                                        base_ptr, offset_elem,
    424                                        i_elem, j_elem);
    425 
    426          /*
    427           * Insert the AoS tmp value channels into the SoA result vectors at
    428           * position = 'index'.
    429           */
    430          for (chan = 0; chan < 4; ++chan) {
    431             LLVMValueRef chan_val = lp_build_const_int32(gallivm, chan),
    432             tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
    433             rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan],
    434                                                     tmp_chan, index, "");
    435          }
    436       }
    437    }
    438 }
    439