Home | History | Annotate | Download | only in llvmpipe
      1 /**************************************************************************
      2  *
      3  * Copyright 2010 VMware, Inc.  All Rights Reserved.
      4  *
      5  * Permission is hereby granted, free of charge, to any person obtaining a
      6  * copy of this software and associated documentation files (the
      7  * "Software"), to deal in the Software without restriction, including
      8  * without limitation the rights to use, copy, modify, merge, publish,
      9  * distribute, sub license, and/or sell copies of the Software, and to
     10  * permit persons to whom the Software is furnished to do so, subject to
     11  * the following conditions:
     12  *
     13  * The above copyright notice and this permission notice (including the
     14  * next paragraph) shall be included in all copies or substantial portions
     15  * of the Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     20  * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
     21  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     22  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     23  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     24  *
     25  **************************************************************************/
     26 
     27 
     28 /**
     29  * Code to convert images from tiled to linear and back.
     30  * XXX there are quite a few assumptions about color and z/stencil being
     31  * 32bpp.
     32  */
     33 
     34 
     35 #include "util/u_format.h"
     36 #include "util/u_memory.h"
     37 #include "lp_tile_soa.h"
     38 #include "lp_tile_image.h"
     39 
     40 
     41 #define BYTES_PER_TILE (TILE_SIZE * TILE_SIZE * 4)
     42 
     43 
     44 /**
     45  * Untile a 4x4 block of 32-bit words (all contiguous) to linear layout
     46  * at dst, with dst_stride words between rows.
     47  */
     48 static void
     49 untile_4_4_uint32(const uint32_t *src, uint32_t *dst, unsigned dst_stride)
     50 {
     51    uint32_t *d0 = dst;
     52    uint32_t *d1 = d0 + dst_stride;
     53    uint32_t *d2 = d1 + dst_stride;
     54    uint32_t *d3 = d2 + dst_stride;
     55 
     56    d0[0] = src[0];   d0[1] = src[1];   d0[2] = src[4];   d0[3] = src[5];
     57    d1[0] = src[2];   d1[1] = src[3];   d1[2] = src[6];   d1[3] = src[7];
     58    d2[0] = src[8];   d2[1] = src[9];   d2[2] = src[12];  d2[3] = src[13];
     59    d3[0] = src[10];  d3[1] = src[11];  d3[2] = src[14];  d3[3] = src[15];
     60 }
     61 
     62 
     63 
     64 /**
     65  * Untile a 4x4 block of 16-bit words (all contiguous) to linear layout
     66  * at dst, with dst_stride words between rows.
     67  */
     68 static void
     69 untile_4_4_uint16(const uint16_t *src, uint16_t *dst, unsigned dst_stride)
     70 {
     71    uint16_t *d0 = dst;
     72    uint16_t *d1 = d0 + dst_stride;
     73    uint16_t *d2 = d1 + dst_stride;
     74    uint16_t *d3 = d2 + dst_stride;
     75 
     76    d0[0] = src[0];   d0[1] = src[1];   d0[2] = src[4];   d0[3] = src[5];
     77    d1[0] = src[2];   d1[1] = src[3];   d1[2] = src[6];   d1[3] = src[7];
     78    d2[0] = src[8];   d2[1] = src[9];   d2[2] = src[12];  d2[3] = src[13];
     79    d3[0] = src[10];  d3[1] = src[11];  d3[2] = src[14];  d3[3] = src[15];
     80 }
     81 
     82 
     83 
     84 /**
     85  * Convert a 4x4 rect of 32-bit words from a linear layout into tiled
     86  * layout (in which all 16 words are contiguous).
     87  */
     88 static void
     89 tile_4_4_uint32(const uint32_t *src, uint32_t *dst, unsigned src_stride)
     90 {
     91    const uint32_t *s0 = src;
     92    const uint32_t *s1 = s0 + src_stride;
     93    const uint32_t *s2 = s1 + src_stride;
     94    const uint32_t *s3 = s2 + src_stride;
     95 
     96    dst[0] = s0[0];   dst[1] = s0[1];   dst[4] = s0[2];   dst[5] = s0[3];
     97    dst[2] = s1[0];   dst[3] = s1[1];   dst[6] = s1[2];   dst[7] = s1[3];
     98    dst[8] = s2[0];   dst[9] = s2[1];   dst[12] = s2[2];  dst[13] = s2[3];
     99    dst[10] = s3[0];  dst[11] = s3[1];  dst[14] = s3[2];  dst[15] = s3[3];
    100 }
    101 
    102 
    103 
    104 /**
    105  * Convert a 4x4 rect of 16-bit words from a linear layout into tiled
    106  * layout (in which all 16 words are contiguous).
    107  */
    108 static void
    109 tile_4_4_uint16(const uint16_t *src, uint16_t *dst, unsigned src_stride)
    110 {
    111    const uint16_t *s0 = src;
    112    const uint16_t *s1 = s0 + src_stride;
    113    const uint16_t *s2 = s1 + src_stride;
    114    const uint16_t *s3 = s2 + src_stride;
    115 
    116    dst[0] = s0[0];   dst[1] = s0[1];   dst[4] = s0[2];   dst[5] = s0[3];
    117    dst[2] = s1[0];   dst[3] = s1[1];   dst[6] = s1[2];   dst[7] = s1[3];
    118    dst[8] = s2[0];   dst[9] = s2[1];   dst[12] = s2[2];  dst[13] = s2[3];
    119    dst[10] = s3[0];  dst[11] = s3[1];  dst[14] = s3[2];  dst[15] = s3[3];
    120 }
    121 
    122 
    123 
    124 /**
    125  * Convert a tiled image into a linear image.
    126  * \param dst_stride  dest row stride in bytes
    127  */
    128 void
    129 lp_tiled_to_linear(const void *src, void *dst,
    130                    unsigned x, unsigned y,
    131                    unsigned width, unsigned height,
    132                    enum pipe_format format,
    133                    unsigned dst_stride,
    134                    unsigned tiles_per_row)
    135 {
    136    assert(x % TILE_SIZE == 0);
    137    assert(y % TILE_SIZE == 0);
    138    /*assert(width % TILE_SIZE == 0);
    139      assert(height % TILE_SIZE == 0);*/
    140 
    141    /* Note that Z/stencil surfaces use a different tiling size than
    142     * color surfaces.
    143     */
    144    if (util_format_is_depth_or_stencil(format)) {
    145       const uint bpp = util_format_get_blocksize(format);
    146       const uint src_stride = dst_stride * TILE_VECTOR_WIDTH;
    147       const uint tile_w = TILE_VECTOR_WIDTH, tile_h = TILE_VECTOR_HEIGHT;
    148       const uint tiles_per_row = src_stride / (tile_w * tile_h * bpp);
    149 
    150       dst_stride /= bpp;   /* convert from bytes to words */
    151 
    152       if (bpp == 4) {
    153          const uint32_t *src32 = (const uint32_t *) src;
    154          uint32_t *dst32 = (uint32_t *) dst;
    155          uint i, j;
    156 
    157          for (j = 0; j < height; j += tile_h) {
    158             for (i = 0; i < width; i += tile_w) {
    159                /* compute offsets in 32-bit words */
    160                uint ii = i + x, jj = j + y;
    161                uint src_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
    162                   * (tile_w * tile_h);
    163                uint dst_offset = jj * dst_stride + ii;
    164                untile_4_4_uint32(src32 + src_offset,
    165                                  dst32 + dst_offset,
    166                                  dst_stride);
    167             }
    168          }
    169       }
    170       else {
    171          const uint16_t *src16 = (const uint16_t *) src;
    172          uint16_t *dst16 = (uint16_t *) dst;
    173          uint i, j;
    174 
    175          assert(bpp == 2);
    176 
    177          for (j = 0; j < height; j += tile_h) {
    178             for (i = 0; i < width; i += tile_w) {
    179                /* compute offsets in 16-bit words */
    180                uint ii = i + x, jj = j + y;
    181                uint src_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
    182                   * (tile_w * tile_h);
    183                uint dst_offset = jj * dst_stride + ii;
    184                untile_4_4_uint16(src16 + src_offset,
    185                                  dst16 + dst_offset,
    186                                  dst_stride);
    187             }
    188          }
    189       }
    190    }
    191    else {
    192       /* color image */
    193       const uint bpp = 4;
    194       const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE;
    195       const uint bytes_per_tile = tile_w * tile_h * bpp;
    196       uint i, j;
    197 
    198       for (j = 0; j < height; j += tile_h) {
    199          for (i = 0; i < width; i += tile_w) {
    200             uint ii = i + x, jj = j + y;
    201             uint tile_offset = ((jj / tile_h) * tiles_per_row + ii / tile_w);
    202             uint byte_offset = tile_offset * bytes_per_tile;
    203             const uint8_t *src_tile = (uint8_t *) src + byte_offset;
    204 
    205             lp_tile_unswizzle_4ub(format,
    206                               src_tile,
    207                               dst, dst_stride,
    208                               ii, jj);
    209          }
    210       }
    211    }
    212 }
    213 
    214 
    215 /**
    216  * Convert a linear image into a tiled image.
    217  * \param src_stride  source row stride in bytes
    218  */
    219 void
    220 lp_linear_to_tiled(const void *src, void *dst,
    221                    unsigned x, unsigned y,
    222                    unsigned width, unsigned height,
    223                    enum pipe_format format,
    224                    unsigned src_stride,
    225                    unsigned tiles_per_row)
    226 {
    227    assert(x % TILE_SIZE == 0);
    228    assert(y % TILE_SIZE == 0);
    229    /*
    230    assert(width % TILE_SIZE == 0);
    231    assert(height % TILE_SIZE == 0);
    232    */
    233 
    234    if (util_format_is_depth_or_stencil(format)) {
    235       const uint bpp = util_format_get_blocksize(format);
    236       const uint dst_stride = src_stride * TILE_VECTOR_WIDTH;
    237       const uint tile_w = TILE_VECTOR_WIDTH, tile_h = TILE_VECTOR_HEIGHT;
    238       const uint tiles_per_row = dst_stride / (tile_w * tile_h * bpp);
    239 
    240       src_stride /= bpp;   /* convert from bytes to words */
    241 
    242       if (bpp == 4) {
    243          const uint32_t *src32 = (const uint32_t *) src;
    244          uint32_t *dst32 = (uint32_t *) dst;
    245          uint i, j;
    246 
    247          for (j = 0; j < height; j += tile_h) {
    248             for (i = 0; i < width; i += tile_w) {
    249                /* compute offsets in 32-bit words */
    250                uint ii = i + x, jj = j + y;
    251                uint src_offset = jj * src_stride + ii;
    252                uint dst_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
    253                   * (tile_w * tile_h);
    254                tile_4_4_uint32(src32 + src_offset,
    255                                dst32 + dst_offset,
    256                                src_stride);
    257             }
    258          }
    259       }
    260       else {
    261          const uint16_t *src16 = (const uint16_t *) src;
    262          uint16_t *dst16 = (uint16_t *) dst;
    263          uint i, j;
    264 
    265          assert(bpp == 2);
    266 
    267          for (j = 0; j < height; j += tile_h) {
    268             for (i = 0; i < width; i += tile_w) {
    269                /* compute offsets in 16-bit words */
    270                uint ii = i + x, jj = j + y;
    271                uint src_offset = jj * src_stride + ii;
    272                uint dst_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
    273                   * (tile_w * tile_h);
    274                tile_4_4_uint16(src16 + src_offset,
    275                                dst16 + dst_offset,
    276                                src_stride);
    277             }
    278          }
    279       }
    280    }
    281    else {
    282       const uint bpp = 4;
    283       const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE;
    284       const uint bytes_per_tile = tile_w * tile_h * bpp;
    285       uint i, j;
    286 
    287       for (j = 0; j < height; j += TILE_SIZE) {
    288          for (i = 0; i < width; i += TILE_SIZE) {
    289             uint ii = i + x, jj = j + y;
    290             uint tile_offset = ((jj / tile_h) * tiles_per_row + ii / tile_w);
    291             uint byte_offset = tile_offset * bytes_per_tile;
    292             uint8_t *dst_tile = (uint8_t *) dst + byte_offset;
    293 
    294             lp_tile_swizzle_4ub(format,
    295                              dst_tile,
    296                              src, src_stride,
    297                              ii, jj);
    298          }
    299       }
    300    }
    301 }
    302 
    303 
    304 /**
    305  * For testing only.
    306  */
    307 void
    308 test_tiled_linear_conversion(void *data,
    309                              enum pipe_format format,
    310                              unsigned width, unsigned height,
    311                              unsigned stride)
    312 {
    313    /* size in tiles */
    314    unsigned wt = (width + TILE_SIZE - 1) / TILE_SIZE;
    315    unsigned ht = (height + TILE_SIZE - 1) / TILE_SIZE;
    316 
    317    uint8_t *tiled = MALLOC(wt * ht * TILE_SIZE * TILE_SIZE * 4);
    318 
    319    /*unsigned tiled_stride = wt * TILE_SIZE * TILE_SIZE * 4;*/
    320 
    321    lp_linear_to_tiled(data, tiled, 0, 0, width, height, format,
    322                       stride, wt);
    323 
    324    lp_tiled_to_linear(tiled, data, 0, 0, width, height, format,
    325                       stride, wt);
    326 
    327    FREE(tiled);
    328 }
    329 
    330