Home | History | Annotate | Download | only in radeon
      1 /*
      2  * Copyright (C) 2010 Maciej Cencora <m.cencora (at) gmail.com>
      3  *
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining
      7  * a copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sublicense, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial
     16  * portions of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     19  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
     21  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
     22  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
     23  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
     24  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  */
     27 
     28 #include "radeon_tile.h"
     29 
     30 #include <stdint.h>
     31 #include <string.h>
     32 
     33 #include "main/macros.h"
     34 #include "radeon_debug.h"
     35 
/*
 * Size of one micro tile in bytes. Every tile shape used in this file
 * (8x4x1, 4x4x2, 8x2x2, 4x2x4 and 2x2x8 bytes per texel) adds up to this
 * value; it is divided by the texel size to obtain a per-tile offset
 * expressed in texels.
 */
#define MICRO_TILE_SIZE 32
     37 
     38 static void micro_tile_8_x_4_8bit(const void * const src, unsigned src_pitch,
     39                                   void * const dst, unsigned dst_pitch,
     40                                   unsigned width, unsigned height)
     41 {
     42     unsigned row; /* current source row */
     43     unsigned col; /* current source column */
     44     unsigned k; /* number of processed tiles */
     45     const unsigned tile_width = 8, tile_height = 4;
     46     const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
     47 
     48     k = 0;
     49     for (row = 0; row < height; row += tile_height)
     50     {
     51         for (col = 0; col < width; col += tile_width, ++k)
     52         {
     53             uint8_t *src2 = (uint8_t *)src + src_pitch * row + col;
     54             uint8_t *dst2 = (uint8_t *)dst + row * dst_pitch +
     55                              (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint8_t);
     56             unsigned j;
     57 
     58             for (j = 0; j < MIN2(tile_height, height - row); ++j)
     59             {
     60                 unsigned columns = MIN2(tile_width, width - col);
     61                 memcpy(dst2, src2, columns * sizeof(uint8_t));
     62                 dst2 += tile_width;
     63                 src2 += src_pitch;
     64             }
     65         }
     66     }
     67 }
     68 
     69 static void micro_tile_4_x_4_16bit(const void * const src, unsigned src_pitch,
     70                                    void * const dst, unsigned dst_pitch,
     71                                    unsigned width, unsigned height)
     72 {
     73     unsigned row; /* current source row */
     74     unsigned col; /* current source column */
     75     unsigned k; /* number of processed tiles */
     76     const unsigned tile_width = 4, tile_height = 4;
     77     const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
     78 
     79     k = 0;
     80     for (row = 0; row < height; row += tile_height)
     81     {
     82         for (col = 0; col < width; col += tile_width, ++k)
     83         {
     84             uint16_t *src2 = (uint16_t *)src + src_pitch * row + col;
     85             uint16_t *dst2 = (uint16_t *)dst + row * dst_pitch +
     86                              (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
     87             unsigned j;
     88 
     89             for (j = 0; j < MIN2(tile_height, height - row); ++j)
     90             {
     91                 unsigned columns = MIN2(tile_width, width - col);
     92                 memcpy(dst2, src2, columns * sizeof(uint16_t));
     93                 dst2 += tile_width;
     94                 src2 += src_pitch;
     95             }
     96         }
     97     }
     98 }
     99 
    100 static void micro_tile_8_x_2_16bit(const void * const src, unsigned src_pitch,
    101                                    void * const dst, unsigned dst_pitch,
    102                                    unsigned width, unsigned height)
    103 {
    104     unsigned row; /* current source row */
    105     unsigned col; /* current source column */
    106     unsigned k; /* number of processed tiles */
    107     const unsigned tile_width = 8, tile_height = 2;
    108     const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
    109 
    110     k = 0;
    111     for (row = 0; row < height; row += tile_height)
    112     {
    113         for (col = 0; col < width; col += tile_width, ++k)
    114         {
    115             uint16_t *src2 = (uint16_t *)src + src_pitch * row + col;
    116             uint16_t *dst2 = (uint16_t *)dst + row * dst_pitch +
    117                              (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
    118             unsigned j;
    119 
    120             for (j = 0; j < MIN2(tile_height, height - row); ++j)
    121             {
    122                 unsigned columns = MIN2(tile_width, width - col);
    123                 memcpy(dst2, src2, columns * sizeof(uint16_t));
    124                 dst2 += tile_width;
    125                 src2 += src_pitch;
    126             }
    127         }
    128     }
    129 }
    130 
    131 static void micro_tile_4_x_2_32bit(const void * const src, unsigned src_pitch,
    132                                    void * const dst, unsigned dst_pitch,
    133                                    unsigned width, unsigned height)
    134 {
    135     unsigned row; /* current source row */
    136     unsigned col; /* current source column */
    137     unsigned k; /* number of processed tiles */
    138     const unsigned tile_width = 4, tile_height = 2;
    139     const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
    140 
    141     k = 0;
    142     for (row = 0; row < height; row += tile_height)
    143     {
    144         for (col = 0; col < width; col += tile_width, ++k)
    145         {
    146             uint32_t *src2 = (uint32_t *)src + src_pitch * row + col;
    147             uint32_t *dst2 = (uint32_t *)dst + row * dst_pitch +
    148                              (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint32_t);
    149             unsigned j;
    150 
    151             for (j = 0; j < MIN2(tile_height, height - row); ++j)
    152             {
    153                 unsigned columns = MIN2(tile_width, width - col);
    154                 memcpy(dst2, src2, columns * sizeof(uint32_t));
    155                 dst2 += tile_width;
    156                 src2 += src_pitch;
    157             }
    158         }
    159     }
    160 }
    161 
    162 static void micro_tile_2_x_2_64bit(const void * const src, unsigned src_pitch,
    163                                    void * const dst, unsigned dst_pitch,
    164                                    unsigned width, unsigned height)
    165 {
    166     unsigned row; /* current source row */
    167     unsigned col; /* current source column */
    168     unsigned k; /* number of processed tiles */
    169     const unsigned tile_width = 2, tile_height = 2;
    170     const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
    171 
    172     k = 0;
    173     for (row = 0; row < height; row += tile_height)
    174     {
    175         for (col = 0; col < width; col += tile_width, ++k)
    176         {
    177             uint64_t *src2 = (uint64_t *)src + src_pitch * row + col;
    178             uint64_t *dst2 = (uint64_t *)dst + row * dst_pitch +
    179                              (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint64_t);
    180             unsigned j;
    181 
    182             for (j = 0; j < MIN2(tile_height, height - row); ++j)
    183             {
    184                 unsigned columns = MIN2(tile_width, width - col);
    185                 memcpy(dst2, src2, columns * sizeof(uint64_t));
    186                 dst2 += tile_width;
    187                 src2 += src_pitch;
    188             }
    189         }
    190     }
    191 }
    192 
    193 static void micro_tile_1_x_1_128bit(const void * src, unsigned src_pitch,
    194                                     void * dst, unsigned dst_pitch,
    195                                     unsigned width, unsigned height)
    196 {
    197     unsigned i, j;
    198     const unsigned elem_size = 16; /* sizeof(uint128_t) */
    199 
    200     for (j = 0; j < height; ++j)
    201     {
    202         for (i = 0; i < width; ++i)
    203         {
    204             memcpy(dst, src, width * elem_size);
    205             dst += dst_pitch * elem_size;
    206             src += src_pitch * elem_size;
    207         }
    208     }
    209 }
    210 
    211 void tile_image(const void * src, unsigned src_pitch,
    212                 void *dst, unsigned dst_pitch,
    213                 gl_format format, unsigned width, unsigned height)
    214 {
    215     assert(src_pitch >= width);
    216     assert(dst_pitch >= width);
    217 
    218     radeon_print(RADEON_TEXTURE, RADEON_TRACE,
    219                  "Software tiling: src_pitch %d, dst_pitch %d, width %d, height %d, bpp %d\n",
    220                  src_pitch, dst_pitch, width, height, _mesa_get_format_bytes(format));
    221 
    222     switch (_mesa_get_format_bytes(format))
    223     {
    224         case 16:
    225             micro_tile_1_x_1_128bit(src, src_pitch, dst, dst_pitch, width, height);
    226             break;
    227         case 8:
    228             micro_tile_2_x_2_64bit(src, src_pitch, dst, dst_pitch, width, height);
    229             break;
    230         case 4:
    231             micro_tile_4_x_2_32bit(src, src_pitch, dst, dst_pitch, width, height);
    232             break;
    233         case 2:
    234             if (_mesa_get_format_bits(format, GL_DEPTH_BITS))
    235             {
    236                 micro_tile_4_x_4_16bit(src, src_pitch, dst, dst_pitch, width, height);
    237             }
    238             else
    239             {
    240                 micro_tile_8_x_2_16bit(src, src_pitch, dst, dst_pitch, width, height);
    241             }
    242             break;
    243         case 1:
    244             micro_tile_8_x_4_8bit(src, src_pitch, dst, dst_pitch, width, height);
    245             break;
    246         default:
    247             assert(0);
    248             break;
    249     }
    250 }
    251 
    252 static void micro_untile_8_x_4_8bit(const void * const src, unsigned src_pitch,
    253                                     void * const dst, unsigned dst_pitch,
    254                                     unsigned width, unsigned height)
    255 {
    256     unsigned row; /* current destination row */
    257     unsigned col; /* current destination column */
    258     unsigned k; /* current tile number */
    259     const unsigned tile_width = 8, tile_height = 4;
    260     const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
    261 
    262     assert(src_pitch % tile_width == 0);
    263 
    264     k = 0;
    265     for (row = 0; row < height; row += tile_height)
    266     {
    267         for (col = 0; col < width; col += tile_width, ++k)
    268         {
    269             uint8_t *src2 = (uint8_t *)src + row * src_pitch +
    270                              (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint8_t);
    271             uint8_t *dst2 = (uint8_t *)dst + dst_pitch * row + col;
    272             unsigned j;
    273 
    274             for (j = 0; j < MIN2(tile_height, height - row); ++j)
    275             {
    276                 unsigned columns = MIN2(tile_width, width - col);
    277                 memcpy(dst2, src2, columns * sizeof(uint8_t));
    278                 dst2 += dst_pitch;
    279                 src2 += tile_width;
    280             }
    281         }
    282     }
    283 }
    284 
    285 static void micro_untile_8_x_2_16bit(const void * const src, unsigned src_pitch,
    286                                      void * const dst, unsigned dst_pitch,
    287                                      unsigned width, unsigned height)
    288 {
    289     unsigned row; /* current destination row */
    290     unsigned col; /* current destination column */
    291     unsigned k; /* current tile number */
    292     const unsigned tile_width = 8, tile_height = 2;
    293     const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
    294 
    295     assert(src_pitch % tile_width == 0);
    296 
    297     k = 0;
    298     for (row = 0; row < height; row += tile_height)
    299     {
    300         for (col = 0; col < width; col += tile_width, ++k)
    301         {
    302             uint16_t *src2 = (uint16_t *)src + row * src_pitch +
    303                              (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
    304             uint16_t *dst2 = (uint16_t *)dst + dst_pitch * row + col;
    305             unsigned j;
    306 
    307             for (j = 0; j < MIN2(tile_height, height - row); ++j)
    308             {
    309                 unsigned columns = MIN2(tile_width, width - col);
    310                 memcpy(dst2, src2, columns * sizeof(uint16_t));
    311                 dst2 += dst_pitch;
    312                 src2 += tile_width;
    313             }
    314         }
    315     }
    316 }
    317 
    318 static void micro_untile_4_x_4_16bit(const void * const src, unsigned src_pitch,
    319                                      void * const dst, unsigned dst_pitch,
    320                                      unsigned width, unsigned height)
    321 {
    322     unsigned row; /* current destination row */
    323     unsigned col; /* current destination column */
    324     unsigned k; /* current tile number */
    325     const unsigned tile_width = 4, tile_height = 4;
    326     const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
    327 
    328     assert(src_pitch % tile_width == 0);
    329 
    330     k = 0;
    331     for (row = 0; row < height; row += tile_height)
    332     {
    333         for (col = 0; col < width; col += tile_width, ++k)
    334         {
    335             uint16_t *src2 = (uint16_t *)src + row * src_pitch +
    336                              (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
    337             uint16_t *dst2 = (uint16_t *)dst + dst_pitch * row + col;
    338             unsigned j;
    339 
    340             for (j = 0; j < MIN2(tile_height, height - row); ++j)
    341             {
    342                 unsigned columns = MIN2(tile_width, width - col);
    343                 memcpy(dst2, src2, columns * sizeof(uint16_t));
    344                 dst2 += dst_pitch;
    345                 src2 += tile_width;
    346             }
    347         }
    348     }
    349 }
    350 
    351 static void micro_untile_4_x_2_32bit(const void * const src, unsigned src_pitch,
    352                                      void * const dst, unsigned dst_pitch,
    353                                      unsigned width, unsigned height)
    354 {
    355     unsigned row; /* current destination row */
    356     unsigned col; /* current destination column */
    357     unsigned k; /* current tile number */
    358     const unsigned tile_width = 4, tile_height = 2;
    359     const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
    360 
    361     assert(src_pitch % tile_width == 0);
    362 
    363     k = 0;
    364     for (row = 0; row < height; row += tile_height)
    365     {
    366         for (col = 0; col < width; col += tile_width, ++k)
    367         {
    368             uint32_t *src2 = (uint32_t *)src + row * src_pitch +
    369                              (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint32_t);
    370             uint32_t *dst2 = (uint32_t *)dst + dst_pitch * row + col;
    371             unsigned j;
    372 
    373             for (j = 0; j < MIN2(tile_height, height - row); ++j)
    374             {
    375                 unsigned columns = MIN2(tile_width, width - col);
    376                 memcpy(dst2, src2, columns * sizeof(uint32_t));
    377                 dst2 += dst_pitch;
    378                 src2 += tile_width;
    379             }
    380         }
    381     }
    382 }
    383 
    384 static void micro_untile_2_x_2_64bit(const void * const src, unsigned src_pitch,
    385                                      void * const dst, unsigned dst_pitch,
    386                                      unsigned width, unsigned height)
    387 {
    388     unsigned row; /* current destination row */
    389     unsigned col; /* current destination column */
    390     unsigned k; /* current tile number */
    391     const unsigned tile_width = 2, tile_height = 2;
    392     const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
    393 
    394     assert(src_pitch % tile_width == 0);
    395 
    396     k = 0;
    397     for (row = 0; row < height; row += tile_height)
    398     {
    399         for (col = 0; col < width; col += tile_width, ++k)
    400         {
    401             uint64_t *src2 = (uint64_t *)src + row * src_pitch +
    402                              (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint64_t);
    403             uint64_t *dst2 = (uint64_t *)dst + dst_pitch * row + col;
    404             unsigned j;
    405 
    406             for (j = 0; j < MIN2(tile_height, height - row); ++j)
    407             {
    408                 unsigned columns = MIN2(tile_width, width - col);
    409                 memcpy(dst2, src2, columns * sizeof(uint64_t));
    410                 dst2 += dst_pitch;
    411                 src2 += tile_width;
    412             }
    413         }
    414     }
    415 }
    416 
    417 static void micro_untile_1_x_1_128bit(const void * src, unsigned src_pitch,
    418                                       void * dst, unsigned dst_pitch,
    419                                       unsigned width, unsigned height)
    420 {
    421     unsigned i, j;
    422     const unsigned elem_size = 16; /* sizeof(uint128_t) */
    423 
    424     for (j = 0; j < height; ++j)
    425     {
    426         for (i = 0; i < width; ++i)
    427         {
    428             memcpy(dst, src, width * elem_size);
    429             dst += dst_pitch * elem_size;
    430             src += src_pitch * elem_size;
    431         }
    432     }
    433 }
    434 
    435 void untile_image(const void * src, unsigned src_pitch,
    436                   void *dst, unsigned dst_pitch,
    437                   gl_format format, unsigned width, unsigned height)
    438 {
    439     assert(src_pitch >= width);
    440     assert(dst_pitch >= width);
    441 
    442     radeon_print(RADEON_TEXTURE, RADEON_TRACE,
    443                  "Software untiling: src_pitch %d, dst_pitch %d, width %d, height %d, bpp %d\n",
    444                  src_pitch, dst_pitch, width, height, _mesa_get_format_bytes(format));
    445 
    446     switch (_mesa_get_format_bytes(format))
    447     {
    448         case 16:
    449             micro_untile_1_x_1_128bit(src, src_pitch, dst, dst_pitch, width, height);
    450             break;
    451         case 8:
    452             micro_untile_2_x_2_64bit(src, src_pitch, dst, dst_pitch, width, height);
    453             break;
    454         case 4:
    455             micro_untile_4_x_2_32bit(src, src_pitch, dst, dst_pitch, width, height);
    456             break;
    457         case 2:
    458             if (_mesa_get_format_bits(format, GL_DEPTH_BITS))
    459             {
    460                 micro_untile_4_x_4_16bit(src, src_pitch, dst, dst_pitch, width, height);
    461             }
    462             else
    463             {
    464                 micro_untile_8_x_2_16bit(src, src_pitch, dst, dst_pitch, width, height);
    465             }
    466             break;
    467         case 1:
    468             micro_untile_8_x_4_8bit(src, src_pitch, dst, dst_pitch, width, height);
    469             break;
    470         default:
    471             assert(0);
    472             break;
    473     }
    474 }
    475 
    476 void get_tile_size(gl_format format, unsigned *block_width, unsigned *block_height)
    477 {
    478     switch (_mesa_get_format_bytes(format))
    479     {
    480         case 16:
    481             *block_width = 1;
    482             *block_height = 1;
    483             break;
    484         case 8:
    485             *block_width = 2;
    486             *block_height = 2;
    487             break;
    488         case 4:
    489             *block_width = 4;
    490             *block_height = 2;
    491             break;
    492         case 2:
    493             if (_mesa_get_format_bits(format, GL_DEPTH_BITS))
    494             {
    495                 *block_width = 4;
    496                 *block_height = 4;
    497             }
    498             else
    499             {
    500                 *block_width = 8;
    501                 *block_height = 2;
    502             }
    503             break;
    504         case 1:
    505             *block_width = 8;
    506             *block_height = 4;
    507             break;
    508         default:
    509             assert(0);
    510             break;
    511     }
    512 }
    513