Home | History | Annotate | Download | only in radeon
      1 /*
      2  * Copyright (C) 2010 Maciej Cencora <m.cencora (at) gmail.com>
      3  *
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining
      7  * a copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sublicense, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial
     16  * portions of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     19  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
     21  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
     22  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
     23  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
     24  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  */
     27 
     28 #include "radeon_screen.h"
     29 #include "radeon_tile.h"
     30 
     31 #include <stdint.h>
     32 #include <string.h>
     33 
     34 #include "main/macros.h"
     35 #include "radeon_debug.h"
     36 
     37 #define MICRO_TILE_SIZE 32 /* bytes per micro tile; the helpers below divide it by the texel size to get texels per tile */
     38 
     39 static void micro_tile_8_x_4_8bit(const void * const src, unsigned src_pitch,
     40                                   void * const dst, unsigned dst_pitch,
     41                                   unsigned width, unsigned height)
     42 {
     43     unsigned row; /* current source row */
     44     unsigned col; /* current source column */
     45     unsigned k; /* number of processed tiles */
     46     const unsigned tile_width = 8, tile_height = 4;
     47     const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
     48 
     49     k = 0;
     50     for (row = 0; row < height; row += tile_height)
     51     {
     52         for (col = 0; col < width; col += tile_width, ++k)
     53         {
     54             uint8_t *src2 = (uint8_t *)src + src_pitch * row + col;
     55             uint8_t *dst2 = (uint8_t *)dst + row * dst_pitch +
     56                              (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint8_t);
     57             unsigned j;
     58 
     59             for (j = 0; j < MIN2(tile_height, height - row); ++j)
     60             {
     61                 unsigned columns = MIN2(tile_width, width - col);
     62                 memcpy(dst2, src2, columns * sizeof(uint8_t));
     63                 dst2 += tile_width;
     64                 src2 += src_pitch;
     65             }
     66         }
     67     }
     68 }
     69 
     70 static void micro_tile_4_x_4_16bit(const void * const src, unsigned src_pitch,
     71                                    void * const dst, unsigned dst_pitch,
     72                                    unsigned width, unsigned height)
     73 {
     74     unsigned row; /* current source row */
     75     unsigned col; /* current source column */
     76     unsigned k; /* number of processed tiles */
     77     const unsigned tile_width = 4, tile_height = 4;
     78     const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
     79 
     80     k = 0;
     81     for (row = 0; row < height; row += tile_height)
     82     {
     83         for (col = 0; col < width; col += tile_width, ++k)
     84         {
     85             uint16_t *src2 = (uint16_t *)src + src_pitch * row + col;
     86             uint16_t *dst2 = (uint16_t *)dst + row * dst_pitch +
     87                              (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
     88             unsigned j;
     89 
     90             for (j = 0; j < MIN2(tile_height, height - row); ++j)
     91             {
     92                 unsigned columns = MIN2(tile_width, width - col);
     93                 memcpy(dst2, src2, columns * sizeof(uint16_t));
     94                 dst2 += tile_width;
     95                 src2 += src_pitch;
     96             }
     97         }
     98     }
     99 }
    100 
    101 static void micro_tile_8_x_2_16bit(const void * const src, unsigned src_pitch,
    102                                    void * const dst, unsigned dst_pitch,
    103                                    unsigned width, unsigned height)
    104 {
    105     unsigned row; /* current source row */
    106     unsigned col; /* current source column */
    107     unsigned k; /* number of processed tiles */
    108     const unsigned tile_width = 8, tile_height = 2;
    109     const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
    110 
    111     k = 0;
    112     for (row = 0; row < height; row += tile_height)
    113     {
    114         for (col = 0; col < width; col += tile_width, ++k)
    115         {
    116             uint16_t *src2 = (uint16_t *)src + src_pitch * row + col;
    117             uint16_t *dst2 = (uint16_t *)dst + row * dst_pitch +
    118                              (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
    119             unsigned j;
    120 
    121             for (j = 0; j < MIN2(tile_height, height - row); ++j)
    122             {
    123                 unsigned columns = MIN2(tile_width, width - col);
    124                 memcpy(dst2, src2, columns * sizeof(uint16_t));
    125                 dst2 += tile_width;
    126                 src2 += src_pitch;
    127             }
    128         }
    129     }
    130 }
    131 
    132 static void micro_tile_4_x_2_32bit(const void * const src, unsigned src_pitch,
    133                                    void * const dst, unsigned dst_pitch,
    134                                    unsigned width, unsigned height)
    135 {
    136     unsigned row; /* current source row */
    137     unsigned col; /* current source column */
    138     unsigned k; /* number of processed tiles */
    139     const unsigned tile_width = 4, tile_height = 2;
    140     const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
    141 
    142     k = 0;
    143     for (row = 0; row < height; row += tile_height)
    144     {
    145         for (col = 0; col < width; col += tile_width, ++k)
    146         {
    147             uint32_t *src2 = (uint32_t *)src + src_pitch * row + col;
    148             uint32_t *dst2 = (uint32_t *)dst + row * dst_pitch +
    149                              (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint32_t);
    150             unsigned j;
    151 
    152             for (j = 0; j < MIN2(tile_height, height - row); ++j)
    153             {
    154                 unsigned columns = MIN2(tile_width, width - col);
    155                 memcpy(dst2, src2, columns * sizeof(uint32_t));
    156                 dst2 += tile_width;
    157                 src2 += src_pitch;
    158             }
    159         }
    160     }
    161 }
    162 
    163 static void micro_tile_2_x_2_64bit(const void * const src, unsigned src_pitch,
    164                                    void * const dst, unsigned dst_pitch,
    165                                    unsigned width, unsigned height)
    166 {
    167     unsigned row; /* current source row */
    168     unsigned col; /* current source column */
    169     unsigned k; /* number of processed tiles */
    170     const unsigned tile_width = 2, tile_height = 2;
    171     const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
    172 
    173     k = 0;
    174     for (row = 0; row < height; row += tile_height)
    175     {
    176         for (col = 0; col < width; col += tile_width, ++k)
    177         {
    178             uint64_t *src2 = (uint64_t *)src + src_pitch * row + col;
    179             uint64_t *dst2 = (uint64_t *)dst + row * dst_pitch +
    180                              (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint64_t);
    181             unsigned j;
    182 
    183             for (j = 0; j < MIN2(tile_height, height - row); ++j)
    184             {
    185                 unsigned columns = MIN2(tile_width, width - col);
    186                 memcpy(dst2, src2, columns * sizeof(uint64_t));
    187                 dst2 += tile_width;
    188                 src2 += src_pitch;
    189             }
    190         }
    191     }
    192 }
    193 
    194 static void micro_tile_1_x_1_128bit(const void * src, unsigned src_pitch,
    195                                     void * dst, unsigned dst_pitch,
    196                                     unsigned width, unsigned height)
    197 {
    198     unsigned i, j;
    199     const unsigned elem_size = 16; /* sizeof(uint128_t) */
    200 
    201     for (j = 0; j < height; ++j)
    202     {
    203         for (i = 0; i < width; ++i)
    204         {
    205             memcpy(dst, src, width * elem_size);
    206             dst += dst_pitch * elem_size;
    207             src += src_pitch * elem_size;
    208         }
    209     }
    210 }
    211 
    212 void tile_image(const void * src, unsigned src_pitch,
    213                 void *dst, unsigned dst_pitch,
    214                 mesa_format format, unsigned width, unsigned height)
    215 {
    216     assert(src_pitch >= width);
    217     assert(dst_pitch >= width);
    218 
    219     radeon_print(RADEON_TEXTURE, RADEON_TRACE,
    220                  "Software tiling: src_pitch %d, dst_pitch %d, width %d, height %d, bpp %d\n",
    221                  src_pitch, dst_pitch, width, height, _mesa_get_format_bytes(format));
    222 
    223     switch (_mesa_get_format_bytes(format))
    224     {
    225         case 16:
    226             micro_tile_1_x_1_128bit(src, src_pitch, dst, dst_pitch, width, height);
    227             break;
    228         case 8:
    229             micro_tile_2_x_2_64bit(src, src_pitch, dst, dst_pitch, width, height);
    230             break;
    231         case 4:
    232             micro_tile_4_x_2_32bit(src, src_pitch, dst, dst_pitch, width, height);
    233             break;
    234         case 2:
    235             if (_mesa_get_format_bits(format, GL_DEPTH_BITS))
    236             {
    237                 micro_tile_4_x_4_16bit(src, src_pitch, dst, dst_pitch, width, height);
    238             }
    239             else
    240             {
    241                 micro_tile_8_x_2_16bit(src, src_pitch, dst, dst_pitch, width, height);
    242             }
    243             break;
    244         case 1:
    245             micro_tile_8_x_4_8bit(src, src_pitch, dst, dst_pitch, width, height);
    246             break;
    247         default:
    248             assert(0);
    249             break;
    250     }
    251 }
    252 
    253 static void micro_untile_8_x_4_8bit(const void * const src, unsigned src_pitch,
    254                                     void * const dst, unsigned dst_pitch,
    255                                     unsigned width, unsigned height)
    256 {
    257     unsigned row; /* current destination row */
    258     unsigned col; /* current destination column */
    259     unsigned k; /* current tile number */
    260     const unsigned tile_width = 8, tile_height = 4;
    261     const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
    262 
    263     assert(src_pitch % tile_width == 0);
    264 
    265     k = 0;
    266     for (row = 0; row < height; row += tile_height)
    267     {
    268         for (col = 0; col < width; col += tile_width, ++k)
    269         {
    270             uint8_t *src2 = (uint8_t *)src + row * src_pitch +
    271                              (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint8_t);
    272             uint8_t *dst2 = (uint8_t *)dst + dst_pitch * row + col;
    273             unsigned j;
    274 
    275             for (j = 0; j < MIN2(tile_height, height - row); ++j)
    276             {
    277                 unsigned columns = MIN2(tile_width, width - col);
    278                 memcpy(dst2, src2, columns * sizeof(uint8_t));
    279                 dst2 += dst_pitch;
    280                 src2 += tile_width;
    281             }
    282         }
    283     }
    284 }
    285 
    286 static void micro_untile_8_x_2_16bit(const void * const src, unsigned src_pitch,
    287                                      void * const dst, unsigned dst_pitch,
    288                                      unsigned width, unsigned height)
    289 {
    290     unsigned row; /* current destination row */
    291     unsigned col; /* current destination column */
    292     unsigned k; /* current tile number */
    293     const unsigned tile_width = 8, tile_height = 2;
    294     const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
    295 
    296     assert(src_pitch % tile_width == 0);
    297 
    298     k = 0;
    299     for (row = 0; row < height; row += tile_height)
    300     {
    301         for (col = 0; col < width; col += tile_width, ++k)
    302         {
    303             uint16_t *src2 = (uint16_t *)src + row * src_pitch +
    304                              (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
    305             uint16_t *dst2 = (uint16_t *)dst + dst_pitch * row + col;
    306             unsigned j;
    307 
    308             for (j = 0; j < MIN2(tile_height, height - row); ++j)
    309             {
    310                 unsigned columns = MIN2(tile_width, width - col);
    311                 memcpy(dst2, src2, columns * sizeof(uint16_t));
    312                 dst2 += dst_pitch;
    313                 src2 += tile_width;
    314             }
    315         }
    316     }
    317 }
    318 
    319 static void micro_untile_4_x_4_16bit(const void * const src, unsigned src_pitch,
    320                                      void * const dst, unsigned dst_pitch,
    321                                      unsigned width, unsigned height)
    322 {
    323     unsigned row; /* current destination row */
    324     unsigned col; /* current destination column */
    325     unsigned k; /* current tile number */
    326     const unsigned tile_width = 4, tile_height = 4;
    327     const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
    328 
    329     assert(src_pitch % tile_width == 0);
    330 
    331     k = 0;
    332     for (row = 0; row < height; row += tile_height)
    333     {
    334         for (col = 0; col < width; col += tile_width, ++k)
    335         {
    336             uint16_t *src2 = (uint16_t *)src + row * src_pitch +
    337                              (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
    338             uint16_t *dst2 = (uint16_t *)dst + dst_pitch * row + col;
    339             unsigned j;
    340 
    341             for (j = 0; j < MIN2(tile_height, height - row); ++j)
    342             {
    343                 unsigned columns = MIN2(tile_width, width - col);
    344                 memcpy(dst2, src2, columns * sizeof(uint16_t));
    345                 dst2 += dst_pitch;
    346                 src2 += tile_width;
    347             }
    348         }
    349     }
    350 }
    351 
    352 static void micro_untile_4_x_2_32bit(const void * const src, unsigned src_pitch,
    353                                      void * const dst, unsigned dst_pitch,
    354                                      unsigned width, unsigned height)
    355 {
    356     unsigned row; /* current destination row */
    357     unsigned col; /* current destination column */
    358     unsigned k; /* current tile number */
    359     const unsigned tile_width = 4, tile_height = 2;
    360     const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
    361 
    362     assert(src_pitch % tile_width == 0);
    363 
    364     k = 0;
    365     for (row = 0; row < height; row += tile_height)
    366     {
    367         for (col = 0; col < width; col += tile_width, ++k)
    368         {
    369             uint32_t *src2 = (uint32_t *)src + row * src_pitch +
    370                              (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint32_t);
    371             uint32_t *dst2 = (uint32_t *)dst + dst_pitch * row + col;
    372             unsigned j;
    373 
    374             for (j = 0; j < MIN2(tile_height, height - row); ++j)
    375             {
    376                 unsigned columns = MIN2(tile_width, width - col);
    377                 memcpy(dst2, src2, columns * sizeof(uint32_t));
    378                 dst2 += dst_pitch;
    379                 src2 += tile_width;
    380             }
    381         }
    382     }
    383 }
    384 
    385 static void micro_untile_2_x_2_64bit(const void * const src, unsigned src_pitch,
    386                                      void * const dst, unsigned dst_pitch,
    387                                      unsigned width, unsigned height)
    388 {
    389     unsigned row; /* current destination row */
    390     unsigned col; /* current destination column */
    391     unsigned k; /* current tile number */
    392     const unsigned tile_width = 2, tile_height = 2;
    393     const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
    394 
    395     assert(src_pitch % tile_width == 0);
    396 
    397     k = 0;
    398     for (row = 0; row < height; row += tile_height)
    399     {
    400         for (col = 0; col < width; col += tile_width, ++k)
    401         {
    402             uint64_t *src2 = (uint64_t *)src + row * src_pitch +
    403                              (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint64_t);
    404             uint64_t *dst2 = (uint64_t *)dst + dst_pitch * row + col;
    405             unsigned j;
    406 
    407             for (j = 0; j < MIN2(tile_height, height - row); ++j)
    408             {
    409                 unsigned columns = MIN2(tile_width, width - col);
    410                 memcpy(dst2, src2, columns * sizeof(uint64_t));
    411                 dst2 += dst_pitch;
    412                 src2 += tile_width;
    413             }
    414         }
    415     }
    416 }
    417 
    418 static void micro_untile_1_x_1_128bit(const void * src, unsigned src_pitch,
    419                                       void * dst, unsigned dst_pitch,
    420                                       unsigned width, unsigned height)
    421 {
    422     unsigned i, j;
    423     const unsigned elem_size = 16; /* sizeof(uint128_t) */
    424 
    425     for (j = 0; j < height; ++j)
    426     {
    427         for (i = 0; i < width; ++i)
    428         {
    429             memcpy(dst, src, width * elem_size);
    430             dst += dst_pitch * elem_size;
    431             src += src_pitch * elem_size;
    432         }
    433     }
    434 }
    435 
    436 void untile_image(const void * src, unsigned src_pitch,
    437                   void *dst, unsigned dst_pitch,
    438                   mesa_format format, unsigned width, unsigned height)
    439 {
    440     assert(src_pitch >= width);
    441     assert(dst_pitch >= width);
    442 
    443     radeon_print(RADEON_TEXTURE, RADEON_TRACE,
    444                  "Software untiling: src_pitch %d, dst_pitch %d, width %d, height %d, bpp %d\n",
    445                  src_pitch, dst_pitch, width, height, _mesa_get_format_bytes(format));
    446 
    447     switch (_mesa_get_format_bytes(format))
    448     {
    449         case 16:
    450             micro_untile_1_x_1_128bit(src, src_pitch, dst, dst_pitch, width, height);
    451             break;
    452         case 8:
    453             micro_untile_2_x_2_64bit(src, src_pitch, dst, dst_pitch, width, height);
    454             break;
    455         case 4:
    456             micro_untile_4_x_2_32bit(src, src_pitch, dst, dst_pitch, width, height);
    457             break;
    458         case 2:
    459             if (_mesa_get_format_bits(format, GL_DEPTH_BITS))
    460             {
    461                 micro_untile_4_x_4_16bit(src, src_pitch, dst, dst_pitch, width, height);
    462             }
    463             else
    464             {
    465                 micro_untile_8_x_2_16bit(src, src_pitch, dst, dst_pitch, width, height);
    466             }
    467             break;
    468         case 1:
    469             micro_untile_8_x_4_8bit(src, src_pitch, dst, dst_pitch, width, height);
    470             break;
    471         default:
    472             assert(0);
    473             break;
    474     }
    475 }
    476 
    477 void get_tile_size(mesa_format format, unsigned *block_width, unsigned *block_height)
    478 {
    479     switch (_mesa_get_format_bytes(format))
    480     {
    481         case 16:
    482             *block_width = 1;
    483             *block_height = 1;
    484             break;
    485         case 8:
    486             *block_width = 2;
    487             *block_height = 2;
    488             break;
    489         case 4:
    490             *block_width = 4;
    491             *block_height = 2;
    492             break;
    493         case 2:
    494             if (_mesa_get_format_bits(format, GL_DEPTH_BITS))
    495             {
    496                 *block_width = 4;
    497                 *block_height = 4;
    498             }
    499             else
    500             {
    501                 *block_width = 8;
    502                 *block_height = 2;
    503             }
    504             break;
    505         case 1:
    506             *block_width = 8;
    507             *block_height = 4;
    508             break;
    509         default:
    510             assert(0);
    511             break;
    512     }
    513 }
    514