1 /* 2 * Copyright (C) 2010 Maciej Cencora <m.cencora (at) gmail.com> 3 * 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining 7 * a copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sublicense, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial 16 * portions of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 */ 27 28 #include "radeon_tile.h" 29 30 #include <stdint.h> 31 #include <string.h> 32 33 #include "main/macros.h" 34 #include "radeon_debug.h" 35 36 #define MICRO_TILE_SIZE 32 37 38 static void micro_tile_8_x_4_8bit(const void * const src, unsigned src_pitch, 39 void * const dst, unsigned dst_pitch, 40 unsigned width, unsigned height) 41 { 42 unsigned row; /* current source row */ 43 unsigned col; /* current source column */ 44 unsigned k; /* number of processed tiles */ 45 const unsigned tile_width = 8, tile_height = 4; 46 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width; 47 48 k = 0; 49 for (row = 0; row < height; row += tile_height) 50 { 51 for (col = 0; col < width; col += tile_width, ++k) 52 { 53 uint8_t *src2 = (uint8_t *)src + src_pitch * row + col; 54 uint8_t *dst2 = (uint8_t *)dst + row * dst_pitch + 55 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint8_t); 56 unsigned j; 57 58 for (j = 0; j < MIN2(tile_height, height - row); ++j) 59 { 60 unsigned columns = MIN2(tile_width, width - col); 61 memcpy(dst2, src2, columns * sizeof(uint8_t)); 62 dst2 += tile_width; 63 src2 += src_pitch; 64 } 65 } 66 } 67 } 68 69 static void micro_tile_4_x_4_16bit(const void * const src, unsigned src_pitch, 70 void * const dst, unsigned dst_pitch, 71 unsigned width, unsigned height) 72 { 73 unsigned row; /* current source row */ 74 unsigned col; /* current source column */ 75 unsigned k; /* number of processed tiles */ 76 const unsigned tile_width = 4, tile_height = 4; 77 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width; 78 79 k = 0; 80 for (row = 0; row < height; row += tile_height) 81 { 82 for (col = 0; col < width; col += tile_width, ++k) 83 { 84 uint16_t *src2 = (uint16_t *)src + src_pitch * row + col; 85 uint16_t *dst2 = (uint16_t *)dst + row * dst_pitch + 86 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t); 87 unsigned j; 88 89 for (j = 0; j < MIN2(tile_height, height - row); ++j) 90 { 91 unsigned columns = MIN2(tile_width, width - col); 92 memcpy(dst2, src2, columns * sizeof(uint16_t)); 93 dst2 += tile_width; 94 src2 += src_pitch; 95 } 96 } 97 } 98 } 99 100 static void micro_tile_8_x_2_16bit(const void * const src, unsigned src_pitch, 101 void * const dst, unsigned dst_pitch, 102 unsigned width, unsigned height) 103 { 104 unsigned row; /* current source row */ 105 unsigned col; /* current source column */ 106 unsigned k; /* number of processed tiles */ 107 const unsigned tile_width = 8, tile_height = 2; 108 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width; 109 110 k = 0; 111 for (row = 0; row < height; row += tile_height) 112 { 113 for (col = 0; col < width; col += tile_width, ++k) 114 { 115 uint16_t *src2 = (uint16_t *)src + src_pitch * row + col; 116 uint16_t *dst2 = (uint16_t *)dst + row * dst_pitch + 117 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t); 118 unsigned j; 119 120 for (j = 0; j < MIN2(tile_height, height - row); ++j) 121 { 122 unsigned columns = MIN2(tile_width, width - col); 123 memcpy(dst2, src2, columns * sizeof(uint16_t)); 124 dst2 += tile_width; 125 src2 += src_pitch; 126 } 127 } 128 } 129 } 130 131 static void micro_tile_4_x_2_32bit(const void * const src, unsigned src_pitch, 132 void * const dst, unsigned dst_pitch, 133 unsigned width, unsigned height) 134 { 135 unsigned row; /* current source row */ 136 unsigned col; /* current source column */ 137 unsigned k; /* number of processed tiles */ 138 const unsigned tile_width = 4, tile_height = 2; 139 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width; 140 141 k = 0; 142 for (row = 0; row < height; row += tile_height) 143 { 144 for (col = 0; col < width; col += tile_width, ++k) 145 { 146 uint32_t *src2 = (uint32_t *)src + src_pitch * row + col; 147 uint32_t *dst2 = (uint32_t *)dst + row * dst_pitch + 148 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint32_t); 149 unsigned j; 150 151 for (j = 0; j < MIN2(tile_height, height - row); ++j) 152 { 153 unsigned columns = MIN2(tile_width, width - col); 154 memcpy(dst2, src2, columns * sizeof(uint32_t)); 155 dst2 += tile_width; 156 src2 += src_pitch; 157 } 158 } 159 } 160 } 161 162 static void micro_tile_2_x_2_64bit(const void * const src, unsigned src_pitch, 163 void * const dst, unsigned dst_pitch, 164 unsigned width, unsigned height) 165 { 166 unsigned row; /* current source row */ 167 unsigned col; /* current source column */ 168 unsigned k; /* number of processed tiles */ 169 const unsigned tile_width = 2, tile_height = 2; 170 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width; 171 172 k = 0; 173 for (row = 0; row < height; row += tile_height) 174 { 175 for (col = 0; col < width; col += tile_width, ++k) 176 { 177 uint64_t *src2 = (uint64_t *)src + src_pitch * row + col; 178 uint64_t *dst2 = (uint64_t *)dst + row * dst_pitch + 179 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint64_t); 180 unsigned j; 181 182 for (j = 0; j < MIN2(tile_height, height - row); ++j) 183 { 184 unsigned columns = MIN2(tile_width, width - col); 185 memcpy(dst2, src2, columns * sizeof(uint64_t)); 186 dst2 += tile_width; 187 src2 += src_pitch; 188 } 189 } 190 } 191 } 192 193 static void micro_tile_1_x_1_128bit(const void * src, unsigned src_pitch, 194 void * dst, unsigned dst_pitch, 195 unsigned width, unsigned height) 196 { 197 unsigned i, j; 198 const unsigned elem_size = 16; /* sizeof(uint128_t) */ 199 200 for (j = 0; j < height; ++j) 201 { 202 for (i = 0; i < width; ++i) 203 { 204 memcpy(dst, src, width * elem_size); 205 dst += dst_pitch * elem_size; 206 src += src_pitch * elem_size; 207 } 208 } 209 } 210 211 void tile_image(const void * src, unsigned src_pitch, 212 void *dst, unsigned dst_pitch, 213 gl_format format, unsigned width, unsigned height) 214 { 215 assert(src_pitch >= width); 216 assert(dst_pitch >= width); 217 218 radeon_print(RADEON_TEXTURE, RADEON_TRACE, 219 "Software tiling: src_pitch %d, dst_pitch %d, width %d, height %d, bpp %d\n", 220 src_pitch, dst_pitch, width, height, _mesa_get_format_bytes(format)); 221 222 switch (_mesa_get_format_bytes(format)) 223 { 224 case 16: 225 micro_tile_1_x_1_128bit(src, src_pitch, dst, dst_pitch, width, height); 226 break; 227 case 8: 228 micro_tile_2_x_2_64bit(src, src_pitch, dst, dst_pitch, width, height); 229 break; 230 case 4: 231 micro_tile_4_x_2_32bit(src, src_pitch, dst, dst_pitch, width, height); 232 break; 233 case 2: 234 if (_mesa_get_format_bits(format, GL_DEPTH_BITS)) 235 { 236 micro_tile_4_x_4_16bit(src, src_pitch, dst, dst_pitch, width, height); 237 } 238 else 239 { 240 micro_tile_8_x_2_16bit(src, src_pitch, dst, dst_pitch, width, height); 241 } 242 break; 243 case 1: 244 micro_tile_8_x_4_8bit(src, src_pitch, dst, dst_pitch, width, height); 245 break; 246 default: 247 assert(0); 248 break; 249 } 250 } 251 252 static void micro_untile_8_x_4_8bit(const void * const src, unsigned src_pitch, 253 void * const dst, unsigned dst_pitch, 254 unsigned width, unsigned height) 255 { 256 unsigned row; /* current destination row */ 257 unsigned col; /* current destination column */ 258 unsigned k; /* current tile number */ 259 const unsigned tile_width = 8, tile_height = 4; 260 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width; 261 262 assert(src_pitch % tile_width == 0); 263 264 k = 0; 265 for (row = 0; row < height; row += tile_height) 266 { 267 for (col = 0; col < width; col += tile_width, ++k) 268 { 269 uint8_t *src2 = (uint8_t *)src + row * src_pitch + 270 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint8_t); 271 uint8_t *dst2 = (uint8_t *)dst + dst_pitch * row + col; 272 unsigned j; 273 274 for (j = 0; j < MIN2(tile_height, height - row); ++j) 275 { 276 unsigned columns = MIN2(tile_width, width - col); 277 memcpy(dst2, src2, columns * sizeof(uint8_t)); 278 dst2 += dst_pitch; 279 src2 += tile_width; 280 } 281 } 282 } 283 } 284 285 static void micro_untile_8_x_2_16bit(const void * const src, unsigned src_pitch, 286 void * const dst, unsigned dst_pitch, 287 unsigned width, unsigned height) 288 { 289 unsigned row; /* current destination row */ 290 unsigned col; /* current destination column */ 291 unsigned k; /* current tile number */ 292 const unsigned tile_width = 8, tile_height = 2; 293 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width; 294 295 assert(src_pitch % tile_width == 0); 296 297 k = 0; 298 for (row = 0; row < height; row += tile_height) 299 { 300 for (col = 0; col < width; col += tile_width, ++k) 301 { 302 uint16_t *src2 = (uint16_t *)src + row * src_pitch + 303 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t); 304 uint16_t *dst2 = (uint16_t *)dst + dst_pitch * row + col; 305 unsigned j; 306 307 for (j = 0; j < MIN2(tile_height, height - row); ++j) 308 { 309 unsigned columns = MIN2(tile_width, width - col); 310 memcpy(dst2, src2, columns * sizeof(uint16_t)); 311 dst2 += dst_pitch; 312 src2 += tile_width; 313 } 314 } 315 } 316 } 317 318 static void micro_untile_4_x_4_16bit(const void * const src, unsigned src_pitch, 319 void * const dst, unsigned dst_pitch, 320 unsigned width, unsigned height) 321 { 322 unsigned row; /* current destination row */ 323 unsigned col; /* current destination column */ 324 unsigned k; /* current tile number */ 325 const unsigned tile_width = 4, tile_height = 4; 326 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width; 327 328 assert(src_pitch % tile_width == 0); 329 330 k = 0; 331 for (row = 0; row < height; row += tile_height) 332 { 333 for (col = 0; col < width; col += tile_width, ++k) 334 { 335 uint16_t *src2 = (uint16_t *)src + row * src_pitch + 336 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t); 337 uint16_t *dst2 = (uint16_t *)dst + dst_pitch * row + col; 338 unsigned j; 339 340 for (j = 0; j < MIN2(tile_height, height - row); ++j) 341 { 342 unsigned columns = MIN2(tile_width, width - col); 343 memcpy(dst2, src2, columns * sizeof(uint16_t)); 344 dst2 += dst_pitch; 345 src2 += tile_width; 346 } 347 } 348 } 349 } 350 351 static void micro_untile_4_x_2_32bit(const void * const src, unsigned src_pitch, 352 void * const dst, unsigned dst_pitch, 353 unsigned width, unsigned height) 354 { 355 unsigned row; /* current destination row */ 356 unsigned col; /* current destination column */ 357 unsigned k; /* current tile number */ 358 const unsigned tile_width = 4, tile_height = 2; 359 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width; 360 361 assert(src_pitch % tile_width == 0); 362 363 k = 0; 364 for (row = 0; row < height; row += tile_height) 365 { 366 for (col = 0; col < width; col += tile_width, ++k) 367 { 368 uint32_t *src2 = (uint32_t *)src + row * src_pitch + 369 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint32_t); 370 uint32_t *dst2 = (uint32_t *)dst + dst_pitch * row + col; 371 unsigned j; 372 373 for (j = 0; j < MIN2(tile_height, height - row); ++j) 374 { 375 unsigned columns = MIN2(tile_width, width - col); 376 memcpy(dst2, src2, columns * sizeof(uint32_t)); 377 dst2 += dst_pitch; 378 src2 += tile_width; 379 } 380 } 381 } 382 } 383 384 static void micro_untile_2_x_2_64bit(const void * const src, unsigned src_pitch, 385 void * const dst, unsigned dst_pitch, 386 unsigned width, unsigned height) 387 { 388 unsigned row; /* current destination row */ 389 unsigned col; /* current destination column */ 390 unsigned k; /* current tile number */ 391 const unsigned tile_width = 2, tile_height = 2; 392 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width; 393 394 assert(src_pitch % tile_width == 0); 395 396 k = 0; 397 for (row = 0; row < height; row += tile_height) 398 { 399 for (col = 0; col < width; col += tile_width, ++k) 400 { 401 uint64_t *src2 = (uint64_t *)src + row * src_pitch + 402 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint64_t); 403 uint64_t *dst2 = (uint64_t *)dst + dst_pitch * row + col; 404 unsigned j; 405 406 for (j = 0; j < MIN2(tile_height, height - row); ++j) 407 { 408 unsigned columns = MIN2(tile_width, width - col); 409 memcpy(dst2, src2, columns * sizeof(uint64_t)); 410 dst2 += dst_pitch; 411 src2 += tile_width; 412 } 413 } 414 } 415 } 416 417 static void micro_untile_1_x_1_128bit(const void * src, unsigned src_pitch, 418 void * dst, unsigned dst_pitch, 419 unsigned width, unsigned height) 420 { 421 unsigned i, j; 422 const unsigned elem_size = 16; /* sizeof(uint128_t) */ 423 424 for (j = 0; j < height; ++j) 425 { 426 for (i = 0; i < width; ++i) 427 { 428 memcpy(dst, src, width * elem_size); 429 dst += dst_pitch * elem_size; 430 src += src_pitch * elem_size; 431 } 432 } 433 } 434 435 void untile_image(const void * src, unsigned src_pitch, 436 void *dst, unsigned dst_pitch, 437 gl_format format, unsigned width, unsigned height) 438 { 439 assert(src_pitch >= width); 440 assert(dst_pitch >= width); 441 442 radeon_print(RADEON_TEXTURE, RADEON_TRACE, 443 "Software untiling: src_pitch %d, dst_pitch %d, width %d, height %d, bpp %d\n", 444 src_pitch, dst_pitch, width, height, _mesa_get_format_bytes(format)); 445 446 switch (_mesa_get_format_bytes(format)) 447 { 448 case 16: 449 micro_untile_1_x_1_128bit(src, src_pitch, dst, dst_pitch, width, height); 450 break; 451 case 8: 452 micro_untile_2_x_2_64bit(src, src_pitch, dst, dst_pitch, width, height); 453 break; 454 case 4: 455 micro_untile_4_x_2_32bit(src, src_pitch, dst, dst_pitch, width, height); 456 break; 457 case 2: 458 if (_mesa_get_format_bits(format, GL_DEPTH_BITS)) 459 { 460 micro_untile_4_x_4_16bit(src, src_pitch, dst, dst_pitch, width, height); 461 } 462 else 463 { 464 micro_untile_8_x_2_16bit(src, src_pitch, dst, dst_pitch, width, height); 465 } 466 break; 467 case 1: 468 micro_untile_8_x_4_8bit(src, src_pitch, dst, dst_pitch, width, height); 469 break; 470 default: 471 assert(0); 472 break; 473 } 474 } 475 476 void get_tile_size(gl_format format, unsigned *block_width, unsigned *block_height) 477 { 478 switch (_mesa_get_format_bytes(format)) 479 { 480 case 16: 481 *block_width = 1; 482 *block_height = 1; 483 break; 484 case 8: 485 *block_width = 2; 486 *block_height = 2; 487 break; 488 case 4: 489 *block_width = 4; 490 *block_height = 2; 491 break; 492 case 2: 493 if (_mesa_get_format_bits(format, GL_DEPTH_BITS)) 494 { 495 *block_width = 4; 496 *block_height = 4; 497 } 498 else 499 { 500 *block_width = 8; 501 *block_height = 2; 502 } 503 break; 504 case 1: 505 *block_width = 8; 506 *block_height = 4; 507 break; 508 default: 509 assert(0); 510 break; 511 } 512 } 513