1 /* 2 * Copyright (C) 2010 Maciej Cencora <m.cencora (at) gmail.com> 3 * 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining 7 * a copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sublicense, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial 16 * portions of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 */ 27 28 #include "radeon_screen.h" 29 #include "radeon_tile.h" 30 31 #include <stdint.h> 32 #include <string.h> 33 34 #include "main/macros.h" 35 #include "radeon_debug.h" 36 37 #define MICRO_TILE_SIZE 32 38 39 static void micro_tile_8_x_4_8bit(const void * const src, unsigned src_pitch, 40 void * const dst, unsigned dst_pitch, 41 unsigned width, unsigned height) 42 { 43 unsigned row; /* current source row */ 44 unsigned col; /* current source column */ 45 unsigned k; /* number of processed tiles */ 46 const unsigned tile_width = 8, tile_height = 4; 47 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width; 48 49 k = 0; 50 for (row = 0; row < height; row += tile_height) 51 { 52 for (col = 0; col < width; col += tile_width, ++k) 53 { 54 uint8_t *src2 = (uint8_t *)src + src_pitch * row + col; 55 uint8_t *dst2 = (uint8_t *)dst + row * dst_pitch + 56 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint8_t); 57 unsigned j; 58 59 for (j = 0; j < MIN2(tile_height, height - row); ++j) 60 { 61 unsigned columns = MIN2(tile_width, width - col); 62 memcpy(dst2, src2, columns * sizeof(uint8_t)); 63 dst2 += tile_width; 64 src2 += src_pitch; 65 } 66 } 67 } 68 } 69 70 static void micro_tile_4_x_4_16bit(const void * const src, unsigned src_pitch, 71 void * const dst, unsigned dst_pitch, 72 unsigned width, unsigned height) 73 { 74 unsigned row; /* current source row */ 75 unsigned col; /* current source column */ 76 unsigned k; /* number of processed tiles */ 77 const unsigned tile_width = 4, tile_height = 4; 78 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width; 79 80 k = 0; 81 for (row = 0; row < height; row += tile_height) 82 { 83 for (col = 0; col < width; col += tile_width, ++k) 84 { 85 uint16_t *src2 = (uint16_t *)src + src_pitch * row + col; 86 uint16_t *dst2 = (uint16_t *)dst + row * dst_pitch + 87 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t); 88 unsigned j; 89 90 for (j = 0; j < MIN2(tile_height, height - row); ++j) 91 { 92 unsigned columns = MIN2(tile_width, width - col); 93 memcpy(dst2, src2, columns * sizeof(uint16_t)); 94 dst2 += tile_width; 95 src2 += src_pitch; 96 } 97 } 98 } 99 } 100 101 static void micro_tile_8_x_2_16bit(const void * const src, unsigned src_pitch, 102 void * const dst, unsigned dst_pitch, 103 unsigned width, unsigned height) 104 { 105 unsigned row; /* current source row */ 106 unsigned col; /* current source column */ 107 unsigned k; /* number of processed tiles */ 108 const unsigned tile_width = 8, tile_height = 2; 109 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width; 110 111 k = 0; 112 for (row = 0; row < height; row += tile_height) 113 { 114 for (col = 0; col < width; col += tile_width, ++k) 115 { 116 uint16_t *src2 = (uint16_t *)src + src_pitch * row + col; 117 uint16_t *dst2 = (uint16_t *)dst + row * dst_pitch + 118 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t); 119 unsigned j; 120 121 for (j = 0; j < MIN2(tile_height, height - row); ++j) 122 { 123 unsigned columns = MIN2(tile_width, width - col); 124 memcpy(dst2, src2, columns * sizeof(uint16_t)); 125 dst2 += tile_width; 126 src2 += src_pitch; 127 } 128 } 129 } 130 } 131 132 static void micro_tile_4_x_2_32bit(const void * const src, unsigned src_pitch, 133 void * const dst, unsigned dst_pitch, 134 unsigned width, unsigned height) 135 { 136 unsigned row; /* current source row */ 137 unsigned col; /* current source column */ 138 unsigned k; /* number of processed tiles */ 139 const unsigned tile_width = 4, tile_height = 2; 140 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width; 141 142 k = 0; 143 for (row = 0; row < height; row += tile_height) 144 { 145 for (col = 0; col < width; col += tile_width, ++k) 146 { 147 uint32_t *src2 = (uint32_t *)src + src_pitch * row + col; 148 uint32_t *dst2 = (uint32_t *)dst + row * dst_pitch + 149 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint32_t); 150 unsigned j; 151 152 for (j = 0; j < MIN2(tile_height, height - row); ++j) 153 { 154 unsigned columns = MIN2(tile_width, width - col); 155 memcpy(dst2, src2, columns * sizeof(uint32_t)); 156 dst2 += tile_width; 157 src2 += src_pitch; 158 } 159 } 160 } 161 } 162 163 static void micro_tile_2_x_2_64bit(const void * const src, unsigned src_pitch, 164 void * const dst, unsigned dst_pitch, 165 unsigned width, unsigned height) 166 { 167 unsigned row; /* current source row */ 168 unsigned col; /* current source column */ 169 unsigned k; /* number of processed tiles */ 170 const unsigned tile_width = 2, tile_height = 2; 171 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width; 172 173 k = 0; 174 for (row = 0; row < height; row += tile_height) 175 { 176 for (col = 0; col < width; col += tile_width, ++k) 177 { 178 uint64_t *src2 = (uint64_t *)src + src_pitch * row + col; 179 uint64_t *dst2 = (uint64_t *)dst + row * dst_pitch + 180 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint64_t); 181 unsigned j; 182 183 for (j = 0; j < MIN2(tile_height, height - row); ++j) 184 { 185 unsigned columns = MIN2(tile_width, width - col); 186 memcpy(dst2, src2, columns * sizeof(uint64_t)); 187 dst2 += tile_width; 188 src2 += src_pitch; 189 } 190 } 191 } 192 } 193 194 static void micro_tile_1_x_1_128bit(const void * src, unsigned src_pitch, 195 void * dst, unsigned dst_pitch, 196 unsigned width, unsigned height) 197 { 198 unsigned i, j; 199 const unsigned elem_size = 16; /* sizeof(uint128_t) */ 200 201 for (j = 0; j < height; ++j) 202 { 203 for (i = 0; i < width; ++i) 204 { 205 memcpy(dst, src, width * elem_size); 206 dst += dst_pitch * elem_size; 207 src += src_pitch * elem_size; 208 } 209 } 210 } 211 212 void tile_image(const void * src, unsigned src_pitch, 213 void *dst, unsigned dst_pitch, 214 mesa_format format, unsigned width, unsigned height) 215 { 216 assert(src_pitch >= width); 217 assert(dst_pitch >= width); 218 219 radeon_print(RADEON_TEXTURE, RADEON_TRACE, 220 "Software tiling: src_pitch %d, dst_pitch %d, width %d, height %d, bpp %d\n", 221 src_pitch, dst_pitch, width, height, _mesa_get_format_bytes(format)); 222 223 switch (_mesa_get_format_bytes(format)) 224 { 225 case 16: 226 micro_tile_1_x_1_128bit(src, src_pitch, dst, dst_pitch, width, height); 227 break; 228 case 8: 229 micro_tile_2_x_2_64bit(src, src_pitch, dst, dst_pitch, width, height); 230 break; 231 case 4: 232 micro_tile_4_x_2_32bit(src, src_pitch, dst, dst_pitch, width, height); 233 break; 234 case 2: 235 if (_mesa_get_format_bits(format, GL_DEPTH_BITS)) 236 { 237 micro_tile_4_x_4_16bit(src, src_pitch, dst, dst_pitch, width, height); 238 } 239 else 240 { 241 micro_tile_8_x_2_16bit(src, src_pitch, dst, dst_pitch, width, height); 242 } 243 break; 244 case 1: 245 micro_tile_8_x_4_8bit(src, src_pitch, dst, dst_pitch, width, height); 246 break; 247 default: 248 assert(0); 249 break; 250 } 251 } 252 253 static void micro_untile_8_x_4_8bit(const void * const src, unsigned src_pitch, 254 void * const dst, unsigned dst_pitch, 255 unsigned width, unsigned height) 256 { 257 unsigned row; /* current destination row */ 258 unsigned col; /* current destination column */ 259 unsigned k; /* current tile number */ 260 const unsigned tile_width = 8, tile_height = 4; 261 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width; 262 263 assert(src_pitch % tile_width == 0); 264 265 k = 0; 266 for (row = 0; row < height; row += tile_height) 267 { 268 for (col = 0; col < width; col += tile_width, ++k) 269 { 270 uint8_t *src2 = (uint8_t *)src + row * src_pitch + 271 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint8_t); 272 uint8_t *dst2 = (uint8_t *)dst + dst_pitch * row + col; 273 unsigned j; 274 275 for (j = 0; j < MIN2(tile_height, height - row); ++j) 276 { 277 unsigned columns = MIN2(tile_width, width - col); 278 memcpy(dst2, src2, columns * sizeof(uint8_t)); 279 dst2 += dst_pitch; 280 src2 += tile_width; 281 } 282 } 283 } 284 } 285 286 static void micro_untile_8_x_2_16bit(const void * const src, unsigned src_pitch, 287 void * const dst, unsigned dst_pitch, 288 unsigned width, unsigned height) 289 { 290 unsigned row; /* current destination row */ 291 unsigned col; /* current destination column */ 292 unsigned k; /* current tile number */ 293 const unsigned tile_width = 8, tile_height = 2; 294 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width; 295 296 assert(src_pitch % tile_width == 0); 297 298 k = 0; 299 for (row = 0; row < height; row += tile_height) 300 { 301 for (col = 0; col < width; col += tile_width, ++k) 302 { 303 uint16_t *src2 = (uint16_t *)src + row * src_pitch + 304 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t); 305 uint16_t *dst2 = (uint16_t *)dst + dst_pitch * row + col; 306 unsigned j; 307 308 for (j = 0; j < MIN2(tile_height, height - row); ++j) 309 { 310 unsigned columns = MIN2(tile_width, width - col); 311 memcpy(dst2, src2, columns * sizeof(uint16_t)); 312 dst2 += dst_pitch; 313 src2 += tile_width; 314 } 315 } 316 } 317 } 318 319 static void micro_untile_4_x_4_16bit(const void * const src, unsigned src_pitch, 320 void * const dst, unsigned dst_pitch, 321 unsigned width, unsigned height) 322 { 323 unsigned row; /* current destination row */ 324 unsigned col; /* current destination column */ 325 unsigned k; /* current tile number */ 326 const unsigned tile_width = 4, tile_height = 4; 327 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width; 328 329 assert(src_pitch % tile_width == 0); 330 331 k = 0; 332 for (row = 0; row < height; row += tile_height) 333 { 334 for (col = 0; col < width; col += tile_width, ++k) 335 { 336 uint16_t *src2 = (uint16_t *)src + row * src_pitch + 337 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t); 338 uint16_t *dst2 = (uint16_t *)dst + dst_pitch * row + col; 339 unsigned j; 340 341 for (j = 0; j < MIN2(tile_height, height - row); ++j) 342 { 343 unsigned columns = MIN2(tile_width, width - col); 344 memcpy(dst2, src2, columns * sizeof(uint16_t)); 345 dst2 += dst_pitch; 346 src2 += tile_width; 347 } 348 } 349 } 350 } 351 352 static void micro_untile_4_x_2_32bit(const void * const src, unsigned src_pitch, 353 void * const dst, unsigned dst_pitch, 354 unsigned width, unsigned height) 355 { 356 unsigned row; /* current destination row */ 357 unsigned col; /* current destination column */ 358 unsigned k; /* current tile number */ 359 const unsigned tile_width = 4, tile_height = 2; 360 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width; 361 362 assert(src_pitch % tile_width == 0); 363 364 k = 0; 365 for (row = 0; row < height; row += tile_height) 366 { 367 for (col = 0; col < width; col += tile_width, ++k) 368 { 369 uint32_t *src2 = (uint32_t *)src + row * src_pitch + 370 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint32_t); 371 uint32_t *dst2 = (uint32_t *)dst + dst_pitch * row + col; 372 unsigned j; 373 374 for (j = 0; j < MIN2(tile_height, height - row); ++j) 375 { 376 unsigned columns = MIN2(tile_width, width - col); 377 memcpy(dst2, src2, columns * sizeof(uint32_t)); 378 dst2 += dst_pitch; 379 src2 += tile_width; 380 } 381 } 382 } 383 } 384 385 static void micro_untile_2_x_2_64bit(const void * const src, unsigned src_pitch, 386 void * const dst, unsigned dst_pitch, 387 unsigned width, unsigned height) 388 { 389 unsigned row; /* current destination row */ 390 unsigned col; /* current destination column */ 391 unsigned k; /* current tile number */ 392 const unsigned tile_width = 2, tile_height = 2; 393 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width; 394 395 assert(src_pitch % tile_width == 0); 396 397 k = 0; 398 for (row = 0; row < height; row += tile_height) 399 { 400 for (col = 0; col < width; col += tile_width, ++k) 401 { 402 uint64_t *src2 = (uint64_t *)src + row * src_pitch + 403 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint64_t); 404 uint64_t *dst2 = (uint64_t *)dst + dst_pitch * row + col; 405 unsigned j; 406 407 for (j = 0; j < MIN2(tile_height, height - row); ++j) 408 { 409 unsigned columns = MIN2(tile_width, width - col); 410 memcpy(dst2, src2, columns * sizeof(uint64_t)); 411 dst2 += dst_pitch; 412 src2 += tile_width; 413 } 414 } 415 } 416 } 417 418 static void micro_untile_1_x_1_128bit(const void * src, unsigned src_pitch, 419 void * dst, unsigned dst_pitch, 420 unsigned width, unsigned height) 421 { 422 unsigned i, j; 423 const unsigned elem_size = 16; /* sizeof(uint128_t) */ 424 425 for (j = 0; j < height; ++j) 426 { 427 for (i = 0; i < width; ++i) 428 { 429 memcpy(dst, src, width * elem_size); 430 dst += dst_pitch * elem_size; 431 src += src_pitch * elem_size; 432 } 433 } 434 } 435 436 void untile_image(const void * src, unsigned src_pitch, 437 void *dst, unsigned dst_pitch, 438 mesa_format format, unsigned width, unsigned height) 439 { 440 assert(src_pitch >= width); 441 assert(dst_pitch >= width); 442 443 radeon_print(RADEON_TEXTURE, RADEON_TRACE, 444 "Software untiling: src_pitch %d, dst_pitch %d, width %d, height %d, bpp %d\n", 445 src_pitch, dst_pitch, width, height, _mesa_get_format_bytes(format)); 446 447 switch (_mesa_get_format_bytes(format)) 448 { 449 case 16: 450 micro_untile_1_x_1_128bit(src, src_pitch, dst, dst_pitch, width, height); 451 break; 452 case 8: 453 micro_untile_2_x_2_64bit(src, src_pitch, dst, dst_pitch, width, height); 454 break; 455 case 4: 456 micro_untile_4_x_2_32bit(src, src_pitch, dst, dst_pitch, width, height); 457 break; 458 case 2: 459 if (_mesa_get_format_bits(format, GL_DEPTH_BITS)) 460 { 461 micro_untile_4_x_4_16bit(src, src_pitch, dst, dst_pitch, width, height); 462 } 463 else 464 { 465 micro_untile_8_x_2_16bit(src, src_pitch, dst, dst_pitch, width, height); 466 } 467 break; 468 case 1: 469 micro_untile_8_x_4_8bit(src, src_pitch, dst, dst_pitch, width, height); 470 break; 471 default: 472 assert(0); 473 break; 474 } 475 } 476 477 void get_tile_size(mesa_format format, unsigned *block_width, unsigned *block_height) 478 { 479 switch (_mesa_get_format_bytes(format)) 480 { 481 case 16: 482 *block_width = 1; 483 *block_height = 1; 484 break; 485 case 8: 486 *block_width = 2; 487 *block_height = 2; 488 break; 489 case 4: 490 *block_width = 4; 491 *block_height = 2; 492 break; 493 case 2: 494 if (_mesa_get_format_bits(format, GL_DEPTH_BITS)) 495 { 496 *block_width = 4; 497 *block_height = 4; 498 } 499 else 500 { 501 *block_width = 8; 502 *block_height = 2; 503 } 504 break; 505 case 1: 506 *block_width = 8; 507 *block_height = 4; 508 break; 509 default: 510 assert(0); 511 break; 512 } 513 } 514