1 /************************************************************************** 2 * 3 * Copyright 2010 VMware, Inc. All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sub license, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the 14 * next paragraph) shall be included in all copies or substantial portions 15 * of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 20 * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR 21 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 * 25 **************************************************************************/ 26 27 28 /** 29 * Code to convert images from tiled to linear and back. 30 * XXX there are quite a few assumptions about color and z/stencil being 31 * 32bpp. 32 */ 33 34 35 #include "util/u_format.h" 36 #include "util/u_memory.h" 37 #include "lp_tile_soa.h" 38 #include "lp_tile_image.h" 39 40 41 #define BYTES_PER_TILE (TILE_SIZE * TILE_SIZE * 4) 42 43 44 /** 45 * Untile a 4x4 block of 32-bit words (all contiguous) to linear layout 46 * at dst, with dst_stride words between rows. 47 */ 48 static void 49 untile_4_4_uint32(const uint32_t *src, uint32_t *dst, unsigned dst_stride) 50 { 51 uint32_t *d0 = dst; 52 uint32_t *d1 = d0 + dst_stride; 53 uint32_t *d2 = d1 + dst_stride; 54 uint32_t *d3 = d2 + dst_stride; 55 56 d0[0] = src[0]; d0[1] = src[1]; d0[2] = src[4]; d0[3] = src[5]; 57 d1[0] = src[2]; d1[1] = src[3]; d1[2] = src[6]; d1[3] = src[7]; 58 d2[0] = src[8]; d2[1] = src[9]; d2[2] = src[12]; d2[3] = src[13]; 59 d3[0] = src[10]; d3[1] = src[11]; d3[2] = src[14]; d3[3] = src[15]; 60 } 61 62 63 64 /** 65 * Untile a 4x4 block of 16-bit words (all contiguous) to linear layout 66 * at dst, with dst_stride words between rows. 67 */ 68 static void 69 untile_4_4_uint16(const uint16_t *src, uint16_t *dst, unsigned dst_stride) 70 { 71 uint16_t *d0 = dst; 72 uint16_t *d1 = d0 + dst_stride; 73 uint16_t *d2 = d1 + dst_stride; 74 uint16_t *d3 = d2 + dst_stride; 75 76 d0[0] = src[0]; d0[1] = src[1]; d0[2] = src[4]; d0[3] = src[5]; 77 d1[0] = src[2]; d1[1] = src[3]; d1[2] = src[6]; d1[3] = src[7]; 78 d2[0] = src[8]; d2[1] = src[9]; d2[2] = src[12]; d2[3] = src[13]; 79 d3[0] = src[10]; d3[1] = src[11]; d3[2] = src[14]; d3[3] = src[15]; 80 } 81 82 83 84 /** 85 * Convert a 4x4 rect of 32-bit words from a linear layout into tiled 86 * layout (in which all 16 words are contiguous). 87 */ 88 static void 89 tile_4_4_uint32(const uint32_t *src, uint32_t *dst, unsigned src_stride) 90 { 91 const uint32_t *s0 = src; 92 const uint32_t *s1 = s0 + src_stride; 93 const uint32_t *s2 = s1 + src_stride; 94 const uint32_t *s3 = s2 + src_stride; 95 96 dst[0] = s0[0]; dst[1] = s0[1]; dst[4] = s0[2]; dst[5] = s0[3]; 97 dst[2] = s1[0]; dst[3] = s1[1]; dst[6] = s1[2]; dst[7] = s1[3]; 98 dst[8] = s2[0]; dst[9] = s2[1]; dst[12] = s2[2]; dst[13] = s2[3]; 99 dst[10] = s3[0]; dst[11] = s3[1]; dst[14] = s3[2]; dst[15] = s3[3]; 100 } 101 102 103 104 /** 105 * Convert a 4x4 rect of 16-bit words from a linear layout into tiled 106 * layout (in which all 16 words are contiguous). 107 */ 108 static void 109 tile_4_4_uint16(const uint16_t *src, uint16_t *dst, unsigned src_stride) 110 { 111 const uint16_t *s0 = src; 112 const uint16_t *s1 = s0 + src_stride; 113 const uint16_t *s2 = s1 + src_stride; 114 const uint16_t *s3 = s2 + src_stride; 115 116 dst[0] = s0[0]; dst[1] = s0[1]; dst[4] = s0[2]; dst[5] = s0[3]; 117 dst[2] = s1[0]; dst[3] = s1[1]; dst[6] = s1[2]; dst[7] = s1[3]; 118 dst[8] = s2[0]; dst[9] = s2[1]; dst[12] = s2[2]; dst[13] = s2[3]; 119 dst[10] = s3[0]; dst[11] = s3[1]; dst[14] = s3[2]; dst[15] = s3[3]; 120 } 121 122 123 124 /** 125 * Convert a tiled image into a linear image. 126 * \param dst_stride dest row stride in bytes 127 */ 128 void 129 lp_tiled_to_linear(const void *src, void *dst, 130 unsigned x, unsigned y, 131 unsigned width, unsigned height, 132 enum pipe_format format, 133 unsigned dst_stride, 134 unsigned tiles_per_row) 135 { 136 assert(x % TILE_SIZE == 0); 137 assert(y % TILE_SIZE == 0); 138 /*assert(width % TILE_SIZE == 0); 139 assert(height % TILE_SIZE == 0);*/ 140 141 /* Note that Z/stencil surfaces use a different tiling size than 142 * color surfaces. 143 */ 144 if (util_format_is_depth_or_stencil(format)) { 145 const uint bpp = util_format_get_blocksize(format); 146 const uint src_stride = dst_stride * TILE_VECTOR_WIDTH; 147 const uint tile_w = TILE_VECTOR_WIDTH, tile_h = TILE_VECTOR_HEIGHT; 148 const uint tiles_per_row = src_stride / (tile_w * tile_h * bpp); 149 150 dst_stride /= bpp; /* convert from bytes to words */ 151 152 if (bpp == 4) { 153 const uint32_t *src32 = (const uint32_t *) src; 154 uint32_t *dst32 = (uint32_t *) dst; 155 uint i, j; 156 157 for (j = 0; j < height; j += tile_h) { 158 for (i = 0; i < width; i += tile_w) { 159 /* compute offsets in 32-bit words */ 160 uint ii = i + x, jj = j + y; 161 uint src_offset = (jj / tile_h * tiles_per_row + ii / tile_w) 162 * (tile_w * tile_h); 163 uint dst_offset = jj * dst_stride + ii; 164 untile_4_4_uint32(src32 + src_offset, 165 dst32 + dst_offset, 166 dst_stride); 167 } 168 } 169 } 170 else { 171 const uint16_t *src16 = (const uint16_t *) src; 172 uint16_t *dst16 = (uint16_t *) dst; 173 uint i, j; 174 175 assert(bpp == 2); 176 177 for (j = 0; j < height; j += tile_h) { 178 for (i = 0; i < width; i += tile_w) { 179 /* compute offsets in 16-bit words */ 180 uint ii = i + x, jj = j + y; 181 uint src_offset = (jj / tile_h * tiles_per_row + ii / tile_w) 182 * (tile_w * tile_h); 183 uint dst_offset = jj * dst_stride + ii; 184 untile_4_4_uint16(src16 + src_offset, 185 dst16 + dst_offset, 186 dst_stride); 187 } 188 } 189 } 190 } 191 else { 192 /* color image */ 193 const uint bpp = 4; 194 const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE; 195 const uint bytes_per_tile = tile_w * tile_h * bpp; 196 uint i, j; 197 198 for (j = 0; j < height; j += tile_h) { 199 for (i = 0; i < width; i += tile_w) { 200 uint ii = i + x, jj = j + y; 201 uint tile_offset = ((jj / tile_h) * tiles_per_row + ii / tile_w); 202 uint byte_offset = tile_offset * bytes_per_tile; 203 const uint8_t *src_tile = (uint8_t *) src + byte_offset; 204 205 lp_tile_unswizzle_4ub(format, 206 src_tile, 207 dst, dst_stride, 208 ii, jj); 209 } 210 } 211 } 212 } 213 214 215 /** 216 * Convert a linear image into a tiled image. 217 * \param src_stride source row stride in bytes 218 */ 219 void 220 lp_linear_to_tiled(const void *src, void *dst, 221 unsigned x, unsigned y, 222 unsigned width, unsigned height, 223 enum pipe_format format, 224 unsigned src_stride, 225 unsigned tiles_per_row) 226 { 227 assert(x % TILE_SIZE == 0); 228 assert(y % TILE_SIZE == 0); 229 /* 230 assert(width % TILE_SIZE == 0); 231 assert(height % TILE_SIZE == 0); 232 */ 233 234 if (util_format_is_depth_or_stencil(format)) { 235 const uint bpp = util_format_get_blocksize(format); 236 const uint dst_stride = src_stride * TILE_VECTOR_WIDTH; 237 const uint tile_w = TILE_VECTOR_WIDTH, tile_h = TILE_VECTOR_HEIGHT; 238 const uint tiles_per_row = dst_stride / (tile_w * tile_h * bpp); 239 240 src_stride /= bpp; /* convert from bytes to words */ 241 242 if (bpp == 4) { 243 const uint32_t *src32 = (const uint32_t *) src; 244 uint32_t *dst32 = (uint32_t *) dst; 245 uint i, j; 246 247 for (j = 0; j < height; j += tile_h) { 248 for (i = 0; i < width; i += tile_w) { 249 /* compute offsets in 32-bit words */ 250 uint ii = i + x, jj = j + y; 251 uint src_offset = jj * src_stride + ii; 252 uint dst_offset = (jj / tile_h * tiles_per_row + ii / tile_w) 253 * (tile_w * tile_h); 254 tile_4_4_uint32(src32 + src_offset, 255 dst32 + dst_offset, 256 src_stride); 257 } 258 } 259 } 260 else { 261 const uint16_t *src16 = (const uint16_t *) src; 262 uint16_t *dst16 = (uint16_t *) dst; 263 uint i, j; 264 265 assert(bpp == 2); 266 267 for (j = 0; j < height; j += tile_h) { 268 for (i = 0; i < width; i += tile_w) { 269 /* compute offsets in 16-bit words */ 270 uint ii = i + x, jj = j + y; 271 uint src_offset = jj * src_stride + ii; 272 uint dst_offset = (jj / tile_h * tiles_per_row + ii / tile_w) 273 * (tile_w * tile_h); 274 tile_4_4_uint16(src16 + src_offset, 275 dst16 + dst_offset, 276 src_stride); 277 } 278 } 279 } 280 } 281 else { 282 const uint bpp = 4; 283 const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE; 284 const uint bytes_per_tile = tile_w * tile_h * bpp; 285 uint i, j; 286 287 for (j = 0; j < height; j += TILE_SIZE) { 288 for (i = 0; i < width; i += TILE_SIZE) { 289 uint ii = i + x, jj = j + y; 290 uint tile_offset = ((jj / tile_h) * tiles_per_row + ii / tile_w); 291 uint byte_offset = tile_offset * bytes_per_tile; 292 uint8_t *dst_tile = (uint8_t *) dst + byte_offset; 293 294 lp_tile_swizzle_4ub(format, 295 dst_tile, 296 src, src_stride, 297 ii, jj); 298 } 299 } 300 } 301 } 302 303 304 /** 305 * For testing only. 306 */ 307 void 308 test_tiled_linear_conversion(void *data, 309 enum pipe_format format, 310 unsigned width, unsigned height, 311 unsigned stride) 312 { 313 /* size in tiles */ 314 unsigned wt = (width + TILE_SIZE - 1) / TILE_SIZE; 315 unsigned ht = (height + TILE_SIZE - 1) / TILE_SIZE; 316 317 uint8_t *tiled = MALLOC(wt * ht * TILE_SIZE * TILE_SIZE * 4); 318 319 /*unsigned tiled_stride = wt * TILE_SIZE * TILE_SIZE * 4;*/ 320 321 lp_linear_to_tiled(data, tiled, 0, 0, width, height, format, 322 stride, wt); 323 324 lp_tiled_to_linear(tiled, data, 0, 0, width, height, format, 325 stride, wt); 326 327 FREE(tiled); 328 } 329 330