1 /************************************************************************** 2 * 3 * Copyright 2007 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 #include "main/bufferobj.h" 29 #include "main/image.h" 30 #include "main/pbo.h" 31 #include "main/imports.h" 32 #include "main/readpix.h" 33 #include "main/enums.h" 34 #include "main/framebuffer.h" 35 #include "util/u_inlines.h" 36 #include "util/u_format.h" 37 #include "cso_cache/cso_context.h" 38 39 #include "st_cb_fbo.h" 40 #include "st_atom.h" 41 #include "st_context.h" 42 #include "st_cb_bitmap.h" 43 #include "st_cb_readpixels.h" 44 #include "st_debug.h" 45 #include "state_tracker/st_cb_texture.h" 46 #include "state_tracker/st_format.h" 47 #include "state_tracker/st_pbo.h" 48 #include "state_tracker/st_texture.h" 49 50 /* The readpixels cache caches a blitted staging texture so that back-to-back 51 * calls to glReadPixels with user pointers require less CPU-GPU synchronization. 52 * 53 * Assumptions: 54 * 55 * (1) Blits have high synchronization overheads, and it is beneficial to 56 * use a single blit of the entire framebuffer instead of many smaller 57 * blits (because the smaller blits cannot be batched, and we have to wait 58 * for the GPU after each one). 59 * 60 * (2) transfer_map implicitly involves a blit as well (for de-tiling, copy 61 * from VRAM, etc.), so that it is beneficial to replace the 62 * _mesa_readpixels path as well when possible. 63 * 64 * Change this #define to true to fill and use the cache whenever possible 65 * (this is inefficient and only meant for testing / debugging). 66 */ 67 #define ALWAYS_READPIXELS_CACHE false 68 69 static boolean 70 needs_integer_signed_unsigned_conversion(const struct gl_context *ctx, 71 GLenum format, GLenum type) 72 { 73 struct gl_renderbuffer *rb = 74 _mesa_get_read_renderbuffer_for_format(ctx, format); 75 76 assert(rb); 77 78 GLenum srcType = _mesa_get_format_datatype(rb->Format); 79 80 if ((srcType == GL_INT && 81 (type == GL_UNSIGNED_INT || 82 type == GL_UNSIGNED_SHORT || 83 type == GL_UNSIGNED_BYTE)) || 84 (srcType == GL_UNSIGNED_INT && 85 (type == GL_INT || 86 type == GL_SHORT || 87 type == GL_BYTE))) { 88 return TRUE; 89 } 90 91 return FALSE; 92 } 93 94 static bool 95 try_pbo_readpixels(struct st_context *st, struct st_renderbuffer *strb, 96 bool invert_y, 97 GLint x, GLint y, GLsizei width, GLsizei height, 98 enum pipe_format src_format, enum pipe_format dst_format, 99 const struct gl_pixelstore_attrib *pack, void *pixels) 100 { 101 struct pipe_context *pipe = st->pipe; 102 struct pipe_screen *screen = pipe->screen; 103 struct cso_context *cso = st->cso_context; 104 struct pipe_surface *surface = strb->surface; 105 struct pipe_resource *texture = strb->texture; 106 const struct util_format_description *desc; 107 struct st_pbo_addresses addr; 108 struct pipe_framebuffer_state fb; 109 enum pipe_texture_target view_target; 110 bool success = false; 111 112 if (texture->nr_samples > 1) 113 return false; 114 115 if (!screen->is_format_supported(screen, dst_format, PIPE_BUFFER, 0, 116 PIPE_BIND_SHADER_IMAGE)) 117 return false; 118 119 desc = util_format_description(dst_format); 120 121 /* Compute PBO addresses */ 122 addr.bytes_per_pixel = desc->block.bits / 8; 123 addr.xoffset = x; 124 addr.yoffset = y; 125 addr.width = width; 126 addr.height = height; 127 addr.depth = 1; 128 if (!st_pbo_addresses_pixelstore(st, GL_TEXTURE_2D, false, pack, pixels, &addr)) 129 return false; 130 131 cso_save_state(cso, (CSO_BIT_FRAGMENT_SAMPLER_VIEWS | 132 CSO_BIT_FRAGMENT_SAMPLERS | 133 CSO_BIT_FRAGMENT_IMAGE0 | 134 CSO_BIT_BLEND | 135 CSO_BIT_VERTEX_ELEMENTS | 136 CSO_BIT_AUX_VERTEX_BUFFER_SLOT | 137 CSO_BIT_FRAMEBUFFER | 138 CSO_BIT_VIEWPORT | 139 CSO_BIT_RASTERIZER | 140 CSO_BIT_DEPTH_STENCIL_ALPHA | 141 CSO_BIT_STREAM_OUTPUTS | 142 CSO_BIT_PAUSE_QUERIES | 143 CSO_BIT_SAMPLE_MASK | 144 CSO_BIT_MIN_SAMPLES | 145 CSO_BIT_RENDER_CONDITION | 146 CSO_BITS_ALL_SHADERS)); 147 cso_save_constant_buffer_slot0(cso, PIPE_SHADER_FRAGMENT); 148 149 cso_set_sample_mask(cso, ~0); 150 cso_set_min_samples(cso, 1); 151 cso_set_render_condition(cso, NULL, FALSE, 0); 152 153 /* Set up the sampler_view */ 154 { 155 struct pipe_sampler_view templ; 156 struct pipe_sampler_view *sampler_view; 157 struct pipe_sampler_state sampler = {0}; 158 const struct pipe_sampler_state *samplers[1] = {&sampler}; 159 160 u_sampler_view_default_template(&templ, texture, src_format); 161 162 switch (texture->target) { 163 case PIPE_TEXTURE_CUBE: 164 case PIPE_TEXTURE_CUBE_ARRAY: 165 view_target = PIPE_TEXTURE_2D_ARRAY; 166 break; 167 default: 168 view_target = texture->target; 169 break; 170 } 171 172 templ.target = view_target; 173 templ.u.tex.first_level = surface->u.tex.level; 174 templ.u.tex.last_level = templ.u.tex.first_level; 175 176 if (view_target != PIPE_TEXTURE_3D) { 177 templ.u.tex.first_layer = surface->u.tex.first_layer; 178 templ.u.tex.last_layer = templ.u.tex.last_layer; 179 } else { 180 addr.constants.layer_offset = surface->u.tex.first_layer; 181 } 182 183 sampler_view = pipe->create_sampler_view(pipe, texture, &templ); 184 if (sampler_view == NULL) 185 goto fail; 186 187 cso_set_sampler_views(cso, PIPE_SHADER_FRAGMENT, 1, &sampler_view); 188 189 pipe_sampler_view_reference(&sampler_view, NULL); 190 191 cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, 1, samplers); 192 } 193 194 /* Set up destination image */ 195 { 196 struct pipe_image_view image; 197 198 memset(&image, 0, sizeof(image)); 199 image.resource = addr.buffer; 200 image.format = dst_format; 201 image.access = PIPE_IMAGE_ACCESS_WRITE; 202 image.u.buf.offset = addr.first_element * addr.bytes_per_pixel; 203 image.u.buf.size = (addr.last_element - addr.first_element + 1) * 204 addr.bytes_per_pixel; 205 206 cso_set_shader_images(cso, PIPE_SHADER_FRAGMENT, 0, 1, &image); 207 } 208 209 /* Set up no-attachment framebuffer */ 210 memset(&fb, 0, sizeof(fb)); 211 fb.width = surface->width; 212 fb.height = surface->height; 213 fb.samples = 1; 214 fb.layers = 1; 215 cso_set_framebuffer(cso, &fb); 216 217 /* Any blend state would do. Set this just to prevent drivers having 218 * blend == NULL. 219 */ 220 cso_set_blend(cso, &st->pbo.upload_blend); 221 222 cso_set_viewport_dims(cso, fb.width, fb.height, invert_y); 223 224 if (invert_y) 225 st_pbo_addresses_invert_y(&addr, fb.height); 226 227 { 228 struct pipe_depth_stencil_alpha_state dsa; 229 memset(&dsa, 0, sizeof(dsa)); 230 cso_set_depth_stencil_alpha(cso, &dsa); 231 } 232 233 /* Set up the fragment shader */ 234 { 235 void *fs = st_pbo_get_download_fs(st, view_target, src_format, dst_format); 236 if (!fs) 237 goto fail; 238 239 cso_set_fragment_shader_handle(cso, fs); 240 } 241 242 success = st_pbo_draw(st, &addr, fb.width, fb.height); 243 244 /* Buffer written via shader images needs explicit synchronization. */ 245 pipe->memory_barrier(pipe, PIPE_BARRIER_ALL); 246 247 fail: 248 cso_restore_state(cso); 249 cso_restore_constant_buffer_slot0(cso, PIPE_SHADER_FRAGMENT); 250 251 return success; 252 } 253 254 /* Invalidate the readpixels cache to ensure we don't read stale data. 255 */ 256 void st_invalidate_readpix_cache(struct st_context *st) 257 { 258 pipe_resource_reference(&st->readpix_cache.src, NULL); 259 pipe_resource_reference(&st->readpix_cache.cache, NULL); 260 } 261 262 /** 263 * Create a staging texture and blit the requested region to it. 264 */ 265 static struct pipe_resource * 266 blit_to_staging(struct st_context *st, struct st_renderbuffer *strb, 267 bool invert_y, 268 GLint x, GLint y, GLsizei width, GLsizei height, 269 GLenum format, 270 enum pipe_format src_format, enum pipe_format dst_format) 271 { 272 struct pipe_context *pipe = st->pipe; 273 struct pipe_screen *screen = pipe->screen; 274 struct pipe_resource dst_templ; 275 struct pipe_resource *dst; 276 struct pipe_blit_info blit; 277 278 /* We are creating a texture of the size of the region being read back. 279 * Need to check for NPOT texture support. */ 280 if (!screen->get_param(screen, PIPE_CAP_NPOT_TEXTURES) && 281 (!util_is_power_of_two(width) || 282 !util_is_power_of_two(height))) 283 return NULL; 284 285 /* create the destination texture */ 286 memset(&dst_templ, 0, sizeof(dst_templ)); 287 dst_templ.target = PIPE_TEXTURE_2D; 288 dst_templ.format = dst_format; 289 if (util_format_is_depth_or_stencil(dst_format)) 290 dst_templ.bind |= PIPE_BIND_DEPTH_STENCIL; 291 else 292 dst_templ.bind |= PIPE_BIND_RENDER_TARGET; 293 dst_templ.usage = PIPE_USAGE_STAGING; 294 295 st_gl_texture_dims_to_pipe_dims(GL_TEXTURE_2D, width, height, 1, 296 &dst_templ.width0, &dst_templ.height0, 297 &dst_templ.depth0, &dst_templ.array_size); 298 299 dst = screen->resource_create(screen, &dst_templ); 300 if (!dst) 301 return NULL; 302 303 memset(&blit, 0, sizeof(blit)); 304 blit.src.resource = strb->texture; 305 blit.src.level = strb->surface->u.tex.level; 306 blit.src.format = src_format; 307 blit.dst.resource = dst; 308 blit.dst.level = 0; 309 blit.dst.format = dst->format; 310 blit.src.box.x = x; 311 blit.dst.box.x = 0; 312 blit.src.box.y = y; 313 blit.dst.box.y = 0; 314 blit.src.box.z = strb->surface->u.tex.first_layer; 315 blit.dst.box.z = 0; 316 blit.src.box.width = blit.dst.box.width = width; 317 blit.src.box.height = blit.dst.box.height = height; 318 blit.src.box.depth = blit.dst.box.depth = 1; 319 blit.mask = st_get_blit_mask(strb->Base._BaseFormat, format); 320 blit.filter = PIPE_TEX_FILTER_NEAREST; 321 blit.scissor_enable = FALSE; 322 323 if (invert_y) { 324 blit.src.box.y = strb->Base.Height - blit.src.box.y; 325 blit.src.box.height = -blit.src.box.height; 326 } 327 328 /* blit */ 329 st->pipe->blit(st->pipe, &blit); 330 331 return dst; 332 } 333 334 static struct pipe_resource * 335 try_cached_readpixels(struct st_context *st, struct st_renderbuffer *strb, 336 bool invert_y, 337 GLsizei width, GLsizei height, 338 GLenum format, 339 enum pipe_format src_format, enum pipe_format dst_format) 340 { 341 struct pipe_resource *src = strb->texture; 342 struct pipe_resource *dst = NULL; 343 344 if (ST_DEBUG & DEBUG_NOREADPIXCACHE) 345 return NULL; 346 347 /* Reset cache after invalidation or switch of parameters. */ 348 if (st->readpix_cache.src != src || 349 st->readpix_cache.dst_format != dst_format || 350 st->readpix_cache.level != strb->surface->u.tex.level || 351 st->readpix_cache.layer != strb->surface->u.tex.first_layer) { 352 pipe_resource_reference(&st->readpix_cache.src, src); 353 pipe_resource_reference(&st->readpix_cache.cache, NULL); 354 st->readpix_cache.dst_format = dst_format; 355 st->readpix_cache.level = strb->surface->u.tex.level; 356 st->readpix_cache.layer = strb->surface->u.tex.first_layer; 357 st->readpix_cache.hits = 0; 358 } 359 360 /* Decide whether to trigger the cache. */ 361 if (!st->readpix_cache.cache) { 362 if (!strb->use_readpix_cache && !ALWAYS_READPIXELS_CACHE) { 363 /* Heuristic: If previous successive calls read at least a fraction 364 * of the surface _and_ we read again, trigger the cache. 365 */ 366 unsigned threshold = MAX2(1, strb->Base.Width * strb->Base.Height / 8); 367 368 if (st->readpix_cache.hits < threshold) { 369 st->readpix_cache.hits += width * height; 370 return NULL; 371 } 372 373 strb->use_readpix_cache = true; 374 } 375 376 /* Fill the cache */ 377 st->readpix_cache.cache = blit_to_staging(st, strb, invert_y, 378 0, 0, 379 strb->Base.Width, 380 strb->Base.Height, format, 381 src_format, dst_format); 382 } 383 384 /* Return an owning reference to stay consistent with the non-cached path */ 385 pipe_resource_reference(&dst, st->readpix_cache.cache); 386 387 return dst; 388 } 389 390 /** 391 * This uses a blit to copy the read buffer to a texture format which matches 392 * the format and type combo and then a fast read-back is done using memcpy. 393 * We can do arbitrary X/Y/Z/W/0/1 swizzling here as long as there is 394 * a format which matches the swizzling. 395 * 396 * If such a format isn't available, we fall back to _mesa_readpixels. 397 * 398 * NOTE: Some drivers use a blit to convert between tiled and linear 399 * texture layouts during texture uploads/downloads, so the blit 400 * we do here should be free in such cases. 401 */ 402 static void 403 st_ReadPixels(struct gl_context *ctx, GLint x, GLint y, 404 GLsizei width, GLsizei height, 405 GLenum format, GLenum type, 406 const struct gl_pixelstore_attrib *pack, 407 void *pixels) 408 { 409 struct st_context *st = st_context(ctx); 410 struct gl_renderbuffer *rb = 411 _mesa_get_read_renderbuffer_for_format(ctx, format); 412 struct st_renderbuffer *strb = st_renderbuffer(rb); 413 struct pipe_context *pipe = st->pipe; 414 struct pipe_screen *screen = pipe->screen; 415 struct pipe_resource *src; 416 struct pipe_resource *dst = NULL; 417 enum pipe_format dst_format, src_format; 418 unsigned bind; 419 struct pipe_transfer *tex_xfer; 420 ubyte *map = NULL; 421 int dst_x, dst_y; 422 423 /* Validate state (to be sure we have up-to-date framebuffer surfaces) 424 * and flush the bitmap cache prior to reading. */ 425 st_validate_state(st, ST_PIPELINE_RENDER); 426 st_flush_bitmap_cache(st); 427 428 if (!st->prefer_blit_based_texture_transfer) { 429 goto fallback; 430 } 431 432 /* This must be done after state validation. */ 433 src = strb->texture; 434 435 /* XXX Fallback for depth-stencil formats due to an incomplete 436 * stencil blit implementation in some drivers. */ 437 if (format == GL_DEPTH_STENCIL) { 438 goto fallback; 439 } 440 441 /* If the base internal format and the texture format don't match, we have 442 * to use the slow path. */ 443 if (rb->_BaseFormat != 444 _mesa_get_format_base_format(rb->Format)) { 445 goto fallback; 446 } 447 448 if (_mesa_readpixels_needs_slow_path(ctx, format, type, GL_TRUE)) { 449 goto fallback; 450 } 451 452 /* Convert the source format to what is expected by ReadPixels 453 * and see if it's supported. */ 454 src_format = util_format_linear(src->format); 455 src_format = util_format_luminance_to_red(src_format); 456 src_format = util_format_intensity_to_red(src_format); 457 458 if (!src_format || 459 !screen->is_format_supported(screen, src_format, src->target, 460 src->nr_samples, 461 PIPE_BIND_SAMPLER_VIEW)) { 462 goto fallback; 463 } 464 465 if (format == GL_DEPTH_COMPONENT || format == GL_DEPTH_STENCIL) 466 bind = PIPE_BIND_DEPTH_STENCIL; 467 else 468 bind = PIPE_BIND_RENDER_TARGET; 469 470 /* Choose the destination format by finding the best match 471 * for the format+type combo. */ 472 dst_format = st_choose_matching_format(st, bind, format, type, 473 pack->SwapBytes); 474 if (dst_format == PIPE_FORMAT_NONE) { 475 goto fallback; 476 } 477 478 if (st->pbo.download_enabled && _mesa_is_bufferobj(pack->BufferObj)) { 479 if (try_pbo_readpixels(st, strb, 480 st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP, 481 x, y, width, height, 482 src_format, dst_format, 483 pack, pixels)) 484 return; 485 } 486 487 if (needs_integer_signed_unsigned_conversion(ctx, format, type)) { 488 goto fallback; 489 } 490 491 /* Cache a staging texture for back-to-back ReadPixels, to avoid CPU-GPU 492 * synchronization overhead. 493 */ 494 dst = try_cached_readpixels(st, strb, 495 st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP, 496 width, height, format, src_format, dst_format); 497 if (dst) { 498 dst_x = x; 499 dst_y = y; 500 } else { 501 /* See if the texture format already matches the format and type, 502 * in which case the memcpy-based fast path will likely be used and 503 * we don't have to blit. */ 504 if (_mesa_format_matches_format_and_type(rb->Format, format, 505 type, pack->SwapBytes, NULL)) { 506 goto fallback; 507 } 508 509 dst = blit_to_staging(st, strb, 510 st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP, 511 x, y, width, height, format, 512 src_format, dst_format); 513 if (!dst) 514 goto fallback; 515 516 dst_x = 0; 517 dst_y = 0; 518 } 519 520 /* map resources */ 521 pixels = _mesa_map_pbo_dest(ctx, pack, pixels); 522 523 map = pipe_transfer_map_3d(pipe, dst, 0, PIPE_TRANSFER_READ, 524 dst_x, dst_y, 0, width, height, 1, &tex_xfer); 525 if (!map) { 526 _mesa_unmap_pbo_dest(ctx, pack); 527 pipe_resource_reference(&dst, NULL); 528 goto fallback; 529 } 530 531 /* memcpy data into a user buffer */ 532 { 533 const uint bytesPerRow = width * util_format_get_blocksize(dst_format); 534 const int destStride = _mesa_image_row_stride(pack, width, format, type); 535 char *dest = _mesa_image_address2d(pack, pixels, 536 width, height, format, 537 type, 0, 0); 538 539 if (tex_xfer->stride == bytesPerRow && destStride == bytesPerRow) { 540 memcpy(dest, map, bytesPerRow * height); 541 } else { 542 GLuint row; 543 544 for (row = 0; row < (unsigned) height; row++) { 545 memcpy(dest, map, bytesPerRow); 546 map += tex_xfer->stride; 547 dest += destStride; 548 } 549 } 550 } 551 552 pipe_transfer_unmap(pipe, tex_xfer); 553 _mesa_unmap_pbo_dest(ctx, pack); 554 pipe_resource_reference(&dst, NULL); 555 return; 556 557 fallback: 558 _mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels); 559 } 560 561 void st_init_readpixels_functions(struct dd_function_table *functions) 562 { 563 functions->ReadPixels = st_ReadPixels; 564 } 565