1 /************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 #include <limits.h> 29 #include "util/u_memory.h" 30 #include "util/u_math.h" 31 #include "util/u_rect.h" 32 #include "util/u_surface.h" 33 #include "util/u_pack_color.h" 34 #include "util/u_string.h" 35 #include "util/u_thread.h" 36 37 #include "util/os_time.h" 38 39 #include "lp_scene_queue.h" 40 #include "lp_context.h" 41 #include "lp_debug.h" 42 #include "lp_fence.h" 43 #include "lp_perf.h" 44 #include "lp_query.h" 45 #include "lp_rast.h" 46 #include "lp_rast_priv.h" 47 #include "gallivm/lp_bld_format.h" 48 #include "gallivm/lp_bld_debug.h" 49 #include "lp_scene.h" 50 #include "lp_tex_sample.h" 51 52 53 #ifdef DEBUG 54 int jit_line = 0; 55 const struct lp_rast_state *jit_state = NULL; 56 const struct lp_rasterizer_task *jit_task = NULL; 57 #endif 58 59 60 /** 61 * Begin rasterizing a scene. 62 * Called once per scene by one thread. 63 */ 64 static void 65 lp_rast_begin( struct lp_rasterizer *rast, 66 struct lp_scene *scene ) 67 { 68 rast->curr_scene = scene; 69 70 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); 71 72 lp_scene_begin_rasterization( scene ); 73 lp_scene_bin_iter_begin( scene ); 74 } 75 76 77 static void 78 lp_rast_end( struct lp_rasterizer *rast ) 79 { 80 lp_scene_end_rasterization( rast->curr_scene ); 81 82 rast->curr_scene = NULL; 83 } 84 85 86 /** 87 * Beginning rasterization of a tile. 88 * \param x window X position of the tile, in pixels 89 * \param y window Y position of the tile, in pixels 90 */ 91 static void 92 lp_rast_tile_begin(struct lp_rasterizer_task *task, 93 const struct cmd_bin *bin, 94 int x, int y) 95 { 96 unsigned i; 97 struct lp_scene *scene = task->scene; 98 99 LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y); 100 101 task->bin = bin; 102 task->x = x * TILE_SIZE; 103 task->y = y * TILE_SIZE; 104 task->width = TILE_SIZE + x * TILE_SIZE > task->scene->fb.width ? 105 task->scene->fb.width - x * TILE_SIZE : TILE_SIZE; 106 task->height = TILE_SIZE + y * TILE_SIZE > task->scene->fb.height ? 107 task->scene->fb.height - y * TILE_SIZE : TILE_SIZE; 108 109 task->thread_data.vis_counter = 0; 110 task->ps_invocations = 0; 111 112 for (i = 0; i < task->scene->fb.nr_cbufs; i++) { 113 if (task->scene->fb.cbufs[i]) { 114 task->color_tiles[i] = scene->cbufs[i].map + 115 scene->cbufs[i].stride * task->y + 116 scene->cbufs[i].format_bytes * task->x; 117 } 118 } 119 if (task->scene->fb.zsbuf) { 120 task->depth_tile = scene->zsbuf.map + 121 scene->zsbuf.stride * task->y + 122 scene->zsbuf.format_bytes * task->x; 123 } 124 } 125 126 127 /** 128 * Clear the rasterizer's current color tile. 129 * This is a bin command called during bin processing. 130 * Clear commands always clear all bound layers. 131 */ 132 static void 133 lp_rast_clear_color(struct lp_rasterizer_task *task, 134 const union lp_rast_cmd_arg arg) 135 { 136 const struct lp_scene *scene = task->scene; 137 unsigned cbuf = arg.clear_rb->cbuf; 138 union util_color uc; 139 enum pipe_format format; 140 141 /* we never bin clear commands for non-existing buffers */ 142 assert(cbuf < scene->fb.nr_cbufs); 143 assert(scene->fb.cbufs[cbuf]); 144 145 format = scene->fb.cbufs[cbuf]->format; 146 uc = arg.clear_rb->color_val; 147 148 /* 149 * this is pretty rough since we have target format (bunch of bytes...) here. 150 * dump it as raw 4 dwords. 151 */ 152 LP_DBG(DEBUG_RAST, "%s clear value (target format %d) raw 0x%x,0x%x,0x%x,0x%x\n", 153 __FUNCTION__, format, uc.ui[0], uc.ui[1], uc.ui[2], uc.ui[3]); 154 155 156 util_fill_box(scene->cbufs[cbuf].map, 157 format, 158 scene->cbufs[cbuf].stride, 159 scene->cbufs[cbuf].layer_stride, 160 task->x, 161 task->y, 162 0, 163 task->width, 164 task->height, 165 scene->fb_max_layer + 1, 166 &uc); 167 168 /* this will increase for each rb which probably doesn't mean much */ 169 LP_COUNT(nr_color_tile_clear); 170 } 171 172 173 /** 174 * Clear the rasterizer's current z/stencil tile. 175 * This is a bin command called during bin processing. 176 * Clear commands always clear all bound layers. 177 */ 178 static void 179 lp_rast_clear_zstencil(struct lp_rasterizer_task *task, 180 const union lp_rast_cmd_arg arg) 181 { 182 const struct lp_scene *scene = task->scene; 183 uint64_t clear_value64 = arg.clear_zstencil.value; 184 uint64_t clear_mask64 = arg.clear_zstencil.mask; 185 uint32_t clear_value = (uint32_t) clear_value64; 186 uint32_t clear_mask = (uint32_t) clear_mask64; 187 const unsigned height = task->height; 188 const unsigned width = task->width; 189 const unsigned dst_stride = scene->zsbuf.stride; 190 uint8_t *dst; 191 unsigned i, j; 192 unsigned block_size; 193 194 LP_DBG(DEBUG_RAST, "%s: value=0x%08x, mask=0x%08x\n", 195 __FUNCTION__, clear_value, clear_mask); 196 197 /* 198 * Clear the area of the depth/depth buffer matching this tile. 199 */ 200 201 if (scene->fb.zsbuf) { 202 unsigned layer; 203 uint8_t *dst_layer = task->depth_tile; 204 block_size = util_format_get_blocksize(scene->fb.zsbuf->format); 205 206 clear_value &= clear_mask; 207 208 for (layer = 0; layer <= scene->fb_max_layer; layer++) { 209 dst = dst_layer; 210 211 switch (block_size) { 212 case 1: 213 assert(clear_mask == 0xff); 214 memset(dst, (uint8_t) clear_value, height * width); 215 break; 216 case 2: 217 if (clear_mask == 0xffff) { 218 for (i = 0; i < height; i++) { 219 uint16_t *row = (uint16_t *)dst; 220 for (j = 0; j < width; j++) 221 *row++ = (uint16_t) clear_value; 222 dst += dst_stride; 223 } 224 } 225 else { 226 for (i = 0; i < height; i++) { 227 uint16_t *row = (uint16_t *)dst; 228 for (j = 0; j < width; j++) { 229 uint16_t tmp = ~clear_mask & *row; 230 *row++ = clear_value | tmp; 231 } 232 dst += dst_stride; 233 } 234 } 235 break; 236 case 4: 237 if (clear_mask == 0xffffffff) { 238 for (i = 0; i < height; i++) { 239 uint32_t *row = (uint32_t *)dst; 240 for (j = 0; j < width; j++) 241 *row++ = clear_value; 242 dst += dst_stride; 243 } 244 } 245 else { 246 for (i = 0; i < height; i++) { 247 uint32_t *row = (uint32_t *)dst; 248 for (j = 0; j < width; j++) { 249 uint32_t tmp = ~clear_mask & *row; 250 *row++ = clear_value | tmp; 251 } 252 dst += dst_stride; 253 } 254 } 255 break; 256 case 8: 257 clear_value64 &= clear_mask64; 258 if (clear_mask64 == 0xffffffffffULL) { 259 for (i = 0; i < height; i++) { 260 uint64_t *row = (uint64_t *)dst; 261 for (j = 0; j < width; j++) 262 *row++ = clear_value64; 263 dst += dst_stride; 264 } 265 } 266 else { 267 for (i = 0; i < height; i++) { 268 uint64_t *row = (uint64_t *)dst; 269 for (j = 0; j < width; j++) { 270 uint64_t tmp = ~clear_mask64 & *row; 271 *row++ = clear_value64 | tmp; 272 } 273 dst += dst_stride; 274 } 275 } 276 break; 277 278 default: 279 assert(0); 280 break; 281 } 282 dst_layer += scene->zsbuf.layer_stride; 283 } 284 } 285 } 286 287 288 289 /** 290 * Run the shader on all blocks in a tile. This is used when a tile is 291 * completely contained inside a triangle. 292 * This is a bin command called during bin processing. 293 */ 294 static void 295 lp_rast_shade_tile(struct lp_rasterizer_task *task, 296 const union lp_rast_cmd_arg arg) 297 { 298 const struct lp_scene *scene = task->scene; 299 const struct lp_rast_shader_inputs *inputs = arg.shade_tile; 300 const struct lp_rast_state *state; 301 struct lp_fragment_shader_variant *variant; 302 const unsigned tile_x = task->x, tile_y = task->y; 303 unsigned x, y; 304 305 if (inputs->disable) { 306 /* This command was partially binned and has been disabled */ 307 return; 308 } 309 310 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); 311 312 state = task->state; 313 assert(state); 314 if (!state) { 315 return; 316 } 317 variant = state->variant; 318 319 /* render the whole 64x64 tile in 4x4 chunks */ 320 for (y = 0; y < task->height; y += 4){ 321 for (x = 0; x < task->width; x += 4) { 322 uint8_t *color[PIPE_MAX_COLOR_BUFS]; 323 unsigned stride[PIPE_MAX_COLOR_BUFS]; 324 uint8_t *depth = NULL; 325 unsigned depth_stride = 0; 326 unsigned i; 327 328 /* color buffer */ 329 for (i = 0; i < scene->fb.nr_cbufs; i++){ 330 if (scene->fb.cbufs[i]) { 331 stride[i] = scene->cbufs[i].stride; 332 color[i] = lp_rast_get_color_block_pointer(task, i, tile_x + x, 333 tile_y + y, inputs->layer); 334 } 335 else { 336 stride[i] = 0; 337 color[i] = NULL; 338 } 339 } 340 341 /* depth buffer */ 342 if (scene->zsbuf.map) { 343 depth = lp_rast_get_depth_block_pointer(task, tile_x + x, 344 tile_y + y, inputs->layer); 345 depth_stride = scene->zsbuf.stride; 346 } 347 348 /* Propagate non-interpolated raster state. */ 349 task->thread_data.raster_state.viewport_index = inputs->viewport_index; 350 351 /* run shader on 4x4 block */ 352 BEGIN_JIT_CALL(state, task); 353 variant->jit_function[RAST_WHOLE]( &state->jit_context, 354 tile_x + x, tile_y + y, 355 inputs->frontfacing, 356 GET_A0(inputs), 357 GET_DADX(inputs), 358 GET_DADY(inputs), 359 color, 360 depth, 361 0xffff, 362 &task->thread_data, 363 stride, 364 depth_stride); 365 END_JIT_CALL(); 366 } 367 } 368 } 369 370 371 /** 372 * Run the shader on all blocks in a tile. This is used when a tile is 373 * completely contained inside a triangle, and the shader is opaque. 374 * This is a bin command called during bin processing. 375 */ 376 static void 377 lp_rast_shade_tile_opaque(struct lp_rasterizer_task *task, 378 const union lp_rast_cmd_arg arg) 379 { 380 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); 381 382 assert(task->state); 383 if (!task->state) { 384 return; 385 } 386 387 lp_rast_shade_tile(task, arg); 388 } 389 390 391 /** 392 * Compute shading for a 4x4 block of pixels inside a triangle. 393 * This is a bin command called during bin processing. 394 * \param x X position of quad in window coords 395 * \param y Y position of quad in window coords 396 */ 397 void 398 lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, 399 const struct lp_rast_shader_inputs *inputs, 400 unsigned x, unsigned y, 401 unsigned mask) 402 { 403 const struct lp_rast_state *state = task->state; 404 struct lp_fragment_shader_variant *variant = state->variant; 405 const struct lp_scene *scene = task->scene; 406 uint8_t *color[PIPE_MAX_COLOR_BUFS]; 407 unsigned stride[PIPE_MAX_COLOR_BUFS]; 408 uint8_t *depth = NULL; 409 unsigned depth_stride = 0; 410 unsigned i; 411 412 assert(state); 413 414 /* Sanity checks */ 415 assert(x < scene->tiles_x * TILE_SIZE); 416 assert(y < scene->tiles_y * TILE_SIZE); 417 assert(x % TILE_VECTOR_WIDTH == 0); 418 assert(y % TILE_VECTOR_HEIGHT == 0); 419 420 assert((x % 4) == 0); 421 assert((y % 4) == 0); 422 423 /* color buffer */ 424 for (i = 0; i < scene->fb.nr_cbufs; i++) { 425 if (scene->fb.cbufs[i]) { 426 stride[i] = scene->cbufs[i].stride; 427 color[i] = lp_rast_get_color_block_pointer(task, i, x, y, 428 inputs->layer); 429 } 430 else { 431 stride[i] = 0; 432 color[i] = NULL; 433 } 434 } 435 436 /* depth buffer */ 437 if (scene->zsbuf.map) { 438 depth_stride = scene->zsbuf.stride; 439 depth = lp_rast_get_depth_block_pointer(task, x, y, inputs->layer); 440 } 441 442 assert(lp_check_alignment(state->jit_context.u8_blend_color, 16)); 443 444 /* 445 * The rasterizer may produce fragments outside our 446 * allocated 4x4 blocks hence need to filter them out here. 447 */ 448 if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) { 449 /* not very accurate would need a popcount on the mask */ 450 /* always count this not worth bothering? */ 451 task->ps_invocations += 1 * variant->ps_inv_multiplier; 452 453 /* Propagate non-interpolated raster state. */ 454 task->thread_data.raster_state.viewport_index = inputs->viewport_index; 455 456 /* run shader on 4x4 block */ 457 BEGIN_JIT_CALL(state, task); 458 variant->jit_function[RAST_EDGE_TEST](&state->jit_context, 459 x, y, 460 inputs->frontfacing, 461 GET_A0(inputs), 462 GET_DADX(inputs), 463 GET_DADY(inputs), 464 color, 465 depth, 466 mask, 467 &task->thread_data, 468 stride, 469 depth_stride); 470 END_JIT_CALL(); 471 } 472 } 473 474 475 476 /** 477 * Begin a new occlusion query. 478 * This is a bin command put in all bins. 479 * Called per thread. 480 */ 481 static void 482 lp_rast_begin_query(struct lp_rasterizer_task *task, 483 const union lp_rast_cmd_arg arg) 484 { 485 struct llvmpipe_query *pq = arg.query_obj; 486 487 switch (pq->type) { 488 case PIPE_QUERY_OCCLUSION_COUNTER: 489 case PIPE_QUERY_OCCLUSION_PREDICATE: 490 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: 491 pq->start[task->thread_index] = task->thread_data.vis_counter; 492 break; 493 case PIPE_QUERY_PIPELINE_STATISTICS: 494 pq->start[task->thread_index] = task->ps_invocations; 495 break; 496 default: 497 assert(0); 498 break; 499 } 500 } 501 502 503 /** 504 * End the current occlusion query. 505 * This is a bin command put in all bins. 506 * Called per thread. 507 */ 508 static void 509 lp_rast_end_query(struct lp_rasterizer_task *task, 510 const union lp_rast_cmd_arg arg) 511 { 512 struct llvmpipe_query *pq = arg.query_obj; 513 514 switch (pq->type) { 515 case PIPE_QUERY_OCCLUSION_COUNTER: 516 case PIPE_QUERY_OCCLUSION_PREDICATE: 517 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: 518 pq->end[task->thread_index] += 519 task->thread_data.vis_counter - pq->start[task->thread_index]; 520 pq->start[task->thread_index] = 0; 521 break; 522 case PIPE_QUERY_TIMESTAMP: 523 pq->end[task->thread_index] = os_time_get_nano(); 524 break; 525 case PIPE_QUERY_PIPELINE_STATISTICS: 526 pq->end[task->thread_index] += 527 task->ps_invocations - pq->start[task->thread_index]; 528 pq->start[task->thread_index] = 0; 529 break; 530 default: 531 assert(0); 532 break; 533 } 534 } 535 536 537 void 538 lp_rast_set_state(struct lp_rasterizer_task *task, 539 const union lp_rast_cmd_arg arg) 540 { 541 task->state = arg.state; 542 } 543 544 545 546 /** 547 * Called when we're done writing to a color tile. 548 */ 549 static void 550 lp_rast_tile_end(struct lp_rasterizer_task *task) 551 { 552 unsigned i; 553 554 for (i = 0; i < task->scene->num_active_queries; ++i) { 555 lp_rast_end_query(task, lp_rast_arg_query(task->scene->active_queries[i])); 556 } 557 558 /* debug */ 559 memset(task->color_tiles, 0, sizeof(task->color_tiles)); 560 task->depth_tile = NULL; 561 562 task->bin = NULL; 563 } 564 565 static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] = 566 { 567 lp_rast_clear_color, 568 lp_rast_clear_zstencil, 569 lp_rast_triangle_1, 570 lp_rast_triangle_2, 571 lp_rast_triangle_3, 572 lp_rast_triangle_4, 573 lp_rast_triangle_5, 574 lp_rast_triangle_6, 575 lp_rast_triangle_7, 576 lp_rast_triangle_8, 577 lp_rast_triangle_3_4, 578 lp_rast_triangle_3_16, 579 lp_rast_triangle_4_16, 580 lp_rast_shade_tile, 581 lp_rast_shade_tile_opaque, 582 lp_rast_begin_query, 583 lp_rast_end_query, 584 lp_rast_set_state, 585 lp_rast_triangle_32_1, 586 lp_rast_triangle_32_2, 587 lp_rast_triangle_32_3, 588 lp_rast_triangle_32_4, 589 lp_rast_triangle_32_5, 590 lp_rast_triangle_32_6, 591 lp_rast_triangle_32_7, 592 lp_rast_triangle_32_8, 593 lp_rast_triangle_32_3_4, 594 lp_rast_triangle_32_3_16, 595 lp_rast_triangle_32_4_16 596 }; 597 598 599 static void 600 do_rasterize_bin(struct lp_rasterizer_task *task, 601 const struct cmd_bin *bin, 602 int x, int y) 603 { 604 const struct cmd_block *block; 605 unsigned k; 606 607 if (0) 608 lp_debug_bin(bin, x, y); 609 610 for (block = bin->head; block; block = block->next) { 611 for (k = 0; k < block->count; k++) { 612 dispatch[block->cmd[k]]( task, block->arg[k] ); 613 } 614 } 615 } 616 617 618 619 /** 620 * Rasterize commands for a single bin. 621 * \param x, y position of the bin's tile in the framebuffer 622 * Must be called between lp_rast_begin() and lp_rast_end(). 623 * Called per thread. 624 */ 625 static void 626 rasterize_bin(struct lp_rasterizer_task *task, 627 const struct cmd_bin *bin, int x, int y ) 628 { 629 lp_rast_tile_begin( task, bin, x, y ); 630 631 do_rasterize_bin(task, bin, x, y); 632 633 lp_rast_tile_end(task); 634 635 636 /* Debug/Perf flags: 637 */ 638 if (bin->head->count == 1) { 639 if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE_OPAQUE) 640 LP_COUNT(nr_pure_shade_opaque_64); 641 else if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE) 642 LP_COUNT(nr_pure_shade_64); 643 } 644 } 645 646 647 /* An empty bin is one that just loads the contents of the tile and 648 * stores them again unchanged. This typically happens when bins have 649 * been flushed for some reason in the middle of a frame, or when 650 * incremental updates are being made to a render target. 651 * 652 * Try to avoid doing pointless work in this case. 653 */ 654 static boolean 655 is_empty_bin( const struct cmd_bin *bin ) 656 { 657 return bin->head == NULL; 658 } 659 660 661 /** 662 * Rasterize/execute all bins within a scene. 663 * Called per thread. 664 */ 665 static void 666 rasterize_scene(struct lp_rasterizer_task *task, 667 struct lp_scene *scene) 668 { 669 task->scene = scene; 670 671 /* Clear the cache tags. This should not always be necessary but 672 simpler for now. */ 673 #if LP_USE_TEXTURE_CACHE 674 memset(task->thread_data.cache->cache_tags, 0, 675 sizeof(task->thread_data.cache->cache_tags)); 676 #if LP_BUILD_FORMAT_CACHE_DEBUG 677 task->thread_data.cache->cache_access_total = 0; 678 task->thread_data.cache->cache_access_miss = 0; 679 #endif 680 #endif 681 682 if (!task->rast->no_rast && !scene->discard) { 683 /* loop over scene bins, rasterize each */ 684 { 685 struct cmd_bin *bin; 686 int i, j; 687 688 assert(scene); 689 while ((bin = lp_scene_bin_iter_next(scene, &i, &j))) { 690 if (!is_empty_bin( bin )) 691 rasterize_bin(task, bin, i, j); 692 } 693 } 694 } 695 696 697 #if LP_BUILD_FORMAT_CACHE_DEBUG 698 { 699 uint64_t total, miss; 700 total = task->thread_data.cache->cache_access_total; 701 miss = task->thread_data.cache->cache_access_miss; 702 if (total) { 703 debug_printf("thread %d cache access %llu miss %llu hit rate %f\n", 704 task->thread_index, (long long unsigned)total, 705 (long long unsigned)miss, 706 (float)(total - miss)/(float)total); 707 } 708 } 709 #endif 710 711 if (scene->fence) { 712 lp_fence_signal(scene->fence); 713 } 714 715 task->scene = NULL; 716 } 717 718 719 /** 720 * Called by setup module when it has something for us to render. 721 */ 722 void 723 lp_rast_queue_scene( struct lp_rasterizer *rast, 724 struct lp_scene *scene) 725 { 726 LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); 727 728 if (rast->num_threads == 0) { 729 /* no threading */ 730 unsigned fpstate = util_fpstate_get(); 731 732 /* Make sure that denorms are treated like zeros. This is 733 * the behavior required by D3D10. OpenGL doesn't care. 734 */ 735 util_fpstate_set_denorms_to_zero(fpstate); 736 737 lp_rast_begin( rast, scene ); 738 739 rasterize_scene( &rast->tasks[0], scene ); 740 741 lp_rast_end( rast ); 742 743 util_fpstate_set(fpstate); 744 745 rast->curr_scene = NULL; 746 } 747 else { 748 /* threaded rendering! */ 749 unsigned i; 750 751 lp_scene_enqueue( rast->full_scenes, scene ); 752 753 /* signal the threads that there's work to do */ 754 for (i = 0; i < rast->num_threads; i++) { 755 pipe_semaphore_signal(&rast->tasks[i].work_ready); 756 } 757 } 758 759 LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__); 760 } 761 762 763 void 764 lp_rast_finish( struct lp_rasterizer *rast ) 765 { 766 if (rast->num_threads == 0) { 767 /* nothing to do */ 768 } 769 else { 770 int i; 771 772 /* wait for work to complete */ 773 for (i = 0; i < rast->num_threads; i++) { 774 pipe_semaphore_wait(&rast->tasks[i].work_done); 775 } 776 } 777 } 778 779 780 /** 781 * This is the thread's main entrypoint. 782 * It's a simple loop: 783 * 1. wait for work 784 * 2. do work 785 * 3. signal that we're done 786 */ 787 static int 788 thread_function(void *init_data) 789 { 790 struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data; 791 struct lp_rasterizer *rast = task->rast; 792 boolean debug = false; 793 char thread_name[16]; 794 unsigned fpstate; 795 796 util_snprintf(thread_name, sizeof thread_name, "llvmpipe-%u", task->thread_index); 797 u_thread_setname(thread_name); 798 799 /* Make sure that denorms are treated like zeros. This is 800 * the behavior required by D3D10. OpenGL doesn't care. 801 */ 802 fpstate = util_fpstate_get(); 803 util_fpstate_set_denorms_to_zero(fpstate); 804 805 while (1) { 806 /* wait for work */ 807 if (debug) 808 debug_printf("thread %d waiting for work\n", task->thread_index); 809 pipe_semaphore_wait(&task->work_ready); 810 811 if (rast->exit_flag) 812 break; 813 814 if (task->thread_index == 0) { 815 /* thread[0]: 816 * - get next scene to rasterize 817 * - map the framebuffer surfaces 818 */ 819 lp_rast_begin( rast, 820 lp_scene_dequeue( rast->full_scenes, TRUE ) ); 821 } 822 823 /* Wait for all threads to get here so that threads[1+] don't 824 * get a null rast->curr_scene pointer. 825 */ 826 util_barrier_wait( &rast->barrier ); 827 828 /* do work */ 829 if (debug) 830 debug_printf("thread %d doing work\n", task->thread_index); 831 832 rasterize_scene(task, 833 rast->curr_scene); 834 835 /* wait for all threads to finish with this scene */ 836 util_barrier_wait( &rast->barrier ); 837 838 /* XXX: shouldn't be necessary: 839 */ 840 if (task->thread_index == 0) { 841 lp_rast_end( rast ); 842 } 843 844 /* signal done with work */ 845 if (debug) 846 debug_printf("thread %d done working\n", task->thread_index); 847 848 pipe_semaphore_signal(&task->work_done); 849 } 850 851 #ifdef _WIN32 852 pipe_semaphore_signal(&task->work_done); 853 #endif 854 855 return 0; 856 } 857 858 859 /** 860 * Initialize semaphores and spawn the threads. 861 */ 862 static void 863 create_rast_threads(struct lp_rasterizer *rast) 864 { 865 unsigned i; 866 867 /* NOTE: if num_threads is zero, we won't use any threads */ 868 for (i = 0; i < rast->num_threads; i++) { 869 pipe_semaphore_init(&rast->tasks[i].work_ready, 0); 870 pipe_semaphore_init(&rast->tasks[i].work_done, 0); 871 rast->threads[i] = u_thread_create(thread_function, 872 (void *) &rast->tasks[i]); 873 } 874 } 875 876 877 878 /** 879 * Create new lp_rasterizer. If num_threads is zero, don't create any 880 * new threads, do rendering synchronously. 881 * \param num_threads number of rasterizer threads to create 882 */ 883 struct lp_rasterizer * 884 lp_rast_create( unsigned num_threads ) 885 { 886 struct lp_rasterizer *rast; 887 unsigned i; 888 889 rast = CALLOC_STRUCT(lp_rasterizer); 890 if (!rast) { 891 goto no_rast; 892 } 893 894 rast->full_scenes = lp_scene_queue_create(); 895 if (!rast->full_scenes) { 896 goto no_full_scenes; 897 } 898 899 for (i = 0; i < MAX2(1, num_threads); i++) { 900 struct lp_rasterizer_task *task = &rast->tasks[i]; 901 task->rast = rast; 902 task->thread_index = i; 903 task->thread_data.cache = align_malloc(sizeof(struct lp_build_format_cache), 904 16); 905 if (!task->thread_data.cache) { 906 goto no_thread_data_cache; 907 } 908 } 909 910 rast->num_threads = num_threads; 911 912 rast->no_rast = debug_get_bool_option("LP_NO_RAST", FALSE); 913 914 create_rast_threads(rast); 915 916 /* for synchronizing rasterization threads */ 917 if (rast->num_threads > 0) { 918 util_barrier_init( &rast->barrier, rast->num_threads ); 919 } 920 921 memset(lp_dummy_tile, 0, sizeof lp_dummy_tile); 922 923 return rast; 924 925 no_thread_data_cache: 926 for (i = 0; i < MAX2(1, rast->num_threads); i++) { 927 if (rast->tasks[i].thread_data.cache) { 928 align_free(rast->tasks[i].thread_data.cache); 929 } 930 } 931 932 lp_scene_queue_destroy(rast->full_scenes); 933 no_full_scenes: 934 FREE(rast); 935 no_rast: 936 return NULL; 937 } 938 939 940 /* Shutdown: 941 */ 942 void lp_rast_destroy( struct lp_rasterizer *rast ) 943 { 944 unsigned i; 945 946 /* Set exit_flag and signal each thread's work_ready semaphore. 947 * Each thread will be woken up, notice that the exit_flag is set and 948 * break out of its main loop. The thread will then exit. 949 */ 950 rast->exit_flag = TRUE; 951 for (i = 0; i < rast->num_threads; i++) { 952 pipe_semaphore_signal(&rast->tasks[i].work_ready); 953 } 954 955 /* Wait for threads to terminate before cleaning up per-thread data. 956 * We don't actually call pipe_thread_wait to avoid dead lock on Windows 957 * per https://bugs.freedesktop.org/show_bug.cgi?id=76252 */ 958 for (i = 0; i < rast->num_threads; i++) { 959 #ifdef _WIN32 960 pipe_semaphore_wait(&rast->tasks[i].work_done); 961 #else 962 thrd_join(rast->threads[i], NULL); 963 #endif 964 } 965 966 /* Clean up per-thread data */ 967 for (i = 0; i < rast->num_threads; i++) { 968 pipe_semaphore_destroy(&rast->tasks[i].work_ready); 969 pipe_semaphore_destroy(&rast->tasks[i].work_done); 970 } 971 for (i = 0; i < MAX2(1, rast->num_threads); i++) { 972 align_free(rast->tasks[i].thread_data.cache); 973 } 974 975 /* for synchronizing rasterization threads */ 976 if (rast->num_threads > 0) { 977 util_barrier_destroy( &rast->barrier ); 978 } 979 980 lp_scene_queue_destroy(rast->full_scenes); 981 982 FREE(rast); 983 } 984 985 986