1 /************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 #include <limits.h> 29 #include "util/u_memory.h" 30 #include "util/u_math.h" 31 #include "util/u_rect.h" 32 #include "util/u_surface.h" 33 #include "util/u_pack_color.h" 34 #include "util/u_string.h" 35 36 #include "os/os_time.h" 37 38 #include "lp_scene_queue.h" 39 #include "lp_context.h" 40 #include "lp_debug.h" 41 #include "lp_fence.h" 42 #include "lp_perf.h" 43 #include "lp_query.h" 44 #include "lp_rast.h" 45 #include "lp_rast_priv.h" 46 #include "gallivm/lp_bld_format.h" 47 #include "gallivm/lp_bld_debug.h" 48 #include "lp_scene.h" 49 #include "lp_tex_sample.h" 50 51 52 #ifdef DEBUG 53 int jit_line = 0; 54 const struct lp_rast_state *jit_state = NULL; 55 const struct lp_rasterizer_task *jit_task = NULL; 56 #endif 57 58 59 /** 60 * Begin rasterizing a scene. 61 * Called once per scene by one thread. 62 */ 63 static void 64 lp_rast_begin( struct lp_rasterizer *rast, 65 struct lp_scene *scene ) 66 { 67 rast->curr_scene = scene; 68 69 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); 70 71 lp_scene_begin_rasterization( scene ); 72 lp_scene_bin_iter_begin( scene ); 73 } 74 75 76 static void 77 lp_rast_end( struct lp_rasterizer *rast ) 78 { 79 lp_scene_end_rasterization( rast->curr_scene ); 80 81 rast->curr_scene = NULL; 82 } 83 84 85 /** 86 * Beginning rasterization of a tile. 87 * \param x window X position of the tile, in pixels 88 * \param y window Y position of the tile, in pixels 89 */ 90 static void 91 lp_rast_tile_begin(struct lp_rasterizer_task *task, 92 const struct cmd_bin *bin, 93 int x, int y) 94 { 95 unsigned i; 96 struct lp_scene *scene = task->scene; 97 98 LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y); 99 100 task->bin = bin; 101 task->x = x * TILE_SIZE; 102 task->y = y * TILE_SIZE; 103 task->width = TILE_SIZE + x * TILE_SIZE > task->scene->fb.width ? 104 task->scene->fb.width - x * TILE_SIZE : TILE_SIZE; 105 task->height = TILE_SIZE + y * TILE_SIZE > task->scene->fb.height ? 106 task->scene->fb.height - y * TILE_SIZE : TILE_SIZE; 107 108 task->thread_data.vis_counter = 0; 109 task->ps_invocations = 0; 110 111 for (i = 0; i < task->scene->fb.nr_cbufs; i++) { 112 if (task->scene->fb.cbufs[i]) { 113 task->color_tiles[i] = scene->cbufs[i].map + 114 scene->cbufs[i].stride * task->y + 115 scene->cbufs[i].format_bytes * task->x; 116 } 117 } 118 if (task->scene->fb.zsbuf) { 119 task->depth_tile = scene->zsbuf.map + 120 scene->zsbuf.stride * task->y + 121 scene->zsbuf.format_bytes * task->x; 122 } 123 } 124 125 126 /** 127 * Clear the rasterizer's current color tile. 128 * This is a bin command called during bin processing. 129 * Clear commands always clear all bound layers. 130 */ 131 static void 132 lp_rast_clear_color(struct lp_rasterizer_task *task, 133 const union lp_rast_cmd_arg arg) 134 { 135 const struct lp_scene *scene = task->scene; 136 unsigned cbuf = arg.clear_rb->cbuf; 137 union util_color uc; 138 enum pipe_format format; 139 140 /* we never bin clear commands for non-existing buffers */ 141 assert(cbuf < scene->fb.nr_cbufs); 142 assert(scene->fb.cbufs[cbuf]); 143 144 format = scene->fb.cbufs[cbuf]->format; 145 uc = arg.clear_rb->color_val; 146 147 /* 148 * this is pretty rough since we have target format (bunch of bytes...) here. 149 * dump it as raw 4 dwords. 150 */ 151 LP_DBG(DEBUG_RAST, "%s clear value (target format %d) raw 0x%x,0x%x,0x%x,0x%x\n", 152 __FUNCTION__, format, uc.ui[0], uc.ui[1], uc.ui[2], uc.ui[3]); 153 154 155 util_fill_box(scene->cbufs[cbuf].map, 156 format, 157 scene->cbufs[cbuf].stride, 158 scene->cbufs[cbuf].layer_stride, 159 task->x, 160 task->y, 161 0, 162 task->width, 163 task->height, 164 scene->fb_max_layer + 1, 165 &uc); 166 167 /* this will increase for each rb which probably doesn't mean much */ 168 LP_COUNT(nr_color_tile_clear); 169 } 170 171 172 /** 173 * Clear the rasterizer's current z/stencil tile. 174 * This is a bin command called during bin processing. 175 * Clear commands always clear all bound layers. 176 */ 177 static void 178 lp_rast_clear_zstencil(struct lp_rasterizer_task *task, 179 const union lp_rast_cmd_arg arg) 180 { 181 const struct lp_scene *scene = task->scene; 182 uint64_t clear_value64 = arg.clear_zstencil.value; 183 uint64_t clear_mask64 = arg.clear_zstencil.mask; 184 uint32_t clear_value = (uint32_t) clear_value64; 185 uint32_t clear_mask = (uint32_t) clear_mask64; 186 const unsigned height = task->height; 187 const unsigned width = task->width; 188 const unsigned dst_stride = scene->zsbuf.stride; 189 uint8_t *dst; 190 unsigned i, j; 191 unsigned block_size; 192 193 LP_DBG(DEBUG_RAST, "%s: value=0x%08x, mask=0x%08x\n", 194 __FUNCTION__, clear_value, clear_mask); 195 196 /* 197 * Clear the area of the depth/depth buffer matching this tile. 198 */ 199 200 if (scene->fb.zsbuf) { 201 unsigned layer; 202 uint8_t *dst_layer = task->depth_tile; 203 block_size = util_format_get_blocksize(scene->fb.zsbuf->format); 204 205 clear_value &= clear_mask; 206 207 for (layer = 0; layer <= scene->fb_max_layer; layer++) { 208 dst = dst_layer; 209 210 switch (block_size) { 211 case 1: 212 assert(clear_mask == 0xff); 213 memset(dst, (uint8_t) clear_value, height * width); 214 break; 215 case 2: 216 if (clear_mask == 0xffff) { 217 for (i = 0; i < height; i++) { 218 uint16_t *row = (uint16_t *)dst; 219 for (j = 0; j < width; j++) 220 *row++ = (uint16_t) clear_value; 221 dst += dst_stride; 222 } 223 } 224 else { 225 for (i = 0; i < height; i++) { 226 uint16_t *row = (uint16_t *)dst; 227 for (j = 0; j < width; j++) { 228 uint16_t tmp = ~clear_mask & *row; 229 *row++ = clear_value | tmp; 230 } 231 dst += dst_stride; 232 } 233 } 234 break; 235 case 4: 236 if (clear_mask == 0xffffffff) { 237 for (i = 0; i < height; i++) { 238 uint32_t *row = (uint32_t *)dst; 239 for (j = 0; j < width; j++) 240 *row++ = clear_value; 241 dst += dst_stride; 242 } 243 } 244 else { 245 for (i = 0; i < height; i++) { 246 uint32_t *row = (uint32_t *)dst; 247 for (j = 0; j < width; j++) { 248 uint32_t tmp = ~clear_mask & *row; 249 *row++ = clear_value | tmp; 250 } 251 dst += dst_stride; 252 } 253 } 254 break; 255 case 8: 256 clear_value64 &= clear_mask64; 257 if (clear_mask64 == 0xffffffffffULL) { 258 for (i = 0; i < height; i++) { 259 uint64_t *row = (uint64_t *)dst; 260 for (j = 0; j < width; j++) 261 *row++ = clear_value64; 262 dst += dst_stride; 263 } 264 } 265 else { 266 for (i = 0; i < height; i++) { 267 uint64_t *row = (uint64_t *)dst; 268 for (j = 0; j < width; j++) { 269 uint64_t tmp = ~clear_mask64 & *row; 270 *row++ = clear_value64 | tmp; 271 } 272 dst += dst_stride; 273 } 274 } 275 break; 276 277 default: 278 assert(0); 279 break; 280 } 281 dst_layer += scene->zsbuf.layer_stride; 282 } 283 } 284 } 285 286 287 288 /** 289 * Run the shader on all blocks in a tile. This is used when a tile is 290 * completely contained inside a triangle. 291 * This is a bin command called during bin processing. 292 */ 293 static void 294 lp_rast_shade_tile(struct lp_rasterizer_task *task, 295 const union lp_rast_cmd_arg arg) 296 { 297 const struct lp_scene *scene = task->scene; 298 const struct lp_rast_shader_inputs *inputs = arg.shade_tile; 299 const struct lp_rast_state *state; 300 struct lp_fragment_shader_variant *variant; 301 const unsigned tile_x = task->x, tile_y = task->y; 302 unsigned x, y; 303 304 if (inputs->disable) { 305 /* This command was partially binned and has been disabled */ 306 return; 307 } 308 309 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); 310 311 state = task->state; 312 assert(state); 313 if (!state) { 314 return; 315 } 316 variant = state->variant; 317 318 /* render the whole 64x64 tile in 4x4 chunks */ 319 for (y = 0; y < task->height; y += 4){ 320 for (x = 0; x < task->width; x += 4) { 321 uint8_t *color[PIPE_MAX_COLOR_BUFS]; 322 unsigned stride[PIPE_MAX_COLOR_BUFS]; 323 uint8_t *depth = NULL; 324 unsigned depth_stride = 0; 325 unsigned i; 326 327 /* color buffer */ 328 for (i = 0; i < scene->fb.nr_cbufs; i++){ 329 if (scene->fb.cbufs[i]) { 330 stride[i] = scene->cbufs[i].stride; 331 color[i] = lp_rast_get_color_block_pointer(task, i, tile_x + x, 332 tile_y + y, inputs->layer); 333 } 334 else { 335 stride[i] = 0; 336 color[i] = NULL; 337 } 338 } 339 340 /* depth buffer */ 341 if (scene->zsbuf.map) { 342 depth = lp_rast_get_depth_block_pointer(task, tile_x + x, 343 tile_y + y, inputs->layer); 344 depth_stride = scene->zsbuf.stride; 345 } 346 347 /* Propagate non-interpolated raster state. */ 348 task->thread_data.raster_state.viewport_index = inputs->viewport_index; 349 350 /* run shader on 4x4 block */ 351 BEGIN_JIT_CALL(state, task); 352 variant->jit_function[RAST_WHOLE]( &state->jit_context, 353 tile_x + x, tile_y + y, 354 inputs->frontfacing, 355 GET_A0(inputs), 356 GET_DADX(inputs), 357 GET_DADY(inputs), 358 color, 359 depth, 360 0xffff, 361 &task->thread_data, 362 stride, 363 depth_stride); 364 END_JIT_CALL(); 365 } 366 } 367 } 368 369 370 /** 371 * Run the shader on all blocks in a tile. This is used when a tile is 372 * completely contained inside a triangle, and the shader is opaque. 373 * This is a bin command called during bin processing. 374 */ 375 static void 376 lp_rast_shade_tile_opaque(struct lp_rasterizer_task *task, 377 const union lp_rast_cmd_arg arg) 378 { 379 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); 380 381 assert(task->state); 382 if (!task->state) { 383 return; 384 } 385 386 lp_rast_shade_tile(task, arg); 387 } 388 389 390 /** 391 * Compute shading for a 4x4 block of pixels inside a triangle. 392 * This is a bin command called during bin processing. 393 * \param x X position of quad in window coords 394 * \param y Y position of quad in window coords 395 */ 396 void 397 lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, 398 const struct lp_rast_shader_inputs *inputs, 399 unsigned x, unsigned y, 400 unsigned mask) 401 { 402 const struct lp_rast_state *state = task->state; 403 struct lp_fragment_shader_variant *variant = state->variant; 404 const struct lp_scene *scene = task->scene; 405 uint8_t *color[PIPE_MAX_COLOR_BUFS]; 406 unsigned stride[PIPE_MAX_COLOR_BUFS]; 407 uint8_t *depth = NULL; 408 unsigned depth_stride = 0; 409 unsigned i; 410 411 assert(state); 412 413 /* Sanity checks */ 414 assert(x < scene->tiles_x * TILE_SIZE); 415 assert(y < scene->tiles_y * TILE_SIZE); 416 assert(x % TILE_VECTOR_WIDTH == 0); 417 assert(y % TILE_VECTOR_HEIGHT == 0); 418 419 assert((x % 4) == 0); 420 assert((y % 4) == 0); 421 422 /* color buffer */ 423 for (i = 0; i < scene->fb.nr_cbufs; i++) { 424 if (scene->fb.cbufs[i]) { 425 stride[i] = scene->cbufs[i].stride; 426 color[i] = lp_rast_get_color_block_pointer(task, i, x, y, 427 inputs->layer); 428 } 429 else { 430 stride[i] = 0; 431 color[i] = NULL; 432 } 433 } 434 435 /* depth buffer */ 436 if (scene->zsbuf.map) { 437 depth_stride = scene->zsbuf.stride; 438 depth = lp_rast_get_depth_block_pointer(task, x, y, inputs->layer); 439 } 440 441 assert(lp_check_alignment(state->jit_context.u8_blend_color, 16)); 442 443 /* 444 * The rasterizer may produce fragments outside our 445 * allocated 4x4 blocks hence need to filter them out here. 446 */ 447 if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) { 448 /* not very accurate would need a popcount on the mask */ 449 /* always count this not worth bothering? */ 450 task->ps_invocations += 1 * variant->ps_inv_multiplier; 451 452 /* Propagate non-interpolated raster state. */ 453 task->thread_data.raster_state.viewport_index = inputs->viewport_index; 454 455 /* run shader on 4x4 block */ 456 BEGIN_JIT_CALL(state, task); 457 variant->jit_function[RAST_EDGE_TEST](&state->jit_context, 458 x, y, 459 inputs->frontfacing, 460 GET_A0(inputs), 461 GET_DADX(inputs), 462 GET_DADY(inputs), 463 color, 464 depth, 465 mask, 466 &task->thread_data, 467 stride, 468 depth_stride); 469 END_JIT_CALL(); 470 } 471 } 472 473 474 475 /** 476 * Begin a new occlusion query. 477 * This is a bin command put in all bins. 478 * Called per thread. 479 */ 480 static void 481 lp_rast_begin_query(struct lp_rasterizer_task *task, 482 const union lp_rast_cmd_arg arg) 483 { 484 struct llvmpipe_query *pq = arg.query_obj; 485 486 switch (pq->type) { 487 case PIPE_QUERY_OCCLUSION_COUNTER: 488 case PIPE_QUERY_OCCLUSION_PREDICATE: 489 pq->start[task->thread_index] = task->thread_data.vis_counter; 490 break; 491 case PIPE_QUERY_PIPELINE_STATISTICS: 492 pq->start[task->thread_index] = task->ps_invocations; 493 break; 494 default: 495 assert(0); 496 break; 497 } 498 } 499 500 501 /** 502 * End the current occlusion query. 503 * This is a bin command put in all bins. 504 * Called per thread. 505 */ 506 static void 507 lp_rast_end_query(struct lp_rasterizer_task *task, 508 const union lp_rast_cmd_arg arg) 509 { 510 struct llvmpipe_query *pq = arg.query_obj; 511 512 switch (pq->type) { 513 case PIPE_QUERY_OCCLUSION_COUNTER: 514 case PIPE_QUERY_OCCLUSION_PREDICATE: 515 pq->end[task->thread_index] += 516 task->thread_data.vis_counter - pq->start[task->thread_index]; 517 pq->start[task->thread_index] = 0; 518 break; 519 case PIPE_QUERY_TIMESTAMP: 520 pq->end[task->thread_index] = os_time_get_nano(); 521 break; 522 case PIPE_QUERY_PIPELINE_STATISTICS: 523 pq->end[task->thread_index] += 524 task->ps_invocations - pq->start[task->thread_index]; 525 pq->start[task->thread_index] = 0; 526 break; 527 default: 528 assert(0); 529 break; 530 } 531 } 532 533 534 void 535 lp_rast_set_state(struct lp_rasterizer_task *task, 536 const union lp_rast_cmd_arg arg) 537 { 538 task->state = arg.state; 539 } 540 541 542 543 /** 544 * Called when we're done writing to a color tile. 545 */ 546 static void 547 lp_rast_tile_end(struct lp_rasterizer_task *task) 548 { 549 unsigned i; 550 551 for (i = 0; i < task->scene->num_active_queries; ++i) { 552 lp_rast_end_query(task, lp_rast_arg_query(task->scene->active_queries[i])); 553 } 554 555 /* debug */ 556 memset(task->color_tiles, 0, sizeof(task->color_tiles)); 557 task->depth_tile = NULL; 558 559 task->bin = NULL; 560 } 561 562 static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] = 563 { 564 lp_rast_clear_color, 565 lp_rast_clear_zstencil, 566 lp_rast_triangle_1, 567 lp_rast_triangle_2, 568 lp_rast_triangle_3, 569 lp_rast_triangle_4, 570 lp_rast_triangle_5, 571 lp_rast_triangle_6, 572 lp_rast_triangle_7, 573 lp_rast_triangle_8, 574 lp_rast_triangle_3_4, 575 lp_rast_triangle_3_16, 576 lp_rast_triangle_4_16, 577 lp_rast_shade_tile, 578 lp_rast_shade_tile_opaque, 579 lp_rast_begin_query, 580 lp_rast_end_query, 581 lp_rast_set_state, 582 lp_rast_triangle_32_1, 583 lp_rast_triangle_32_2, 584 lp_rast_triangle_32_3, 585 lp_rast_triangle_32_4, 586 lp_rast_triangle_32_5, 587 lp_rast_triangle_32_6, 588 lp_rast_triangle_32_7, 589 lp_rast_triangle_32_8, 590 lp_rast_triangle_32_3_4, 591 lp_rast_triangle_32_3_16, 592 lp_rast_triangle_32_4_16 593 }; 594 595 596 static void 597 do_rasterize_bin(struct lp_rasterizer_task *task, 598 const struct cmd_bin *bin, 599 int x, int y) 600 { 601 const struct cmd_block *block; 602 unsigned k; 603 604 if (0) 605 lp_debug_bin(bin, x, y); 606 607 for (block = bin->head; block; block = block->next) { 608 for (k = 0; k < block->count; k++) { 609 dispatch[block->cmd[k]]( task, block->arg[k] ); 610 } 611 } 612 } 613 614 615 616 /** 617 * Rasterize commands for a single bin. 618 * \param x, y position of the bin's tile in the framebuffer 619 * Must be called between lp_rast_begin() and lp_rast_end(). 620 * Called per thread. 621 */ 622 static void 623 rasterize_bin(struct lp_rasterizer_task *task, 624 const struct cmd_bin *bin, int x, int y ) 625 { 626 lp_rast_tile_begin( task, bin, x, y ); 627 628 do_rasterize_bin(task, bin, x, y); 629 630 lp_rast_tile_end(task); 631 632 633 /* Debug/Perf flags: 634 */ 635 if (bin->head->count == 1) { 636 if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE_OPAQUE) 637 LP_COUNT(nr_pure_shade_opaque_64); 638 else if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE) 639 LP_COUNT(nr_pure_shade_64); 640 } 641 } 642 643 644 /* An empty bin is one that just loads the contents of the tile and 645 * stores them again unchanged. This typically happens when bins have 646 * been flushed for some reason in the middle of a frame, or when 647 * incremental updates are being made to a render target. 648 * 649 * Try to avoid doing pointless work in this case. 650 */ 651 static boolean 652 is_empty_bin( const struct cmd_bin *bin ) 653 { 654 return bin->head == NULL; 655 } 656 657 658 /** 659 * Rasterize/execute all bins within a scene. 660 * Called per thread. 661 */ 662 static void 663 rasterize_scene(struct lp_rasterizer_task *task, 664 struct lp_scene *scene) 665 { 666 task->scene = scene; 667 668 /* Clear the cache tags. This should not always be necessary but 669 simpler for now. */ 670 #if LP_USE_TEXTURE_CACHE 671 memset(task->thread_data.cache->cache_tags, 0, 672 sizeof(task->thread_data.cache->cache_tags)); 673 #if LP_BUILD_FORMAT_CACHE_DEBUG 674 task->thread_data.cache->cache_access_total = 0; 675 task->thread_data.cache->cache_access_miss = 0; 676 #endif 677 #endif 678 679 if (!task->rast->no_rast && !scene->discard) { 680 /* loop over scene bins, rasterize each */ 681 { 682 struct cmd_bin *bin; 683 int i, j; 684 685 assert(scene); 686 while ((bin = lp_scene_bin_iter_next(scene, &i, &j))) { 687 if (!is_empty_bin( bin )) 688 rasterize_bin(task, bin, i, j); 689 } 690 } 691 } 692 693 694 #if LP_BUILD_FORMAT_CACHE_DEBUG 695 { 696 uint64_t total, miss; 697 total = task->thread_data.cache->cache_access_total; 698 miss = task->thread_data.cache->cache_access_miss; 699 if (total) { 700 debug_printf("thread %d cache access %llu miss %llu hit rate %f\n", 701 task->thread_index, (long long unsigned)total, 702 (long long unsigned)miss, 703 (float)(total - miss)/(float)total); 704 } 705 } 706 #endif 707 708 if (scene->fence) { 709 lp_fence_signal(scene->fence); 710 } 711 712 task->scene = NULL; 713 } 714 715 716 /** 717 * Called by setup module when it has something for us to render. 718 */ 719 void 720 lp_rast_queue_scene( struct lp_rasterizer *rast, 721 struct lp_scene *scene) 722 { 723 LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); 724 725 if (rast->num_threads == 0) { 726 /* no threading */ 727 unsigned fpstate = util_fpstate_get(); 728 729 /* Make sure that denorms are treated like zeros. This is 730 * the behavior required by D3D10. OpenGL doesn't care. 731 */ 732 util_fpstate_set_denorms_to_zero(fpstate); 733 734 lp_rast_begin( rast, scene ); 735 736 rasterize_scene( &rast->tasks[0], scene ); 737 738 lp_rast_end( rast ); 739 740 util_fpstate_set(fpstate); 741 742 rast->curr_scene = NULL; 743 } 744 else { 745 /* threaded rendering! */ 746 unsigned i; 747 748 lp_scene_enqueue( rast->full_scenes, scene ); 749 750 /* signal the threads that there's work to do */ 751 for (i = 0; i < rast->num_threads; i++) { 752 pipe_semaphore_signal(&rast->tasks[i].work_ready); 753 } 754 } 755 756 LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__); 757 } 758 759 760 void 761 lp_rast_finish( struct lp_rasterizer *rast ) 762 { 763 if (rast->num_threads == 0) { 764 /* nothing to do */ 765 } 766 else { 767 int i; 768 769 /* wait for work to complete */ 770 for (i = 0; i < rast->num_threads; i++) { 771 pipe_semaphore_wait(&rast->tasks[i].work_done); 772 } 773 } 774 } 775 776 777 /** 778 * This is the thread's main entrypoint. 779 * It's a simple loop: 780 * 1. wait for work 781 * 2. do work 782 * 3. signal that we're done 783 */ 784 static PIPE_THREAD_ROUTINE( thread_function, init_data ) 785 { 786 struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data; 787 struct lp_rasterizer *rast = task->rast; 788 boolean debug = false; 789 char thread_name[16]; 790 unsigned fpstate; 791 792 util_snprintf(thread_name, sizeof thread_name, "llvmpipe-%u", task->thread_index); 793 pipe_thread_setname(thread_name); 794 795 /* Make sure that denorms are treated like zeros. This is 796 * the behavior required by D3D10. OpenGL doesn't care. 797 */ 798 fpstate = util_fpstate_get(); 799 util_fpstate_set_denorms_to_zero(fpstate); 800 801 while (1) { 802 /* wait for work */ 803 if (debug) 804 debug_printf("thread %d waiting for work\n", task->thread_index); 805 pipe_semaphore_wait(&task->work_ready); 806 807 if (rast->exit_flag) 808 break; 809 810 if (task->thread_index == 0) { 811 /* thread[0]: 812 * - get next scene to rasterize 813 * - map the framebuffer surfaces 814 */ 815 lp_rast_begin( rast, 816 lp_scene_dequeue( rast->full_scenes, TRUE ) ); 817 } 818 819 /* Wait for all threads to get here so that threads[1+] don't 820 * get a null rast->curr_scene pointer. 821 */ 822 pipe_barrier_wait( &rast->barrier ); 823 824 /* do work */ 825 if (debug) 826 debug_printf("thread %d doing work\n", task->thread_index); 827 828 rasterize_scene(task, 829 rast->curr_scene); 830 831 /* wait for all threads to finish with this scene */ 832 pipe_barrier_wait( &rast->barrier ); 833 834 /* XXX: shouldn't be necessary: 835 */ 836 if (task->thread_index == 0) { 837 lp_rast_end( rast ); 838 } 839 840 /* signal done with work */ 841 if (debug) 842 debug_printf("thread %d done working\n", task->thread_index); 843 844 pipe_semaphore_signal(&task->work_done); 845 } 846 847 #ifdef _WIN32 848 pipe_semaphore_signal(&task->work_done); 849 #endif 850 851 return 0; 852 } 853 854 855 /** 856 * Initialize semaphores and spawn the threads. 857 */ 858 static void 859 create_rast_threads(struct lp_rasterizer *rast) 860 { 861 unsigned i; 862 863 /* NOTE: if num_threads is zero, we won't use any threads */ 864 for (i = 0; i < rast->num_threads; i++) { 865 pipe_semaphore_init(&rast->tasks[i].work_ready, 0); 866 pipe_semaphore_init(&rast->tasks[i].work_done, 0); 867 rast->threads[i] = pipe_thread_create(thread_function, 868 (void *) &rast->tasks[i]); 869 } 870 } 871 872 873 874 /** 875 * Create new lp_rasterizer. If num_threads is zero, don't create any 876 * new threads, do rendering synchronously. 877 * \param num_threads number of rasterizer threads to create 878 */ 879 struct lp_rasterizer * 880 lp_rast_create( unsigned num_threads ) 881 { 882 struct lp_rasterizer *rast; 883 unsigned i; 884 885 rast = CALLOC_STRUCT(lp_rasterizer); 886 if (!rast) { 887 goto no_rast; 888 } 889 890 rast->full_scenes = lp_scene_queue_create(); 891 if (!rast->full_scenes) { 892 goto no_full_scenes; 893 } 894 895 for (i = 0; i < MAX2(1, num_threads); i++) { 896 struct lp_rasterizer_task *task = &rast->tasks[i]; 897 task->rast = rast; 898 task->thread_index = i; 899 task->thread_data.cache = align_malloc(sizeof(struct lp_build_format_cache), 900 16); 901 if (!task->thread_data.cache) { 902 goto no_thread_data_cache; 903 } 904 } 905 906 rast->num_threads = num_threads; 907 908 rast->no_rast = debug_get_bool_option("LP_NO_RAST", FALSE); 909 910 create_rast_threads(rast); 911 912 /* for synchronizing rasterization threads */ 913 if (rast->num_threads > 0) { 914 pipe_barrier_init( &rast->barrier, rast->num_threads ); 915 } 916 917 memset(lp_dummy_tile, 0, sizeof lp_dummy_tile); 918 919 return rast; 920 921 no_thread_data_cache: 922 for (i = 0; i < MAX2(1, rast->num_threads); i++) { 923 if (rast->tasks[i].thread_data.cache) { 924 align_free(rast->tasks[i].thread_data.cache); 925 } 926 } 927 928 lp_scene_queue_destroy(rast->full_scenes); 929 no_full_scenes: 930 FREE(rast); 931 no_rast: 932 return NULL; 933 } 934 935 936 /* Shutdown: 937 */ 938 void lp_rast_destroy( struct lp_rasterizer *rast ) 939 { 940 unsigned i; 941 942 /* Set exit_flag and signal each thread's work_ready semaphore. 943 * Each thread will be woken up, notice that the exit_flag is set and 944 * break out of its main loop. The thread will then exit. 945 */ 946 rast->exit_flag = TRUE; 947 for (i = 0; i < rast->num_threads; i++) { 948 pipe_semaphore_signal(&rast->tasks[i].work_ready); 949 } 950 951 /* Wait for threads to terminate before cleaning up per-thread data. 952 * We don't actually call pipe_thread_wait to avoid dead lock on Windows 953 * per https://bugs.freedesktop.org/show_bug.cgi?id=76252 */ 954 for (i = 0; i < rast->num_threads; i++) { 955 #ifdef _WIN32 956 pipe_semaphore_wait(&rast->tasks[i].work_done); 957 #else 958 pipe_thread_wait(rast->threads[i]); 959 #endif 960 } 961 962 /* Clean up per-thread data */ 963 for (i = 0; i < rast->num_threads; i++) { 964 pipe_semaphore_destroy(&rast->tasks[i].work_ready); 965 pipe_semaphore_destroy(&rast->tasks[i].work_done); 966 } 967 for (i = 0; i < MAX2(1, rast->num_threads); i++) { 968 align_free(rast->tasks[i].thread_data.cache); 969 } 970 971 /* for synchronizing rasterization threads */ 972 if (rast->num_threads > 0) { 973 pipe_barrier_destroy( &rast->barrier ); 974 } 975 976 lp_scene_queue_destroy(rast->full_scenes); 977 978 FREE(rast); 979 } 980 981 982