1 /* 2 * Mesa 3-D graphics library 3 * 4 * Copyright (C) 2012-2015 LunarG, Inc. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included 14 * in all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 * DEALINGS IN THE SOFTWARE. 23 * 24 * Authors: 25 * Chia-I Wu <olv (at) lunarg.com> 26 */ 27 28 #include "ilo_debug.h" 29 #include "ilo_state_shader.h" 30 31 struct pixel_ff { 32 uint8_t dispatch_modes; 33 34 uint32_t kernel_offsets[3]; 35 uint8_t grf_starts[3]; 36 bool pcb_enable; 37 uint8_t per_thread_scratch_space; 38 uint32_t per_thread_scratch_size; 39 40 uint8_t sampler_count; 41 uint8_t surface_count; 42 bool has_uav; 43 44 uint16_t thread_count; 45 46 struct ilo_state_ps_dispatch_conds conds; 47 48 bool kill_pixel; 49 bool dispatch_enable; 50 bool dual_source_blending; 51 uint32_t sample_mask; 52 }; 53 54 static bool 55 ps_kernel_validate_gen6(const struct ilo_dev *dev, 56 const struct ilo_state_shader_kernel_info *kernel) 57 { 58 /* "Dispatch GRF Start Register for Constant/Setup Data" is U7 */ 59 const uint8_t max_grf_start = 128; 60 61 ILO_DEV_ASSERT(dev, 6, 8); 62 63 /* "Kernel Start Pointer" is 64-byte aligned */ 64 assert(kernel->offset % 64 == 0); 65 66 assert(kernel->grf_start < max_grf_start); 67 68 return true; 69 } 70 71 static bool 72 ps_validate_gen6(const struct ilo_dev *dev, 73 const struct ilo_state_ps_info *info) 74 { 75 const struct ilo_state_shader_kernel_info *kernel_8 = &info->kernel_8; 76 const struct ilo_state_shader_kernel_info *kernel_16 = &info->kernel_16; 77 const struct ilo_state_shader_kernel_info *kernel_32 = &info->kernel_32; 78 const struct ilo_state_ps_io_info *io = &info->io; 79 80 ILO_DEV_ASSERT(dev, 6, 8); 81 82 if (!ps_kernel_validate_gen6(dev, kernel_8) || 83 !ps_kernel_validate_gen6(dev, kernel_16) || 84 !ps_kernel_validate_gen6(dev, kernel_32)) 85 return false; 86 87 /* unsupported on Gen6 */ 88 if (ilo_dev_gen(dev) == ILO_GEN(6)) 89 assert(!io->use_coverage_mask); 90 91 /* 92 * From the Sandy Bridge PRM, volume 2 part 1, page 275: 93 * 94 * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth 95 * field must be set to disabled." 96 */ 97 if (ilo_dev_gen(dev) == ILO_GEN(6) && io->pscdepth != GEN7_PSCDEPTH_OFF) 98 assert(info->cv_has_depth_buffer); 99 100 if (!info->per_sample_dispatch) { 101 /* 102 * From the Sandy Bridge PRM, volume 2 part 1, page 281: 103 * 104 * "MSDISPMODE_PERSAMPLE is required in order to select 105 * POSOFFSET_SAMPLE." 106 */ 107 assert(io->posoffset != GEN6_POSOFFSET_SAMPLE); 108 109 /* 110 * From the Sandy Bridge PRM, volume 2 part 1, page 282: 111 * 112 * "MSDISPMODE_PERSAMPLE is required in order to select 113 * INTERP_SAMPLE." 114 * 115 * From the Sandy Bridge PRM, volume 2 part 1, page 283: 116 * 117 * "MSDISPMODE_PERSAMPLE is required in order to select Perspective 118 * Sample or Non-perspective Sample barycentric coordinates." 119 */ 120 assert(!info->cv_per_sample_interp); 121 } 122 123 /* 124 * 125 * From the Sandy Bridge PRM, volume 2 part 1, page 314: 126 * 127 * "Pixel Shader Dispatch, Alpha... must all be disabled." 128 * 129 * Simply disallow any valid kernel when there is early-z op. Also, when 130 * there is no valid kernel, io should be zeroed. 131 */ 132 if (info->valid_kernels) 133 assert(!info->cv_has_earlyz_op); 134 else 135 assert(ilo_is_zeroed(io, sizeof(*io))); 136 137 return true; 138 } 139 140 static uint8_t 141 ps_get_gen6_dispatch_modes(const struct ilo_dev *dev, 142 const struct ilo_state_ps_info *info) 143 { 144 const struct ilo_state_ps_io_info *io = &info->io; 145 uint8_t dispatch_modes = info->valid_kernels; 146 147 ILO_DEV_ASSERT(dev, 6, 8); 148 149 if (!dispatch_modes) 150 return 0; 151 152 /* 153 * From the Sandy Bridge PRM, volume 2 part 1, page 334: 154 * 155 * "Not valid on [DevSNB] if 4x PERPIXEL mode with pixel shader 156 * computed depth." 157 * 158 * "Valid on all products, except when in non-1x PERSAMPLE mode 159 * (applies to [DevSNB+] only)" 160 * 161 * From the Sandy Bridge PRM, volume 4 part 1, page 239: 162 * 163 * "[DevSNB]: When Pixel Shader outputs oDepth and PS invocation mode 164 * is PERPIXEL, Message Type for Render Target Write must be SIMD8. 165 * 166 * Errata: [DevSNB+]: When Pixel Shader outputs oMask, this message 167 * type is not supported: SIMD8 (including SIMD8_DUALSRC_xx)." 168 * 169 * It is really hard to follow what combinations are valid on what 170 * platforms. Judging from the restrictions on RT write messages on Gen6, 171 * oDepth and oMask related issues should be Gen6-specific. PERSAMPLE 172 * issue should be universal, and disallows multiple dispatch modes. 173 */ 174 if (ilo_dev_gen(dev) == ILO_GEN(6)) { 175 if (io->pscdepth != GEN7_PSCDEPTH_OFF && !info->per_sample_dispatch) 176 dispatch_modes &= GEN6_PS_DISPATCH_8; 177 if (io->write_omask) 178 dispatch_modes &= ~GEN6_PS_DISPATCH_8; 179 } 180 if (info->per_sample_dispatch && !info->sample_count_one) { 181 /* prefer 32 over 16 over 8 */ 182 if (dispatch_modes & GEN6_PS_DISPATCH_32) 183 dispatch_modes &= GEN6_PS_DISPATCH_32; 184 else if (dispatch_modes & GEN6_PS_DISPATCH_16) 185 dispatch_modes &= GEN6_PS_DISPATCH_16; 186 else 187 dispatch_modes &= GEN6_PS_DISPATCH_8; 188 } 189 190 /* 191 * From the Broadwell PRM, volume 2b, page 149: 192 * 193 * "When Render Target Fast Clear Enable is ENABLED or Render Target 194 * Resolve Type = RESOLVE_PARTIAL or RESOLVE_FULL, this bit (8 Pixel 195 * Dispatch or Dual-8 Pixel Dispatch Enable) must be DISABLED." 196 */ 197 if (info->rt_clear_enable || info->rt_resolve_enable) 198 dispatch_modes &= ~GEN6_PS_DISPATCH_8; 199 200 assert(dispatch_modes); 201 202 return dispatch_modes; 203 } 204 205 static uint16_t 206 ps_get_gen6_thread_count(const struct ilo_dev *dev, 207 const struct ilo_state_ps_info *info) 208 { 209 uint16_t thread_count; 210 211 ILO_DEV_ASSERT(dev, 6, 8); 212 213 /* Maximum Number of Threads of 3DSTATE_PS */ 214 switch (ilo_dev_gen(dev)) { 215 case ILO_GEN(8): 216 /* scaled automatically */ 217 thread_count = 64 - 1; 218 break; 219 case ILO_GEN(7.5): 220 thread_count = (dev->gt == 3) ? 408 : 221 (dev->gt == 2) ? 204 : 102; 222 break; 223 case ILO_GEN(7): 224 thread_count = (dev->gt == 2) ? 172 : 48; 225 break; 226 case ILO_GEN(6): 227 default: 228 /* from the classic driver instead of the PRM */ 229 thread_count = (dev->gt == 2) ? 80 : 40; 230 break; 231 } 232 233 return thread_count - 1; 234 } 235 236 static bool 237 ps_params_get_gen6_kill_pixel(const struct ilo_dev *dev, 238 const struct ilo_state_ps_params_info *params, 239 const struct ilo_state_ps_dispatch_conds *conds) 240 { 241 ILO_DEV_ASSERT(dev, 6, 8); 242 243 /* 244 * From the Sandy Bridge PRM, volume 2 part 1, page 275: 245 * 246 * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the 247 * PS kernel or color calculator has the ability to kill (discard) 248 * pixels or samples, other than due to depth or stencil testing. 249 * This bit is required to be ENABLED in the following situations: 250 * 251 * The API pixel shader program contains "killpix" or "discard" 252 * instructions, or other code in the pixel shader kernel that can 253 * cause the final pixel mask to differ from the pixel mask received 254 * on dispatch. 255 * 256 * A sampler with chroma key enabled with kill pixel mode is used by 257 * the pixel shader. 258 * 259 * Any render target has Alpha Test Enable or AlphaToCoverage Enable 260 * enabled. 261 * 262 * The pixel shader kernel generates and outputs oMask. 263 * 264 * Note: As ClipDistance clipping is fully supported in hardware and 265 * therefore not via PS instructions, there should be no need to 266 * ENABLE this bit due to ClipDistance clipping." 267 */ 268 return (conds->ps_may_kill || params->alpha_may_kill); 269 } 270 271 static bool 272 ps_params_get_gen6_dispatch_enable(const struct ilo_dev *dev, 273 const struct ilo_state_ps_params_info *params, 274 const struct ilo_state_ps_dispatch_conds *conds) 275 { 276 /* 277 * We want to skip dispatching when EarlyZ suffices. The conditions that 278 * require dispatching are 279 * 280 * - PS writes RTs and RTs are writeable 281 * - PS changes depth value and depth test/write is enabled 282 * - PS changes stencil value and stencil test is enabled 283 * - PS writes UAVs 284 * - PS or CC kills pixels 285 * - EDSC is PSEXEC, and depth test/write or stencil test is enabled 286 */ 287 bool dispatch_required = 288 ((conds->has_rt_write && params->has_writeable_rt) || 289 conds->write_odepth || 290 conds->write_ostencil || 291 conds->has_uav_write || 292 ps_params_get_gen6_kill_pixel(dev, params, conds) || 293 params->earlyz_control_psexec); 294 295 ILO_DEV_ASSERT(dev, 6, 8); 296 297 /* 298 * From the Ivy Bridge PRM, volume 2 part 1, page 280: 299 * 300 * "If EDSC_PSEXEC mode is selected, Thread Dispatch Enable must be 301 * set." 302 */ 303 if (ilo_dev_gen(dev) < ILO_GEN(8) && params->earlyz_control_psexec) 304 dispatch_required = true; 305 306 /* assert it is valid to dispatch */ 307 if (dispatch_required) 308 assert(conds->ps_valid); 309 310 return dispatch_required; 311 } 312 313 static bool 314 ps_get_gen6_ff_kernels(const struct ilo_dev *dev, 315 const struct ilo_state_ps_info *info, 316 struct pixel_ff *ff) 317 { 318 const struct ilo_state_shader_kernel_info *kernel_8 = &info->kernel_8; 319 const struct ilo_state_shader_kernel_info *kernel_16 = &info->kernel_16; 320 const struct ilo_state_shader_kernel_info *kernel_32 = &info->kernel_32; 321 322 ILO_DEV_ASSERT(dev, 6, 8); 323 324 ff->dispatch_modes = ps_get_gen6_dispatch_modes(dev, info); 325 326 /* initialize kernel offsets and GRF starts */ 327 if (util_is_power_of_two(ff->dispatch_modes)) { 328 if (ff->dispatch_modes & GEN6_PS_DISPATCH_8) { 329 ff->kernel_offsets[0] = kernel_8->offset; 330 ff->grf_starts[0] = kernel_8->grf_start; 331 } else if (ff->dispatch_modes & GEN6_PS_DISPATCH_16) { 332 ff->kernel_offsets[0] = kernel_16->offset; 333 ff->grf_starts[0] = kernel_16->grf_start; 334 } else if (ff->dispatch_modes & GEN6_PS_DISPATCH_32) { 335 ff->kernel_offsets[0] = kernel_32->offset; 336 ff->grf_starts[0] = kernel_32->grf_start; 337 } 338 } else { 339 ff->kernel_offsets[0] = kernel_8->offset; 340 ff->kernel_offsets[1] = kernel_32->offset; 341 ff->kernel_offsets[2] = kernel_16->offset; 342 343 ff->grf_starts[0] = kernel_8->grf_start; 344 ff->grf_starts[1] = kernel_32->grf_start; 345 ff->grf_starts[2] = kernel_16->grf_start; 346 } 347 348 /* we do not want to save it */ 349 assert(ff->kernel_offsets[0] == 0); 350 351 ff->pcb_enable = (((ff->dispatch_modes & GEN6_PS_DISPATCH_8) && 352 kernel_8->pcb_attr_count) || 353 ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) && 354 kernel_16->pcb_attr_count) || 355 ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) && 356 kernel_32->pcb_attr_count)); 357 358 /* GPU hangs on Haswell if none of the dispatch mode bits is set */ 359 if (ilo_dev_gen(dev) == ILO_GEN(7.5) && !ff->dispatch_modes) 360 ff->dispatch_modes |= GEN6_PS_DISPATCH_8; 361 362 return true; 363 } 364 365 static bool 366 ps_get_gen6_ff(const struct ilo_dev *dev, 367 const struct ilo_state_ps_info *info, 368 struct pixel_ff *ff) 369 { 370 const struct ilo_state_shader_resource_info *resource = &info->resource; 371 const struct ilo_state_ps_io_info *io = &info->io; 372 const struct ilo_state_ps_params_info *params = &info->params; 373 374 ILO_DEV_ASSERT(dev, 6, 8); 375 376 memset(ff, 0, sizeof(*ff)); 377 378 if (!ps_validate_gen6(dev, info) || !ps_get_gen6_ff_kernels(dev, info, ff)) 379 return false; 380 381 if (info->per_thread_scratch_size) { 382 /* 383 * From the Sandy Bridge PRM, volume 2 part 1, page 271: 384 * 385 * "(Per-Thread Scratch Space) 386 * Range [0,11] indicating [1k bytes, 2M bytes] in powers of two" 387 */ 388 assert(info->per_thread_scratch_size <= 2 * 1024 * 1024); 389 390 /* next power of two, starting from 1KB */ 391 ff->per_thread_scratch_space = (info->per_thread_scratch_size > 1024) ? 392 (util_last_bit(info->per_thread_scratch_size - 1) - 10) : 0; 393 ff->per_thread_scratch_size = 1 << (10 + ff->per_thread_scratch_space); 394 } 395 396 ff->sampler_count = (resource->sampler_count <= 12) ? 397 (resource->sampler_count + 3) / 4 : 4; 398 ff->surface_count = resource->surface_count; 399 ff->has_uav = resource->has_uav; 400 401 ff->thread_count = ps_get_gen6_thread_count(dev, info); 402 403 ff->conds.ps_valid = (info->valid_kernels != 0x0); 404 ff->conds.has_rt_write = io->has_rt_write; 405 ff->conds.write_odepth = (io->pscdepth != GEN7_PSCDEPTH_OFF); 406 ff->conds.write_ostencil = false; 407 ff->conds.has_uav_write = resource->has_uav; 408 ff->conds.ps_may_kill = (io->write_pixel_mask || io->write_omask); 409 410 ff->kill_pixel = ps_params_get_gen6_kill_pixel(dev, params, &ff->conds); 411 ff->dispatch_enable = 412 ps_params_get_gen6_dispatch_enable(dev, params, &ff->conds); 413 ff->dual_source_blending = params->dual_source_blending; 414 ff->sample_mask = params->sample_mask; 415 416 return true; 417 } 418 419 static bool 420 ps_set_gen6_3dstate_wm(struct ilo_state_ps *ps, 421 const struct ilo_dev *dev, 422 const struct ilo_state_ps_info *info, 423 const struct pixel_ff *ff) 424 { 425 const struct ilo_state_ps_io_info *io = &info->io; 426 uint32_t dw2, dw3, dw4, dw5, dw6; 427 428 ILO_DEV_ASSERT(dev, 6, 6); 429 430 dw2 = ff->sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT | 431 ff->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT; 432 433 if (false) 434 dw2 |= GEN6_THREADDISP_FP_MODE_ALT; 435 436 dw3 = ff->per_thread_scratch_space << 437 GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; 438 439 dw4 = ff->grf_starts[0] << GEN6_WM_DW4_URB_GRF_START0__SHIFT | 440 ff->grf_starts[1] << GEN6_WM_DW4_URB_GRF_START1__SHIFT | 441 ff->grf_starts[2] << GEN6_WM_DW4_URB_GRF_START2__SHIFT; 442 443 dw5 = ff->thread_count << GEN6_WM_DW5_MAX_THREADS__SHIFT | 444 ff->dispatch_modes << GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT; 445 446 if (ff->kill_pixel) 447 dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL; 448 449 if (io->pscdepth != GEN7_PSCDEPTH_OFF) 450 dw5 |= GEN6_WM_DW5_PS_COMPUTE_DEPTH; 451 if (io->use_z) 452 dw5 |= GEN6_WM_DW5_PS_USE_DEPTH; 453 454 if (ff->dispatch_enable) 455 dw5 |= GEN6_WM_DW5_PS_DISPATCH_ENABLE; 456 457 if (io->write_omask) 458 dw5 |= GEN6_WM_DW5_PS_COMPUTE_OMASK; 459 if (io->use_w) 460 dw5 |= GEN6_WM_DW5_PS_USE_W; 461 462 if (ff->dual_source_blending) 463 dw5 |= GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND; 464 465 dw6 = io->attr_count << GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT | 466 io->posoffset << GEN6_WM_DW6_PS_POSOFFSET__SHIFT; 467 468 dw6 |= (info->per_sample_dispatch) ? 469 GEN6_WM_DW6_MSDISPMODE_PERSAMPLE : GEN6_WM_DW6_MSDISPMODE_PERPIXEL; 470 471 STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 7); 472 ps->ps[0] = dw2; 473 ps->ps[1] = dw3; 474 ps->ps[2] = dw4; 475 ps->ps[3] = dw5; 476 ps->ps[4] = dw6; 477 ps->ps[5] = ff->kernel_offsets[1]; 478 ps->ps[6] = ff->kernel_offsets[2]; 479 480 return true; 481 } 482 483 static bool 484 ps_set_gen7_3dstate_wm(struct ilo_state_ps *ps, 485 const struct ilo_dev *dev, 486 const struct ilo_state_ps_info *info, 487 const struct pixel_ff *ff) 488 { 489 const struct ilo_state_ps_io_info *io = &info->io; 490 uint32_t dw1, dw2; 491 492 ILO_DEV_ASSERT(dev, 7, 7.5); 493 494 dw1 = io->pscdepth << GEN7_WM_DW1_PSCDEPTH__SHIFT; 495 496 if (ff->dispatch_enable) 497 dw1 |= GEN7_WM_DW1_PS_DISPATCH_ENABLE; 498 if (ff->kill_pixel) 499 dw1 |= GEN7_WM_DW1_PS_KILL_PIXEL; 500 501 if (io->use_z) 502 dw1 |= GEN7_WM_DW1_PS_USE_DEPTH; 503 if (io->use_w) 504 dw1 |= GEN7_WM_DW1_PS_USE_W; 505 if (io->use_coverage_mask) 506 dw1 |= GEN7_WM_DW1_PS_USE_COVERAGE_MASK; 507 508 dw2 = (info->per_sample_dispatch) ? 509 GEN7_WM_DW2_MSDISPMODE_PERSAMPLE : GEN7_WM_DW2_MSDISPMODE_PERPIXEL; 510 511 STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 2); 512 ps->ps[0] = dw1; 513 ps->ps[1] = dw2; 514 515 return true; 516 } 517 518 static bool 519 ps_set_gen7_3DSTATE_PS(struct ilo_state_ps *ps, 520 const struct ilo_dev *dev, 521 const struct ilo_state_ps_info *info, 522 const struct pixel_ff *ff) 523 { 524 const struct ilo_state_ps_io_info *io = &info->io; 525 uint32_t dw2, dw3, dw4, dw5; 526 527 ILO_DEV_ASSERT(dev, 7, 7.5); 528 529 dw2 = ff->sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT | 530 ff->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT; 531 532 if (false) 533 dw2 |= GEN6_THREADDISP_FP_MODE_ALT; 534 535 dw3 = ff->per_thread_scratch_space << 536 GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; 537 538 dw4 = io->posoffset << GEN7_PS_DW4_POSOFFSET__SHIFT | 539 ff->dispatch_modes << GEN7_PS_DW4_DISPATCH_MODE__SHIFT; 540 541 if (ilo_dev_gen(dev) == ILO_GEN(7.5)) { 542 dw4 |= ff->thread_count << GEN75_PS_DW4_MAX_THREADS__SHIFT | 543 (ff->sample_mask & 0xff) << GEN75_PS_DW4_SAMPLE_MASK__SHIFT; 544 } else { 545 dw4 |= ff->thread_count << GEN7_PS_DW4_MAX_THREADS__SHIFT; 546 } 547 548 if (ff->pcb_enable) 549 dw4 |= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE; 550 if (io->attr_count) 551 dw4 |= GEN7_PS_DW4_ATTR_ENABLE; 552 if (io->write_omask) 553 dw4 |= GEN7_PS_DW4_COMPUTE_OMASK; 554 if (info->rt_clear_enable) 555 dw4 |= GEN7_PS_DW4_RT_FAST_CLEAR; 556 if (ff->dual_source_blending) 557 dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND; 558 if (info->rt_resolve_enable) 559 dw4 |= GEN7_PS_DW4_RT_RESOLVE; 560 if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff->has_uav) 561 dw4 |= GEN75_PS_DW4_ACCESS_UAV; 562 563 dw5 = ff->grf_starts[0] << GEN7_PS_DW5_URB_GRF_START0__SHIFT | 564 ff->grf_starts[1] << GEN7_PS_DW5_URB_GRF_START1__SHIFT | 565 ff->grf_starts[2] << GEN7_PS_DW5_URB_GRF_START2__SHIFT; 566 567 STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 8); 568 ps->ps[2] = dw2; 569 ps->ps[3] = dw3; 570 ps->ps[4] = dw4; 571 ps->ps[5] = dw5; 572 ps->ps[6] = ff->kernel_offsets[1]; 573 ps->ps[7] = ff->kernel_offsets[2]; 574 575 return true; 576 } 577 578 static bool 579 ps_set_gen8_3DSTATE_PS(struct ilo_state_ps *ps, 580 const struct ilo_dev *dev, 581 const struct ilo_state_ps_info *info, 582 const struct pixel_ff *ff) 583 { 584 const struct ilo_state_ps_io_info *io = &info->io; 585 uint32_t dw3, dw4, dw6, dw7; 586 587 ILO_DEV_ASSERT(dev, 8, 8); 588 589 /* 590 * Set VME here for correct computation of LODs and others. Not sure why 591 * it is needed now. 592 */ 593 dw3 = GEN6_THREADDISP_VME | 594 ff->sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT | 595 ff->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT; 596 597 if (false) 598 dw3 |= GEN6_THREADDISP_FP_MODE_ALT; 599 600 dw4 = ff->per_thread_scratch_space << 601 GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; 602 603 dw6 = ff->thread_count << GEN8_PS_DW6_MAX_THREADS__SHIFT | 604 io->posoffset << GEN8_PS_DW6_POSOFFSET__SHIFT | 605 ff->dispatch_modes << GEN8_PS_DW6_DISPATCH_MODE__SHIFT; 606 607 if (ff->pcb_enable) 608 dw6 |= GEN8_PS_DW6_PUSH_CONSTANT_ENABLE; 609 610 if (info->rt_clear_enable) 611 dw6 |= GEN8_PS_DW6_RT_FAST_CLEAR; 612 if (info->rt_resolve_enable) 613 dw6 |= GEN8_PS_DW6_RT_RESOLVE; 614 615 dw7 = ff->grf_starts[0] << GEN8_PS_DW7_URB_GRF_START0__SHIFT | 616 ff->grf_starts[1] << GEN8_PS_DW7_URB_GRF_START1__SHIFT | 617 ff->grf_starts[2] << GEN8_PS_DW7_URB_GRF_START2__SHIFT; 618 619 STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 6); 620 ps->ps[0] = dw3; 621 ps->ps[1] = dw4; 622 ps->ps[2] = dw6; 623 ps->ps[3] = dw7; 624 ps->ps[4] = ff->kernel_offsets[1]; 625 ps->ps[5] = ff->kernel_offsets[2]; 626 627 return true; 628 } 629 630 static bool 631 ps_set_gen8_3DSTATE_PS_EXTRA(struct ilo_state_ps *ps, 632 const struct ilo_dev *dev, 633 const struct ilo_state_ps_info *info, 634 const struct pixel_ff *ff) 635 { 636 const struct ilo_state_ps_io_info *io = &info->io; 637 uint32_t dw1; 638 639 ILO_DEV_ASSERT(dev, 8, 8); 640 641 dw1 = io->pscdepth << GEN8_PSX_DW1_PSCDEPTH__SHIFT; 642 643 if (info->valid_kernels) 644 dw1 |= GEN8_PSX_DW1_VALID; 645 if (!io->has_rt_write) 646 dw1 |= GEN8_PSX_DW1_UAV_ONLY; 647 if (io->write_omask) 648 dw1 |= GEN8_PSX_DW1_COMPUTE_OMASK; 649 if (io->write_pixel_mask) 650 dw1 |= GEN8_PSX_DW1_KILL_PIXEL; 651 652 if (io->use_z) 653 dw1 |= GEN8_PSX_DW1_USE_DEPTH; 654 if (io->use_w) 655 dw1 |= GEN8_PSX_DW1_USE_W; 656 if (io->attr_count) 657 dw1 |= GEN8_PSX_DW1_ATTR_ENABLE; 658 659 if (info->per_sample_dispatch) 660 dw1 |= GEN8_PSX_DW1_PER_SAMPLE; 661 if (ff->has_uav) 662 dw1 |= GEN8_PSX_DW1_ACCESS_UAV; 663 if (io->use_coverage_mask) 664 dw1 |= GEN8_PSX_DW1_USE_COVERAGE_MASK; 665 666 /* 667 * From the Broadwell PRM, volume 2b, page 151: 668 * 669 * "When this bit (Pixel Shader Valid) clear the rest of this command 670 * should also be clear. 671 */ 672 if (!info->valid_kernels) 673 dw1 = 0; 674 675 STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 5); 676 ps->ps[4] = dw1; 677 678 return true; 679 } 680 681 bool 682 ilo_state_ps_init(struct ilo_state_ps *ps, 683 const struct ilo_dev *dev, 684 const struct ilo_state_ps_info *info) 685 { 686 struct pixel_ff ff; 687 bool ret = true; 688 689 assert(ilo_is_zeroed(ps, sizeof(*ps))); 690 691 ret &= ps_get_gen6_ff(dev, info, &ff); 692 693 if (ilo_dev_gen(dev) >= ILO_GEN(8)) { 694 ret &= ps_set_gen8_3DSTATE_PS(ps, dev, info, &ff); 695 ret &= ps_set_gen8_3DSTATE_PS_EXTRA(ps, dev, info, &ff); 696 } else if (ilo_dev_gen(dev) >= ILO_GEN(7)) { 697 ret &= ps_set_gen7_3dstate_wm(ps, dev, info, &ff); 698 ret &= ps_set_gen7_3DSTATE_PS(ps, dev, info, &ff); 699 } else { 700 ret &= ps_set_gen6_3dstate_wm(ps, dev, info, &ff); 701 } 702 703 ps->scratch_size = ff.per_thread_scratch_size * ff.thread_count; 704 /* save conditions */ 705 ps->conds = ff.conds; 706 707 assert(ret); 708 709 return ret; 710 } 711 712 bool 713 ilo_state_ps_init_disabled(struct ilo_state_ps *ps, 714 const struct ilo_dev *dev) 715 { 716 struct ilo_state_ps_info info; 717 718 memset(&info, 0, sizeof(info)); 719 720 return ilo_state_ps_init(ps, dev, &info); 721 } 722 723 bool 724 ilo_state_ps_set_params(struct ilo_state_ps *ps, 725 const struct ilo_dev *dev, 726 const struct ilo_state_ps_params_info *params) 727 { 728 ILO_DEV_ASSERT(dev, 6, 8); 729 730 /* modify sample mask */ 731 if (ilo_dev_gen(dev) == ILO_GEN(7.5)) { 732 ps->ps[4] = (ps->ps[4] & ~GEN75_PS_DW4_SAMPLE_MASK__MASK) | 733 (params->sample_mask & 0xff) << GEN75_PS_DW4_SAMPLE_MASK__SHIFT; 734 } 735 736 /* modify dispatch enable, pixel kill, and dual source blending */ 737 if (ilo_dev_gen(dev) < ILO_GEN(8)) { 738 if (ilo_dev_gen(dev) >= ILO_GEN(7)) { 739 if (ps_params_get_gen6_dispatch_enable(dev, params, &ps->conds)) 740 ps->ps[0] |= GEN7_WM_DW1_PS_DISPATCH_ENABLE; 741 else 742 ps->ps[0] &= ~GEN7_WM_DW1_PS_DISPATCH_ENABLE; 743 744 if (ps_params_get_gen6_kill_pixel(dev, params, &ps->conds)) 745 ps->ps[0] |= GEN7_WM_DW1_PS_KILL_PIXEL; 746 else 747 ps->ps[0] &= ~GEN7_WM_DW1_PS_KILL_PIXEL; 748 749 if (params->dual_source_blending) 750 ps->ps[4] |= GEN7_PS_DW4_DUAL_SOURCE_BLEND; 751 else 752 ps->ps[4] &= ~GEN7_PS_DW4_DUAL_SOURCE_BLEND; 753 } else { 754 if (ps_params_get_gen6_dispatch_enable(dev, params, &ps->conds)) 755 ps->ps[3] |= GEN6_WM_DW5_PS_DISPATCH_ENABLE; 756 else 757 ps->ps[3] &= ~GEN6_WM_DW5_PS_DISPATCH_ENABLE; 758 759 if (ps_params_get_gen6_kill_pixel(dev, params, &ps->conds)) 760 ps->ps[3] |= GEN6_WM_DW5_PS_KILL_PIXEL; 761 else 762 ps->ps[3] &= ~GEN6_WM_DW5_PS_KILL_PIXEL; 763 764 if (params->dual_source_blending) 765 ps->ps[3] |= GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND; 766 else 767 ps->ps[3] &= ~GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND; 768 } 769 } 770 771 return true; 772 } 773