1 /* 2 * Copyright 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24 #include "brw_context.h" 25 #include "brw_cfg.h" 26 #include "brw_eu.h" 27 #include "brw_fs.h" 28 #include "brw_nir.h" 29 #include "brw_vec4_tes.h" 30 #include "main/uniforms.h" 31 32 extern "C" void 33 brw_mark_surface_used(struct brw_stage_prog_data *prog_data, 34 unsigned surf_index) 35 { 36 assert(surf_index < BRW_MAX_SURFACES); 37 38 prog_data->binding_table.size_bytes = 39 MAX2(prog_data->binding_table.size_bytes, (surf_index + 1) * 4); 40 } 41 42 enum brw_reg_type 43 brw_type_for_base_type(const struct glsl_type *type) 44 { 45 switch (type->base_type) { 46 case GLSL_TYPE_FLOAT: 47 return BRW_REGISTER_TYPE_F; 48 case GLSL_TYPE_INT: 49 case GLSL_TYPE_BOOL: 50 case GLSL_TYPE_SUBROUTINE: 51 return BRW_REGISTER_TYPE_D; 52 case GLSL_TYPE_UINT: 53 return BRW_REGISTER_TYPE_UD; 54 case GLSL_TYPE_ARRAY: 55 return brw_type_for_base_type(type->fields.array); 56 case GLSL_TYPE_STRUCT: 57 case GLSL_TYPE_SAMPLER: 58 case GLSL_TYPE_ATOMIC_UINT: 59 /* These should be overridden with the type of the member when 60 * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely 61 * way to trip up if we don't. 62 */ 63 return BRW_REGISTER_TYPE_UD; 64 case GLSL_TYPE_IMAGE: 65 return BRW_REGISTER_TYPE_UD; 66 case GLSL_TYPE_DOUBLE: 67 return BRW_REGISTER_TYPE_DF; 68 case GLSL_TYPE_VOID: 69 case GLSL_TYPE_ERROR: 70 case GLSL_TYPE_INTERFACE: 71 case GLSL_TYPE_FUNCTION: 72 unreachable("not reached"); 73 } 74 75 return BRW_REGISTER_TYPE_F; 76 } 77 78 enum brw_conditional_mod 79 brw_conditional_for_comparison(unsigned int op) 80 { 81 switch (op) { 82 case ir_binop_less: 83 return BRW_CONDITIONAL_L; 84 case ir_binop_greater: 85 return BRW_CONDITIONAL_G; 86 case ir_binop_lequal: 87 return BRW_CONDITIONAL_LE; 88 case ir_binop_gequal: 89 return BRW_CONDITIONAL_GE; 90 case ir_binop_equal: 91 case ir_binop_all_equal: /* same as equal for scalars */ 92 return BRW_CONDITIONAL_Z; 93 case ir_binop_nequal: 94 case ir_binop_any_nequal: /* same as nequal for scalars */ 95 return BRW_CONDITIONAL_NZ; 96 default: 97 unreachable("not reached: bad operation for comparison"); 98 } 99 } 100 101 uint32_t 102 brw_math_function(enum opcode op) 103 { 104 switch (op) { 105 case SHADER_OPCODE_RCP: 106 return BRW_MATH_FUNCTION_INV; 107 case SHADER_OPCODE_RSQ: 108 return BRW_MATH_FUNCTION_RSQ; 109 case SHADER_OPCODE_SQRT: 110 return BRW_MATH_FUNCTION_SQRT; 111 case SHADER_OPCODE_EXP2: 112 return BRW_MATH_FUNCTION_EXP; 113 case SHADER_OPCODE_LOG2: 114 return BRW_MATH_FUNCTION_LOG; 115 case SHADER_OPCODE_POW: 116 return BRW_MATH_FUNCTION_POW; 117 case SHADER_OPCODE_SIN: 118 return BRW_MATH_FUNCTION_SIN; 119 case SHADER_OPCODE_COS: 120 return BRW_MATH_FUNCTION_COS; 121 case SHADER_OPCODE_INT_QUOTIENT: 122 return BRW_MATH_FUNCTION_INT_DIV_QUOTIENT; 123 case SHADER_OPCODE_INT_REMAINDER: 124 return BRW_MATH_FUNCTION_INT_DIV_REMAINDER; 125 default: 126 unreachable("not reached: unknown math function"); 127 } 128 } 129 130 bool 131 brw_texture_offset(int *offsets, unsigned num_components, uint32_t *offset_bits) 132 { 133 if (!offsets) return false; /* nonconstant offset; caller will handle it. */ 134 135 /* offset out of bounds; caller will handle it. */ 136 for (unsigned i = 0; i < num_components; i++) 137 if (offsets[i] > 7 || offsets[i] < -8) 138 return false; 139 140 /* Combine all three offsets into a single unsigned dword: 141 * 142 * bits 11:8 - U Offset (X component) 143 * bits 7:4 - V Offset (Y component) 144 * bits 3:0 - R Offset (Z component) 145 */ 146 *offset_bits = 0; 147 for (unsigned i = 0; i < num_components; i++) { 148 const unsigned shift = 4 * (2 - i); 149 *offset_bits |= (offsets[i] << shift) & (0xF << shift); 150 } 151 return true; 152 } 153 154 const char * 155 brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) 156 { 157 switch (op) { 158 case BRW_OPCODE_ILLEGAL ... BRW_OPCODE_NOP: 159 /* The DO instruction doesn't exist on Gen6+, but we use it to mark the 160 * start of a loop in the IR. 161 */ 162 if (devinfo->gen >= 6 && op == BRW_OPCODE_DO) 163 return "do"; 164 165 assert(brw_opcode_desc(devinfo, op)->name); 166 return brw_opcode_desc(devinfo, op)->name; 167 case FS_OPCODE_FB_WRITE: 168 return "fb_write"; 169 case FS_OPCODE_FB_WRITE_LOGICAL: 170 return "fb_write_logical"; 171 case FS_OPCODE_REP_FB_WRITE: 172 return "rep_fb_write"; 173 case FS_OPCODE_FB_READ: 174 return "fb_read"; 175 case FS_OPCODE_FB_READ_LOGICAL: 176 return "fb_read_logical"; 177 178 case SHADER_OPCODE_RCP: 179 return "rcp"; 180 case SHADER_OPCODE_RSQ: 181 return "rsq"; 182 case SHADER_OPCODE_SQRT: 183 return "sqrt"; 184 case SHADER_OPCODE_EXP2: 185 return "exp2"; 186 case SHADER_OPCODE_LOG2: 187 return "log2"; 188 case SHADER_OPCODE_POW: 189 return "pow"; 190 case SHADER_OPCODE_INT_QUOTIENT: 191 return "int_quot"; 192 case SHADER_OPCODE_INT_REMAINDER: 193 return "int_rem"; 194 case SHADER_OPCODE_SIN: 195 return "sin"; 196 case SHADER_OPCODE_COS: 197 return "cos"; 198 199 case SHADER_OPCODE_TEX: 200 return "tex"; 201 case SHADER_OPCODE_TEX_LOGICAL: 202 return "tex_logical"; 203 case SHADER_OPCODE_TXD: 204 return "txd"; 205 case SHADER_OPCODE_TXD_LOGICAL: 206 return "txd_logical"; 207 case SHADER_OPCODE_TXF: 208 return "txf"; 209 case SHADER_OPCODE_TXF_LOGICAL: 210 return "txf_logical"; 211 case SHADER_OPCODE_TXF_LZ: 212 return "txf_lz"; 213 case SHADER_OPCODE_TXL: 214 return "txl"; 215 case SHADER_OPCODE_TXL_LOGICAL: 216 return "txl_logical"; 217 case SHADER_OPCODE_TXL_LZ: 218 return "txl_lz"; 219 case SHADER_OPCODE_TXS: 220 return "txs"; 221 case SHADER_OPCODE_TXS_LOGICAL: 222 return "txs_logical"; 223 case FS_OPCODE_TXB: 224 return "txb"; 225 case FS_OPCODE_TXB_LOGICAL: 226 return "txb_logical"; 227 case SHADER_OPCODE_TXF_CMS: 228 return "txf_cms"; 229 case SHADER_OPCODE_TXF_CMS_LOGICAL: 230 return "txf_cms_logical"; 231 case SHADER_OPCODE_TXF_CMS_W: 232 return "txf_cms_w"; 233 case SHADER_OPCODE_TXF_CMS_W_LOGICAL: 234 return "txf_cms_w_logical"; 235 case SHADER_OPCODE_TXF_UMS: 236 return "txf_ums"; 237 case SHADER_OPCODE_TXF_UMS_LOGICAL: 238 return "txf_ums_logical"; 239 case SHADER_OPCODE_TXF_MCS: 240 return "txf_mcs"; 241 case SHADER_OPCODE_TXF_MCS_LOGICAL: 242 return "txf_mcs_logical"; 243 case SHADER_OPCODE_LOD: 244 return "lod"; 245 case SHADER_OPCODE_LOD_LOGICAL: 246 return "lod_logical"; 247 case SHADER_OPCODE_TG4: 248 return "tg4"; 249 case SHADER_OPCODE_TG4_LOGICAL: 250 return "tg4_logical"; 251 case SHADER_OPCODE_TG4_OFFSET: 252 return "tg4_offset"; 253 case SHADER_OPCODE_TG4_OFFSET_LOGICAL: 254 return "tg4_offset_logical"; 255 case SHADER_OPCODE_SAMPLEINFO: 256 return "sampleinfo"; 257 case SHADER_OPCODE_SAMPLEINFO_LOGICAL: 258 return "sampleinfo_logical"; 259 260 case SHADER_OPCODE_SHADER_TIME_ADD: 261 return "shader_time_add"; 262 263 case SHADER_OPCODE_UNTYPED_ATOMIC: 264 return "untyped_atomic"; 265 case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: 266 return "untyped_atomic_logical"; 267 case SHADER_OPCODE_UNTYPED_SURFACE_READ: 268 return "untyped_surface_read"; 269 case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: 270 return "untyped_surface_read_logical"; 271 case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: 272 return "untyped_surface_write"; 273 case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: 274 return "untyped_surface_write_logical"; 275 case SHADER_OPCODE_TYPED_ATOMIC: 276 return "typed_atomic"; 277 case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: 278 return "typed_atomic_logical"; 279 case SHADER_OPCODE_TYPED_SURFACE_READ: 280 return "typed_surface_read"; 281 case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: 282 return "typed_surface_read_logical"; 283 case SHADER_OPCODE_TYPED_SURFACE_WRITE: 284 return "typed_surface_write"; 285 case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: 286 return "typed_surface_write_logical"; 287 case SHADER_OPCODE_MEMORY_FENCE: 288 return "memory_fence"; 289 290 case SHADER_OPCODE_LOAD_PAYLOAD: 291 return "load_payload"; 292 case FS_OPCODE_PACK: 293 return "pack"; 294 295 case SHADER_OPCODE_GEN4_SCRATCH_READ: 296 return "gen4_scratch_read"; 297 case SHADER_OPCODE_GEN4_SCRATCH_WRITE: 298 return "gen4_scratch_write"; 299 case SHADER_OPCODE_GEN7_SCRATCH_READ: 300 return "gen7_scratch_read"; 301 case SHADER_OPCODE_URB_WRITE_SIMD8: 302 return "gen8_urb_write_simd8"; 303 case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT: 304 return "gen8_urb_write_simd8_per_slot"; 305 case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED: 306 return "gen8_urb_write_simd8_masked"; 307 case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT: 308 return "gen8_urb_write_simd8_masked_per_slot"; 309 case SHADER_OPCODE_URB_READ_SIMD8: 310 return "urb_read_simd8"; 311 case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT: 312 return "urb_read_simd8_per_slot"; 313 314 case SHADER_OPCODE_FIND_LIVE_CHANNEL: 315 return "find_live_channel"; 316 case SHADER_OPCODE_BROADCAST: 317 return "broadcast"; 318 319 case VEC4_OPCODE_MOV_BYTES: 320 return "mov_bytes"; 321 case VEC4_OPCODE_PACK_BYTES: 322 return "pack_bytes"; 323 case VEC4_OPCODE_UNPACK_UNIFORM: 324 return "unpack_uniform"; 325 case VEC4_OPCODE_FROM_DOUBLE: 326 return "double_to_single"; 327 case VEC4_OPCODE_TO_DOUBLE: 328 return "single_to_double"; 329 case VEC4_OPCODE_PICK_LOW_32BIT: 330 return "pick_low_32bit"; 331 case VEC4_OPCODE_PICK_HIGH_32BIT: 332 return "pick_high_32bit"; 333 case VEC4_OPCODE_SET_LOW_32BIT: 334 return "set_low_32bit"; 335 case VEC4_OPCODE_SET_HIGH_32BIT: 336 return "set_high_32bit"; 337 338 case FS_OPCODE_DDX_COARSE: 339 return "ddx_coarse"; 340 case FS_OPCODE_DDX_FINE: 341 return "ddx_fine"; 342 case FS_OPCODE_DDY_COARSE: 343 return "ddy_coarse"; 344 case FS_OPCODE_DDY_FINE: 345 return "ddy_fine"; 346 347 case FS_OPCODE_CINTERP: 348 return "cinterp"; 349 case FS_OPCODE_LINTERP: 350 return "linterp"; 351 352 case FS_OPCODE_PIXEL_X: 353 return "pixel_x"; 354 case FS_OPCODE_PIXEL_Y: 355 return "pixel_y"; 356 357 case FS_OPCODE_GET_BUFFER_SIZE: 358 return "fs_get_buffer_size"; 359 360 case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: 361 return "uniform_pull_const"; 362 case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7: 363 return "uniform_pull_const_gen7"; 364 case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4: 365 return "varying_pull_const_gen4"; 366 case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: 367 return "varying_pull_const_gen7"; 368 case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: 369 return "varying_pull_const_logical"; 370 371 case FS_OPCODE_MOV_DISPATCH_TO_FLAGS: 372 return "mov_dispatch_to_flags"; 373 case FS_OPCODE_DISCARD_JUMP: 374 return "discard_jump"; 375 376 case FS_OPCODE_SET_SAMPLE_ID: 377 return "set_sample_id"; 378 379 case FS_OPCODE_PACK_HALF_2x16_SPLIT: 380 return "pack_half_2x16_split"; 381 case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X: 382 return "unpack_half_2x16_split_x"; 383 case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y: 384 return "unpack_half_2x16_split_y"; 385 386 case FS_OPCODE_PLACEHOLDER_HALT: 387 return "placeholder_halt"; 388 389 case FS_OPCODE_INTERPOLATE_AT_SAMPLE: 390 return "interp_sample"; 391 case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: 392 return "interp_shared_offset"; 393 case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: 394 return "interp_per_slot_offset"; 395 396 case VS_OPCODE_URB_WRITE: 397 return "vs_urb_write"; 398 case VS_OPCODE_PULL_CONSTANT_LOAD: 399 return "pull_constant_load"; 400 case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: 401 return "pull_constant_load_gen7"; 402 403 case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9: 404 return "set_simd4x2_header_gen9"; 405 406 case VS_OPCODE_GET_BUFFER_SIZE: 407 return "vs_get_buffer_size"; 408 409 case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: 410 return "unpack_flags_simd4x2"; 411 412 case GS_OPCODE_URB_WRITE: 413 return "gs_urb_write"; 414 case GS_OPCODE_URB_WRITE_ALLOCATE: 415 return "gs_urb_write_allocate"; 416 case GS_OPCODE_THREAD_END: 417 return "gs_thread_end"; 418 case GS_OPCODE_SET_WRITE_OFFSET: 419 return "set_write_offset"; 420 case GS_OPCODE_SET_VERTEX_COUNT: 421 return "set_vertex_count"; 422 case GS_OPCODE_SET_DWORD_2: 423 return "set_dword_2"; 424 case GS_OPCODE_PREPARE_CHANNEL_MASKS: 425 return "prepare_channel_masks"; 426 case GS_OPCODE_SET_CHANNEL_MASKS: 427 return "set_channel_masks"; 428 case GS_OPCODE_GET_INSTANCE_ID: 429 return "get_instance_id"; 430 case GS_OPCODE_FF_SYNC: 431 return "ff_sync"; 432 case GS_OPCODE_SET_PRIMITIVE_ID: 433 return "set_primitive_id"; 434 case GS_OPCODE_SVB_WRITE: 435 return "gs_svb_write"; 436 case GS_OPCODE_SVB_SET_DST_INDEX: 437 return "gs_svb_set_dst_index"; 438 case GS_OPCODE_FF_SYNC_SET_PRIMITIVES: 439 return "gs_ff_sync_set_primitives"; 440 case CS_OPCODE_CS_TERMINATE: 441 return "cs_terminate"; 442 case SHADER_OPCODE_BARRIER: 443 return "barrier"; 444 case SHADER_OPCODE_MULH: 445 return "mulh"; 446 case SHADER_OPCODE_MOV_INDIRECT: 447 return "mov_indirect"; 448 449 case VEC4_OPCODE_URB_READ: 450 return "urb_read"; 451 case TCS_OPCODE_GET_INSTANCE_ID: 452 return "tcs_get_instance_id"; 453 case TCS_OPCODE_URB_WRITE: 454 return "tcs_urb_write"; 455 case TCS_OPCODE_SET_INPUT_URB_OFFSETS: 456 return "tcs_set_input_urb_offsets"; 457 case TCS_OPCODE_SET_OUTPUT_URB_OFFSETS: 458 return "tcs_set_output_urb_offsets"; 459 case TCS_OPCODE_GET_PRIMITIVE_ID: 460 return "tcs_get_primitive_id"; 461 case TCS_OPCODE_CREATE_BARRIER_HEADER: 462 return "tcs_create_barrier_header"; 463 case TCS_OPCODE_SRC0_010_IS_ZERO: 464 return "tcs_src0<0,1,0>_is_zero"; 465 case TCS_OPCODE_RELEASE_INPUT: 466 return "tcs_release_input"; 467 case TCS_OPCODE_THREAD_END: 468 return "tcs_thread_end"; 469 case TES_OPCODE_CREATE_INPUT_READ_HEADER: 470 return "tes_create_input_read_header"; 471 case TES_OPCODE_ADD_INDIRECT_URB_OFFSET: 472 return "tes_add_indirect_urb_offset"; 473 case TES_OPCODE_GET_PRIMITIVE_ID: 474 return "tes_get_primitive_id"; 475 } 476 477 unreachable("not reached"); 478 } 479 480 bool 481 brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg) 482 { 483 union { 484 unsigned ud; 485 int d; 486 float f; 487 double df; 488 } imm, sat_imm = { 0 }; 489 490 const unsigned size = type_sz(type); 491 492 /* We want to either do a 32-bit or 64-bit data copy, the type is otherwise 493 * irrelevant, so just check the size of the type and copy from/to an 494 * appropriately sized field. 495 */ 496 if (size < 8) 497 imm.ud = reg->ud; 498 else 499 imm.df = reg->df; 500 501 switch (type) { 502 case BRW_REGISTER_TYPE_UD: 503 case BRW_REGISTER_TYPE_D: 504 case BRW_REGISTER_TYPE_UW: 505 case BRW_REGISTER_TYPE_W: 506 case BRW_REGISTER_TYPE_UQ: 507 case BRW_REGISTER_TYPE_Q: 508 /* Nothing to do. */ 509 return false; 510 case BRW_REGISTER_TYPE_F: 511 sat_imm.f = CLAMP(imm.f, 0.0f, 1.0f); 512 break; 513 case BRW_REGISTER_TYPE_DF: 514 sat_imm.df = CLAMP(imm.df, 0.0, 1.0); 515 break; 516 case BRW_REGISTER_TYPE_UB: 517 case BRW_REGISTER_TYPE_B: 518 unreachable("no UB/B immediates"); 519 case BRW_REGISTER_TYPE_V: 520 case BRW_REGISTER_TYPE_UV: 521 case BRW_REGISTER_TYPE_VF: 522 unreachable("unimplemented: saturate vector immediate"); 523 case BRW_REGISTER_TYPE_HF: 524 unreachable("unimplemented: saturate HF immediate"); 525 } 526 527 if (size < 8) { 528 if (imm.ud != sat_imm.ud) { 529 reg->ud = sat_imm.ud; 530 return true; 531 } 532 } else { 533 if (imm.df != sat_imm.df) { 534 reg->df = sat_imm.df; 535 return true; 536 } 537 } 538 return false; 539 } 540 541 bool 542 brw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg) 543 { 544 switch (type) { 545 case BRW_REGISTER_TYPE_D: 546 case BRW_REGISTER_TYPE_UD: 547 reg->d = -reg->d; 548 return true; 549 case BRW_REGISTER_TYPE_W: 550 case BRW_REGISTER_TYPE_UW: 551 reg->d = -(int16_t)reg->ud; 552 return true; 553 case BRW_REGISTER_TYPE_F: 554 reg->f = -reg->f; 555 return true; 556 case BRW_REGISTER_TYPE_VF: 557 reg->ud ^= 0x80808080; 558 return true; 559 case BRW_REGISTER_TYPE_DF: 560 reg->df = -reg->df; 561 return true; 562 case BRW_REGISTER_TYPE_UB: 563 case BRW_REGISTER_TYPE_B: 564 unreachable("no UB/B immediates"); 565 case BRW_REGISTER_TYPE_UV: 566 case BRW_REGISTER_TYPE_V: 567 assert(!"unimplemented: negate UV/V immediate"); 568 case BRW_REGISTER_TYPE_UQ: 569 case BRW_REGISTER_TYPE_Q: 570 assert(!"unimplemented: negate UQ/Q immediate"); 571 case BRW_REGISTER_TYPE_HF: 572 assert(!"unimplemented: negate HF immediate"); 573 } 574 575 return false; 576 } 577 578 bool 579 brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg) 580 { 581 switch (type) { 582 case BRW_REGISTER_TYPE_D: 583 reg->d = abs(reg->d); 584 return true; 585 case BRW_REGISTER_TYPE_W: 586 reg->d = abs((int16_t)reg->ud); 587 return true; 588 case BRW_REGISTER_TYPE_F: 589 reg->f = fabsf(reg->f); 590 return true; 591 case BRW_REGISTER_TYPE_DF: 592 reg->df = fabs(reg->df); 593 return true; 594 case BRW_REGISTER_TYPE_VF: 595 reg->ud &= ~0x80808080; 596 return true; 597 case BRW_REGISTER_TYPE_UB: 598 case BRW_REGISTER_TYPE_B: 599 unreachable("no UB/B immediates"); 600 case BRW_REGISTER_TYPE_UQ: 601 case BRW_REGISTER_TYPE_UD: 602 case BRW_REGISTER_TYPE_UW: 603 case BRW_REGISTER_TYPE_UV: 604 /* Presumably the absolute value modifier on an unsigned source is a 605 * nop, but it would be nice to confirm. 606 */ 607 assert(!"unimplemented: abs unsigned immediate"); 608 case BRW_REGISTER_TYPE_V: 609 assert(!"unimplemented: abs V immediate"); 610 case BRW_REGISTER_TYPE_Q: 611 assert(!"unimplemented: abs Q immediate"); 612 case BRW_REGISTER_TYPE_HF: 613 assert(!"unimplemented: abs HF immediate"); 614 } 615 616 return false; 617 } 618 619 /** 620 * Get the appropriate atomic op for an image atomic intrinsic. 621 */ 622 unsigned 623 get_atomic_counter_op(nir_intrinsic_op op) 624 { 625 switch (op) { 626 case nir_intrinsic_atomic_counter_inc: 627 return BRW_AOP_INC; 628 case nir_intrinsic_atomic_counter_dec: 629 return BRW_AOP_PREDEC; 630 case nir_intrinsic_atomic_counter_add: 631 return BRW_AOP_ADD; 632 case nir_intrinsic_atomic_counter_min: 633 return BRW_AOP_UMIN; 634 case nir_intrinsic_atomic_counter_max: 635 return BRW_AOP_UMAX; 636 case nir_intrinsic_atomic_counter_and: 637 return BRW_AOP_AND; 638 case nir_intrinsic_atomic_counter_or: 639 return BRW_AOP_OR; 640 case nir_intrinsic_atomic_counter_xor: 641 return BRW_AOP_XOR; 642 case nir_intrinsic_atomic_counter_exchange: 643 return BRW_AOP_MOV; 644 case nir_intrinsic_atomic_counter_comp_swap: 645 return BRW_AOP_CMPWR; 646 default: 647 unreachable("Not reachable."); 648 } 649 } 650 651 backend_shader::backend_shader(const struct brw_compiler *compiler, 652 void *log_data, 653 void *mem_ctx, 654 const nir_shader *shader, 655 struct brw_stage_prog_data *stage_prog_data) 656 : compiler(compiler), 657 log_data(log_data), 658 devinfo(compiler->devinfo), 659 nir(shader), 660 stage_prog_data(stage_prog_data), 661 mem_ctx(mem_ctx), 662 cfg(NULL), 663 stage(shader->stage) 664 { 665 debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage); 666 stage_name = _mesa_shader_stage_to_string(stage); 667 stage_abbrev = _mesa_shader_stage_to_abbrev(stage); 668 } 669 670 bool 671 backend_reg::equals(const backend_reg &r) const 672 { 673 return brw_regs_equal(this, &r) && offset == r.offset; 674 } 675 676 bool 677 backend_reg::is_zero() const 678 { 679 if (file != IMM) 680 return false; 681 682 switch (type) { 683 case BRW_REGISTER_TYPE_F: 684 return f == 0; 685 case BRW_REGISTER_TYPE_DF: 686 return df == 0; 687 case BRW_REGISTER_TYPE_D: 688 case BRW_REGISTER_TYPE_UD: 689 return d == 0; 690 default: 691 return false; 692 } 693 } 694 695 bool 696 backend_reg::is_one() const 697 { 698 if (file != IMM) 699 return false; 700 701 switch (type) { 702 case BRW_REGISTER_TYPE_F: 703 return f == 1.0f; 704 case BRW_REGISTER_TYPE_DF: 705 return df == 1.0; 706 case BRW_REGISTER_TYPE_D: 707 case BRW_REGISTER_TYPE_UD: 708 return d == 1; 709 default: 710 return false; 711 } 712 } 713 714 bool 715 backend_reg::is_negative_one() const 716 { 717 if (file != IMM) 718 return false; 719 720 switch (type) { 721 case BRW_REGISTER_TYPE_F: 722 return f == -1.0; 723 case BRW_REGISTER_TYPE_DF: 724 return df == -1.0; 725 case BRW_REGISTER_TYPE_D: 726 return d == -1; 727 default: 728 return false; 729 } 730 } 731 732 bool 733 backend_reg::is_null() const 734 { 735 return file == ARF && nr == BRW_ARF_NULL; 736 } 737 738 739 bool 740 backend_reg::is_accumulator() const 741 { 742 return file == ARF && nr == BRW_ARF_ACCUMULATOR; 743 } 744 745 bool 746 backend_instruction::is_commutative() const 747 { 748 switch (opcode) { 749 case BRW_OPCODE_AND: 750 case BRW_OPCODE_OR: 751 case BRW_OPCODE_XOR: 752 case BRW_OPCODE_ADD: 753 case BRW_OPCODE_MUL: 754 case SHADER_OPCODE_MULH: 755 return true; 756 case BRW_OPCODE_SEL: 757 /* MIN and MAX are commutative. */ 758 if (conditional_mod == BRW_CONDITIONAL_GE || 759 conditional_mod == BRW_CONDITIONAL_L) { 760 return true; 761 } 762 /* fallthrough */ 763 default: 764 return false; 765 } 766 } 767 768 bool 769 backend_instruction::is_3src(const struct gen_device_info *devinfo) const 770 { 771 return ::is_3src(devinfo, opcode); 772 } 773 774 bool 775 backend_instruction::is_tex() const 776 { 777 return (opcode == SHADER_OPCODE_TEX || 778 opcode == FS_OPCODE_TXB || 779 opcode == SHADER_OPCODE_TXD || 780 opcode == SHADER_OPCODE_TXF || 781 opcode == SHADER_OPCODE_TXF_LZ || 782 opcode == SHADER_OPCODE_TXF_CMS || 783 opcode == SHADER_OPCODE_TXF_CMS_W || 784 opcode == SHADER_OPCODE_TXF_UMS || 785 opcode == SHADER_OPCODE_TXF_MCS || 786 opcode == SHADER_OPCODE_TXL || 787 opcode == SHADER_OPCODE_TXL_LZ || 788 opcode == SHADER_OPCODE_TXS || 789 opcode == SHADER_OPCODE_LOD || 790 opcode == SHADER_OPCODE_TG4 || 791 opcode == SHADER_OPCODE_TG4_OFFSET || 792 opcode == SHADER_OPCODE_SAMPLEINFO); 793 } 794 795 bool 796 backend_instruction::is_math() const 797 { 798 return (opcode == SHADER_OPCODE_RCP || 799 opcode == SHADER_OPCODE_RSQ || 800 opcode == SHADER_OPCODE_SQRT || 801 opcode == SHADER_OPCODE_EXP2 || 802 opcode == SHADER_OPCODE_LOG2 || 803 opcode == SHADER_OPCODE_SIN || 804 opcode == SHADER_OPCODE_COS || 805 opcode == SHADER_OPCODE_INT_QUOTIENT || 806 opcode == SHADER_OPCODE_INT_REMAINDER || 807 opcode == SHADER_OPCODE_POW); 808 } 809 810 bool 811 backend_instruction::is_control_flow() const 812 { 813 switch (opcode) { 814 case BRW_OPCODE_DO: 815 case BRW_OPCODE_WHILE: 816 case BRW_OPCODE_IF: 817 case BRW_OPCODE_ELSE: 818 case BRW_OPCODE_ENDIF: 819 case BRW_OPCODE_BREAK: 820 case BRW_OPCODE_CONTINUE: 821 return true; 822 default: 823 return false; 824 } 825 } 826 827 bool 828 backend_instruction::can_do_source_mods() const 829 { 830 switch (opcode) { 831 case BRW_OPCODE_ADDC: 832 case BRW_OPCODE_BFE: 833 case BRW_OPCODE_BFI1: 834 case BRW_OPCODE_BFI2: 835 case BRW_OPCODE_BFREV: 836 case BRW_OPCODE_CBIT: 837 case BRW_OPCODE_FBH: 838 case BRW_OPCODE_FBL: 839 case BRW_OPCODE_SUBB: 840 return false; 841 default: 842 return true; 843 } 844 } 845 846 bool 847 backend_instruction::can_do_saturate() const 848 { 849 switch (opcode) { 850 case BRW_OPCODE_ADD: 851 case BRW_OPCODE_ASR: 852 case BRW_OPCODE_AVG: 853 case BRW_OPCODE_DP2: 854 case BRW_OPCODE_DP3: 855 case BRW_OPCODE_DP4: 856 case BRW_OPCODE_DPH: 857 case BRW_OPCODE_F16TO32: 858 case BRW_OPCODE_F32TO16: 859 case BRW_OPCODE_LINE: 860 case BRW_OPCODE_LRP: 861 case BRW_OPCODE_MAC: 862 case BRW_OPCODE_MAD: 863 case BRW_OPCODE_MATH: 864 case BRW_OPCODE_MOV: 865 case BRW_OPCODE_MUL: 866 case SHADER_OPCODE_MULH: 867 case BRW_OPCODE_PLN: 868 case BRW_OPCODE_RNDD: 869 case BRW_OPCODE_RNDE: 870 case BRW_OPCODE_RNDU: 871 case BRW_OPCODE_RNDZ: 872 case BRW_OPCODE_SEL: 873 case BRW_OPCODE_SHL: 874 case BRW_OPCODE_SHR: 875 case FS_OPCODE_LINTERP: 876 case SHADER_OPCODE_COS: 877 case SHADER_OPCODE_EXP2: 878 case SHADER_OPCODE_LOG2: 879 case SHADER_OPCODE_POW: 880 case SHADER_OPCODE_RCP: 881 case SHADER_OPCODE_RSQ: 882 case SHADER_OPCODE_SIN: 883 case SHADER_OPCODE_SQRT: 884 return true; 885 default: 886 return false; 887 } 888 } 889 890 bool 891 backend_instruction::can_do_cmod() const 892 { 893 switch (opcode) { 894 case BRW_OPCODE_ADD: 895 case BRW_OPCODE_ADDC: 896 case BRW_OPCODE_AND: 897 case BRW_OPCODE_ASR: 898 case BRW_OPCODE_AVG: 899 case BRW_OPCODE_CMP: 900 case BRW_OPCODE_CMPN: 901 case BRW_OPCODE_DP2: 902 case BRW_OPCODE_DP3: 903 case BRW_OPCODE_DP4: 904 case BRW_OPCODE_DPH: 905 case BRW_OPCODE_F16TO32: 906 case BRW_OPCODE_F32TO16: 907 case BRW_OPCODE_FRC: 908 case BRW_OPCODE_LINE: 909 case BRW_OPCODE_LRP: 910 case BRW_OPCODE_LZD: 911 case BRW_OPCODE_MAC: 912 case BRW_OPCODE_MACH: 913 case BRW_OPCODE_MAD: 914 case BRW_OPCODE_MOV: 915 case BRW_OPCODE_MUL: 916 case BRW_OPCODE_NOT: 917 case BRW_OPCODE_OR: 918 case BRW_OPCODE_PLN: 919 case BRW_OPCODE_RNDD: 920 case BRW_OPCODE_RNDE: 921 case BRW_OPCODE_RNDU: 922 case BRW_OPCODE_RNDZ: 923 case BRW_OPCODE_SAD2: 924 case BRW_OPCODE_SADA2: 925 case BRW_OPCODE_SHL: 926 case BRW_OPCODE_SHR: 927 case BRW_OPCODE_SUBB: 928 case BRW_OPCODE_XOR: 929 case FS_OPCODE_CINTERP: 930 case FS_OPCODE_LINTERP: 931 return true; 932 default: 933 return false; 934 } 935 } 936 937 bool 938 backend_instruction::reads_accumulator_implicitly() const 939 { 940 switch (opcode) { 941 case BRW_OPCODE_MAC: 942 case BRW_OPCODE_MACH: 943 case BRW_OPCODE_SADA2: 944 return true; 945 default: 946 return false; 947 } 948 } 949 950 bool 951 backend_instruction::writes_accumulator_implicitly(const struct gen_device_info *devinfo) const 952 { 953 return writes_accumulator || 954 (devinfo->gen < 6 && 955 ((opcode >= BRW_OPCODE_ADD && opcode < BRW_OPCODE_NOP) || 956 (opcode >= FS_OPCODE_DDX_COARSE && opcode <= FS_OPCODE_LINTERP && 957 opcode != FS_OPCODE_CINTERP))); 958 } 959 960 bool 961 backend_instruction::has_side_effects() const 962 { 963 switch (opcode) { 964 case SHADER_OPCODE_UNTYPED_ATOMIC: 965 case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: 966 case SHADER_OPCODE_GEN4_SCRATCH_WRITE: 967 case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: 968 case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: 969 case SHADER_OPCODE_TYPED_ATOMIC: 970 case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: 971 case SHADER_OPCODE_TYPED_SURFACE_WRITE: 972 case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: 973 case SHADER_OPCODE_MEMORY_FENCE: 974 case SHADER_OPCODE_URB_WRITE_SIMD8: 975 case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT: 976 case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED: 977 case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT: 978 case FS_OPCODE_FB_WRITE: 979 case FS_OPCODE_FB_WRITE_LOGICAL: 980 case SHADER_OPCODE_BARRIER: 981 case TCS_OPCODE_URB_WRITE: 982 case TCS_OPCODE_RELEASE_INPUT: 983 return true; 984 default: 985 return false; 986 } 987 } 988 989 bool 990 backend_instruction::is_volatile() const 991 { 992 switch (opcode) { 993 case SHADER_OPCODE_UNTYPED_SURFACE_READ: 994 case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: 995 case SHADER_OPCODE_TYPED_SURFACE_READ: 996 case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: 997 case SHADER_OPCODE_URB_READ_SIMD8: 998 case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT: 999 case VEC4_OPCODE_URB_READ: 1000 return true; 1001 default: 1002 return false; 1003 } 1004 } 1005 1006 #ifndef NDEBUG 1007 static bool 1008 inst_is_in_block(const bblock_t *block, const backend_instruction *inst) 1009 { 1010 bool found = false; 1011 foreach_inst_in_block (backend_instruction, i, block) { 1012 if (inst == i) { 1013 found = true; 1014 } 1015 } 1016 return found; 1017 } 1018 #endif 1019 1020 static void 1021 adjust_later_block_ips(bblock_t *start_block, int ip_adjustment) 1022 { 1023 for (bblock_t *block_iter = start_block->next(); 1024 block_iter; 1025 block_iter = block_iter->next()) { 1026 block_iter->start_ip += ip_adjustment; 1027 block_iter->end_ip += ip_adjustment; 1028 } 1029 } 1030 1031 void 1032 backend_instruction::insert_after(bblock_t *block, backend_instruction *inst) 1033 { 1034 assert(this != inst); 1035 1036 if (!this->is_head_sentinel()) 1037 assert(inst_is_in_block(block, this) || !"Instruction not in block"); 1038 1039 block->end_ip++; 1040 1041 adjust_later_block_ips(block, 1); 1042 1043 exec_node::insert_after(inst); 1044 } 1045 1046 void 1047 backend_instruction::insert_before(bblock_t *block, backend_instruction *inst) 1048 { 1049 assert(this != inst); 1050 1051 if (!this->is_tail_sentinel()) 1052 assert(inst_is_in_block(block, this) || !"Instruction not in block"); 1053 1054 block->end_ip++; 1055 1056 adjust_later_block_ips(block, 1); 1057 1058 exec_node::insert_before(inst); 1059 } 1060 1061 void 1062 backend_instruction::insert_before(bblock_t *block, exec_list *list) 1063 { 1064 assert(inst_is_in_block(block, this) || !"Instruction not in block"); 1065 1066 unsigned num_inst = list->length(); 1067 1068 block->end_ip += num_inst; 1069 1070 adjust_later_block_ips(block, num_inst); 1071 1072 exec_node::insert_before(list); 1073 } 1074 1075 void 1076 backend_instruction::remove(bblock_t *block) 1077 { 1078 assert(inst_is_in_block(block, this) || !"Instruction not in block"); 1079 1080 adjust_later_block_ips(block, -1); 1081 1082 if (block->start_ip == block->end_ip) { 1083 block->cfg->remove_block(block); 1084 } else { 1085 block->end_ip--; 1086 } 1087 1088 exec_node::remove(); 1089 } 1090 1091 void 1092 backend_shader::dump_instructions() 1093 { 1094 dump_instructions(NULL); 1095 } 1096 1097 void 1098 backend_shader::dump_instructions(const char *name) 1099 { 1100 FILE *file = stderr; 1101 if (name && geteuid() != 0) { 1102 file = fopen(name, "w"); 1103 if (!file) 1104 file = stderr; 1105 } 1106 1107 if (cfg) { 1108 int ip = 0; 1109 foreach_block_and_inst(block, backend_instruction, inst, cfg) { 1110 if (!unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) 1111 fprintf(file, "%4d: ", ip++); 1112 dump_instruction(inst, file); 1113 } 1114 } else { 1115 int ip = 0; 1116 foreach_in_list(backend_instruction, inst, &instructions) { 1117 if (!unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) 1118 fprintf(file, "%4d: ", ip++); 1119 dump_instruction(inst, file); 1120 } 1121 } 1122 1123 if (file != stderr) { 1124 fclose(file); 1125 } 1126 } 1127 1128 void 1129 backend_shader::calculate_cfg() 1130 { 1131 if (this->cfg) 1132 return; 1133 cfg = new(mem_ctx) cfg_t(&this->instructions); 1134 } 1135 1136 /** 1137 * Sets up the starting offsets for the groups of binding table entries 1138 * commong to all pipeline stages. 1139 * 1140 * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're 1141 * unused but also make sure that addition of small offsets to them will 1142 * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES. 1143 */ 1144 uint32_t 1145 brw_assign_common_binding_table_offsets(const struct gen_device_info *devinfo, 1146 const struct gl_program *prog, 1147 struct brw_stage_prog_data *stage_prog_data, 1148 uint32_t next_binding_table_offset) 1149 { 1150 int num_textures = util_last_bit(prog->SamplersUsed); 1151 1152 stage_prog_data->binding_table.texture_start = next_binding_table_offset; 1153 next_binding_table_offset += num_textures; 1154 1155 if (prog->info.num_ubos) { 1156 assert(prog->info.num_ubos <= BRW_MAX_UBO); 1157 stage_prog_data->binding_table.ubo_start = next_binding_table_offset; 1158 next_binding_table_offset += prog->info.num_ubos; 1159 } else { 1160 stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0; 1161 } 1162 1163 if (prog->info.num_ssbos) { 1164 assert(prog->info.num_ssbos <= BRW_MAX_SSBO); 1165 stage_prog_data->binding_table.ssbo_start = next_binding_table_offset; 1166 next_binding_table_offset += prog->info.num_ssbos; 1167 } else { 1168 stage_prog_data->binding_table.ssbo_start = 0xd0d0d0d0; 1169 } 1170 1171 if (INTEL_DEBUG & DEBUG_SHADER_TIME) { 1172 stage_prog_data->binding_table.shader_time_start = next_binding_table_offset; 1173 next_binding_table_offset++; 1174 } else { 1175 stage_prog_data->binding_table.shader_time_start = 0xd0d0d0d0; 1176 } 1177 1178 if (prog->nir->info->uses_texture_gather) { 1179 if (devinfo->gen >= 8) { 1180 stage_prog_data->binding_table.gather_texture_start = 1181 stage_prog_data->binding_table.texture_start; 1182 } else { 1183 stage_prog_data->binding_table.gather_texture_start = next_binding_table_offset; 1184 next_binding_table_offset += num_textures; 1185 } 1186 } else { 1187 stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0; 1188 } 1189 1190 if (prog->info.num_abos) { 1191 stage_prog_data->binding_table.abo_start = next_binding_table_offset; 1192 next_binding_table_offset += prog->info.num_abos; 1193 } else { 1194 stage_prog_data->binding_table.abo_start = 0xd0d0d0d0; 1195 } 1196 1197 if (prog->info.num_images) { 1198 stage_prog_data->binding_table.image_start = next_binding_table_offset; 1199 next_binding_table_offset += prog->info.num_images; 1200 } else { 1201 stage_prog_data->binding_table.image_start = 0xd0d0d0d0; 1202 } 1203 1204 /* This may or may not be used depending on how the compile goes. */ 1205 stage_prog_data->binding_table.pull_constants_start = next_binding_table_offset; 1206 next_binding_table_offset++; 1207 1208 /* Plane 0 is just the regular texture section */ 1209 stage_prog_data->binding_table.plane_start[0] = stage_prog_data->binding_table.texture_start; 1210 1211 stage_prog_data->binding_table.plane_start[1] = next_binding_table_offset; 1212 next_binding_table_offset += num_textures; 1213 1214 stage_prog_data->binding_table.plane_start[2] = next_binding_table_offset; 1215 next_binding_table_offset += num_textures; 1216 1217 /* prog_data->base.binding_table.size will be set by brw_mark_surface_used. */ 1218 1219 assert(next_binding_table_offset <= BRW_MAX_SURFACES); 1220 return next_binding_table_offset; 1221 } 1222 1223 static void 1224 setup_vec4_uniform_value(const gl_constant_value **params, 1225 const gl_constant_value *values, 1226 unsigned n) 1227 { 1228 static const gl_constant_value zero = { 0 }; 1229 1230 for (unsigned i = 0; i < n; ++i) 1231 params[i] = &values[i]; 1232 1233 for (unsigned i = n; i < 4; ++i) 1234 params[i] = &zero; 1235 } 1236 1237 void 1238 brw_setup_image_uniform_values(gl_shader_stage stage, 1239 struct brw_stage_prog_data *stage_prog_data, 1240 unsigned param_start_index, 1241 const gl_uniform_storage *storage) 1242 { 1243 const gl_constant_value **param = 1244 &stage_prog_data->param[param_start_index]; 1245 1246 for (unsigned i = 0; i < MAX2(storage->array_elements, 1); i++) { 1247 const unsigned image_idx = storage->opaque[stage].index + i; 1248 const brw_image_param *image_param = 1249 &stage_prog_data->image_param[image_idx]; 1250 1251 /* Upload the brw_image_param structure. The order is expected to match 1252 * the BRW_IMAGE_PARAM_*_OFFSET defines. 1253 */ 1254 setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET, 1255 (const gl_constant_value *)&image_param->surface_idx, 1); 1256 setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET, 1257 (const gl_constant_value *)image_param->offset, 2); 1258 setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET, 1259 (const gl_constant_value *)image_param->size, 3); 1260 setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET, 1261 (const gl_constant_value *)image_param->stride, 4); 1262 setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET, 1263 (const gl_constant_value *)image_param->tiling, 3); 1264 setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, 1265 (const gl_constant_value *)image_param->swizzling, 2); 1266 param += BRW_IMAGE_PARAM_SIZE; 1267 1268 brw_mark_surface_used( 1269 stage_prog_data, 1270 stage_prog_data->binding_table.image_start + image_idx); 1271 } 1272 } 1273 1274 /** 1275 * Decide which set of clip planes should be used when clipping via 1276 * gl_Position or gl_ClipVertex. 1277 */ 1278 gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx) 1279 { 1280 if (ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]) { 1281 /* There is currently a GLSL vertex shader, so clip according to GLSL 1282 * rules, which means compare gl_ClipVertex (or gl_Position, if 1283 * gl_ClipVertex wasn't assigned) against the eye-coordinate clip planes 1284 * that were stored in EyeUserPlane at the time the clip planes were 1285 * specified. 1286 */ 1287 return ctx->Transform.EyeUserPlane; 1288 } else { 1289 /* Either we are using fixed function or an ARB vertex program. In 1290 * either case the clip planes are going to be compared against 1291 * gl_Position (which is in clip coordinates) so we have to clip using 1292 * _ClipUserPlane, which was transformed into clip coordinates by Mesa 1293 * core. 1294 */ 1295 return ctx->Transform._ClipUserPlane; 1296 } 1297 } 1298 1299 extern "C" const unsigned * 1300 brw_compile_tes(const struct brw_compiler *compiler, 1301 void *log_data, 1302 void *mem_ctx, 1303 const struct brw_tes_prog_key *key, 1304 const struct brw_vue_map *input_vue_map, 1305 struct brw_tes_prog_data *prog_data, 1306 const nir_shader *src_shader, 1307 struct gl_program *prog, 1308 int shader_time_index, 1309 unsigned *final_assembly_size, 1310 char **error_str) 1311 { 1312 const struct gen_device_info *devinfo = compiler->devinfo; 1313 const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_EVAL]; 1314 1315 nir_shader *nir = nir_shader_clone(mem_ctx, src_shader); 1316 nir->info->inputs_read = key->inputs_read; 1317 nir->info->patch_inputs_read = key->patch_inputs_read; 1318 1319 nir = brw_nir_apply_sampler_key(nir, compiler, &key->tex, is_scalar); 1320 brw_nir_lower_tes_inputs(nir, input_vue_map); 1321 brw_nir_lower_vue_outputs(nir, is_scalar); 1322 nir = brw_postprocess_nir(nir, compiler, is_scalar); 1323 1324 brw_compute_vue_map(devinfo, &prog_data->base.vue_map, 1325 nir->info->outputs_written, 1326 nir->info->separate_shader); 1327 1328 unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4; 1329 1330 assert(output_size_bytes >= 1); 1331 if (output_size_bytes > GEN7_MAX_DS_URB_ENTRY_SIZE_BYTES) { 1332 if (error_str) 1333 *error_str = ralloc_strdup(mem_ctx, "DS outputs exceed maximum size"); 1334 return NULL; 1335 } 1336 1337 prog_data->base.clip_distance_mask = 1338 ((1 << nir->info->clip_distance_array_size) - 1); 1339 prog_data->base.cull_distance_mask = 1340 ((1 << nir->info->cull_distance_array_size) - 1) << 1341 nir->info->clip_distance_array_size; 1342 1343 /* URB entry sizes are stored as a multiple of 64 bytes. */ 1344 prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64; 1345 prog_data->base.urb_read_length = 0; 1346 1347 STATIC_ASSERT(BRW_TESS_PARTITIONING_INTEGER == TESS_SPACING_EQUAL - 1); 1348 STATIC_ASSERT(BRW_TESS_PARTITIONING_ODD_FRACTIONAL == 1349 TESS_SPACING_FRACTIONAL_ODD - 1); 1350 STATIC_ASSERT(BRW_TESS_PARTITIONING_EVEN_FRACTIONAL == 1351 TESS_SPACING_FRACTIONAL_EVEN - 1); 1352 1353 prog_data->partitioning = 1354 (enum brw_tess_partitioning) (nir->info->tess.spacing - 1); 1355 1356 switch (nir->info->tess.primitive_mode) { 1357 case GL_QUADS: 1358 prog_data->domain = BRW_TESS_DOMAIN_QUAD; 1359 break; 1360 case GL_TRIANGLES: 1361 prog_data->domain = BRW_TESS_DOMAIN_TRI; 1362 break; 1363 case GL_ISOLINES: 1364 prog_data->domain = BRW_TESS_DOMAIN_ISOLINE; 1365 break; 1366 default: 1367 unreachable("invalid domain shader primitive mode"); 1368 } 1369 1370 if (nir->info->tess.point_mode) { 1371 prog_data->output_topology = BRW_TESS_OUTPUT_TOPOLOGY_POINT; 1372 } else if (nir->info->tess.primitive_mode == GL_ISOLINES) { 1373 prog_data->output_topology = BRW_TESS_OUTPUT_TOPOLOGY_LINE; 1374 } else { 1375 /* Hardware winding order is backwards from OpenGL */ 1376 prog_data->output_topology = 1377 nir->info->tess.ccw ? BRW_TESS_OUTPUT_TOPOLOGY_TRI_CW 1378 : BRW_TESS_OUTPUT_TOPOLOGY_TRI_CCW; 1379 } 1380 1381 if (unlikely(INTEL_DEBUG & DEBUG_TES)) { 1382 fprintf(stderr, "TES Input "); 1383 brw_print_vue_map(stderr, input_vue_map); 1384 fprintf(stderr, "TES Output "); 1385 brw_print_vue_map(stderr, &prog_data->base.vue_map); 1386 } 1387 1388 if (is_scalar) { 1389 fs_visitor v(compiler, log_data, mem_ctx, (void *) key, 1390 &prog_data->base.base, NULL, nir, 8, 1391 shader_time_index, input_vue_map); 1392 if (!v.run_tes()) { 1393 if (error_str) 1394 *error_str = ralloc_strdup(mem_ctx, v.fail_msg); 1395 return NULL; 1396 } 1397 1398 prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs; 1399 prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; 1400 1401 fs_generator g(compiler, log_data, mem_ctx, (void *) key, 1402 &prog_data->base.base, v.promoted_constants, false, 1403 MESA_SHADER_TESS_EVAL); 1404 if (unlikely(INTEL_DEBUG & DEBUG_TES)) { 1405 g.enable_debug(ralloc_asprintf(mem_ctx, 1406 "%s tessellation evaluation shader %s", 1407 nir->info->label ? nir->info->label 1408 : "unnamed", 1409 nir->info->name)); 1410 } 1411 1412 g.generate_code(v.cfg, 8); 1413 1414 return g.get_assembly(final_assembly_size); 1415 } else { 1416 brw::vec4_tes_visitor v(compiler, log_data, key, prog_data, 1417 nir, mem_ctx, shader_time_index); 1418 if (!v.run()) { 1419 if (error_str) 1420 *error_str = ralloc_strdup(mem_ctx, v.fail_msg); 1421 return NULL; 1422 } 1423 1424 if (unlikely(INTEL_DEBUG & DEBUG_TES)) 1425 v.dump_instructions(); 1426 1427 return brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir, 1428 &prog_data->base, v.cfg, 1429 final_assembly_size); 1430 } 1431 } 1432