1 /* 2 * Copyright 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24 #include "brw_cfg.h" 25 #include "brw_eu.h" 26 #include "brw_fs.h" 27 #include "brw_nir.h" 28 #include "brw_vec4_tes.h" 29 #include "common/gen_debug.h" 30 #include "main/uniforms.h" 31 #include "util/macros.h" 32 33 enum brw_reg_type 34 brw_type_for_base_type(const struct glsl_type *type) 35 { 36 switch (type->base_type) { 37 case GLSL_TYPE_FLOAT16: 38 return BRW_REGISTER_TYPE_HF; 39 case GLSL_TYPE_FLOAT: 40 return BRW_REGISTER_TYPE_F; 41 case GLSL_TYPE_INT: 42 case GLSL_TYPE_BOOL: 43 case GLSL_TYPE_SUBROUTINE: 44 return BRW_REGISTER_TYPE_D; 45 case GLSL_TYPE_INT16: 46 return BRW_REGISTER_TYPE_W; 47 case GLSL_TYPE_UINT: 48 return BRW_REGISTER_TYPE_UD; 49 case GLSL_TYPE_UINT16: 50 return BRW_REGISTER_TYPE_UW; 51 case GLSL_TYPE_ARRAY: 52 return brw_type_for_base_type(type->fields.array); 53 case GLSL_TYPE_STRUCT: 54 case GLSL_TYPE_SAMPLER: 55 case GLSL_TYPE_ATOMIC_UINT: 56 /* These should be overridden with the type of the member when 57 * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely 58 * way to trip up if we don't. 59 */ 60 return BRW_REGISTER_TYPE_UD; 61 case GLSL_TYPE_IMAGE: 62 return BRW_REGISTER_TYPE_UD; 63 case GLSL_TYPE_DOUBLE: 64 return BRW_REGISTER_TYPE_DF; 65 case GLSL_TYPE_UINT64: 66 return BRW_REGISTER_TYPE_UQ; 67 case GLSL_TYPE_INT64: 68 return BRW_REGISTER_TYPE_Q; 69 case GLSL_TYPE_VOID: 70 case GLSL_TYPE_ERROR: 71 case GLSL_TYPE_INTERFACE: 72 case GLSL_TYPE_FUNCTION: 73 unreachable("not reached"); 74 } 75 76 return BRW_REGISTER_TYPE_F; 77 } 78 79 enum brw_conditional_mod 80 brw_conditional_for_comparison(unsigned int op) 81 { 82 switch (op) { 83 case ir_binop_less: 84 return BRW_CONDITIONAL_L; 85 case ir_binop_gequal: 86 return BRW_CONDITIONAL_GE; 87 case ir_binop_equal: 88 case ir_binop_all_equal: /* same as equal for scalars */ 89 return BRW_CONDITIONAL_Z; 90 case ir_binop_nequal: 91 case ir_binop_any_nequal: /* same as nequal for scalars */ 92 return BRW_CONDITIONAL_NZ; 93 default: 94 unreachable("not reached: bad operation for comparison"); 95 } 96 } 97 98 uint32_t 99 brw_math_function(enum opcode op) 100 { 101 switch (op) { 102 case SHADER_OPCODE_RCP: 103 return BRW_MATH_FUNCTION_INV; 104 case SHADER_OPCODE_RSQ: 105 return BRW_MATH_FUNCTION_RSQ; 106 case SHADER_OPCODE_SQRT: 107 return BRW_MATH_FUNCTION_SQRT; 108 case SHADER_OPCODE_EXP2: 109 return BRW_MATH_FUNCTION_EXP; 110 case SHADER_OPCODE_LOG2: 111 return BRW_MATH_FUNCTION_LOG; 112 case SHADER_OPCODE_POW: 113 return BRW_MATH_FUNCTION_POW; 114 case SHADER_OPCODE_SIN: 115 return BRW_MATH_FUNCTION_SIN; 116 case SHADER_OPCODE_COS: 117 return BRW_MATH_FUNCTION_COS; 118 case SHADER_OPCODE_INT_QUOTIENT: 119 return BRW_MATH_FUNCTION_INT_DIV_QUOTIENT; 120 case SHADER_OPCODE_INT_REMAINDER: 121 return BRW_MATH_FUNCTION_INT_DIV_REMAINDER; 122 default: 123 unreachable("not reached: unknown math function"); 124 } 125 } 126 127 bool 128 brw_texture_offset(int *offsets, unsigned num_components, uint32_t *offset_bits) 129 { 130 if (!offsets) return false; /* nonconstant offset; caller will handle it. */ 131 132 /* offset out of bounds; caller will handle it. */ 133 for (unsigned i = 0; i < num_components; i++) 134 if (offsets[i] > 7 || offsets[i] < -8) 135 return false; 136 137 /* Combine all three offsets into a single unsigned dword: 138 * 139 * bits 11:8 - U Offset (X component) 140 * bits 7:4 - V Offset (Y component) 141 * bits 3:0 - R Offset (Z component) 142 */ 143 *offset_bits = 0; 144 for (unsigned i = 0; i < num_components; i++) { 145 const unsigned shift = 4 * (2 - i); 146 *offset_bits |= (offsets[i] << shift) & (0xF << shift); 147 } 148 return true; 149 } 150 151 const char * 152 brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) 153 { 154 switch (op) { 155 case BRW_OPCODE_ILLEGAL ... BRW_OPCODE_NOP: 156 /* The DO instruction doesn't exist on Gen6+, but we use it to mark the 157 * start of a loop in the IR. 158 */ 159 if (devinfo->gen >= 6 && op == BRW_OPCODE_DO) 160 return "do"; 161 162 /* The following conversion opcodes doesn't exist on Gen8+, but we use 163 * then to mark that we want to do the conversion. 164 */ 165 if (devinfo->gen > 7 && op == BRW_OPCODE_F32TO16) 166 return "f32to16"; 167 168 if (devinfo->gen > 7 && op == BRW_OPCODE_F16TO32) 169 return "f16to32"; 170 171 assert(brw_opcode_desc(devinfo, op)->name); 172 return brw_opcode_desc(devinfo, op)->name; 173 case FS_OPCODE_FB_WRITE: 174 return "fb_write"; 175 case FS_OPCODE_FB_WRITE_LOGICAL: 176 return "fb_write_logical"; 177 case FS_OPCODE_REP_FB_WRITE: 178 return "rep_fb_write"; 179 case FS_OPCODE_FB_READ: 180 return "fb_read"; 181 case FS_OPCODE_FB_READ_LOGICAL: 182 return "fb_read_logical"; 183 184 case SHADER_OPCODE_RCP: 185 return "rcp"; 186 case SHADER_OPCODE_RSQ: 187 return "rsq"; 188 case SHADER_OPCODE_SQRT: 189 return "sqrt"; 190 case SHADER_OPCODE_EXP2: 191 return "exp2"; 192 case SHADER_OPCODE_LOG2: 193 return "log2"; 194 case SHADER_OPCODE_POW: 195 return "pow"; 196 case SHADER_OPCODE_INT_QUOTIENT: 197 return "int_quot"; 198 case SHADER_OPCODE_INT_REMAINDER: 199 return "int_rem"; 200 case SHADER_OPCODE_SIN: 201 return "sin"; 202 case SHADER_OPCODE_COS: 203 return "cos"; 204 205 case SHADER_OPCODE_TEX: 206 return "tex"; 207 case SHADER_OPCODE_TEX_LOGICAL: 208 return "tex_logical"; 209 case SHADER_OPCODE_TXD: 210 return "txd"; 211 case SHADER_OPCODE_TXD_LOGICAL: 212 return "txd_logical"; 213 case SHADER_OPCODE_TXF: 214 return "txf"; 215 case SHADER_OPCODE_TXF_LOGICAL: 216 return "txf_logical"; 217 case SHADER_OPCODE_TXF_LZ: 218 return "txf_lz"; 219 case SHADER_OPCODE_TXL: 220 return "txl"; 221 case SHADER_OPCODE_TXL_LOGICAL: 222 return "txl_logical"; 223 case SHADER_OPCODE_TXL_LZ: 224 return "txl_lz"; 225 case SHADER_OPCODE_TXS: 226 return "txs"; 227 case SHADER_OPCODE_TXS_LOGICAL: 228 return "txs_logical"; 229 case FS_OPCODE_TXB: 230 return "txb"; 231 case FS_OPCODE_TXB_LOGICAL: 232 return "txb_logical"; 233 case SHADER_OPCODE_TXF_CMS: 234 return "txf_cms"; 235 case SHADER_OPCODE_TXF_CMS_LOGICAL: 236 return "txf_cms_logical"; 237 case SHADER_OPCODE_TXF_CMS_W: 238 return "txf_cms_w"; 239 case SHADER_OPCODE_TXF_CMS_W_LOGICAL: 240 return "txf_cms_w_logical"; 241 case SHADER_OPCODE_TXF_UMS: 242 return "txf_ums"; 243 case SHADER_OPCODE_TXF_UMS_LOGICAL: 244 return "txf_ums_logical"; 245 case SHADER_OPCODE_TXF_MCS: 246 return "txf_mcs"; 247 case SHADER_OPCODE_TXF_MCS_LOGICAL: 248 return "txf_mcs_logical"; 249 case SHADER_OPCODE_LOD: 250 return "lod"; 251 case SHADER_OPCODE_LOD_LOGICAL: 252 return "lod_logical"; 253 case SHADER_OPCODE_TG4: 254 return "tg4"; 255 case SHADER_OPCODE_TG4_LOGICAL: 256 return "tg4_logical"; 257 case SHADER_OPCODE_TG4_OFFSET: 258 return "tg4_offset"; 259 case SHADER_OPCODE_TG4_OFFSET_LOGICAL: 260 return "tg4_offset_logical"; 261 case SHADER_OPCODE_SAMPLEINFO: 262 return "sampleinfo"; 263 case SHADER_OPCODE_SAMPLEINFO_LOGICAL: 264 return "sampleinfo_logical"; 265 266 case SHADER_OPCODE_SHADER_TIME_ADD: 267 return "shader_time_add"; 268 269 case SHADER_OPCODE_UNTYPED_ATOMIC: 270 return "untyped_atomic"; 271 case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: 272 return "untyped_atomic_logical"; 273 case SHADER_OPCODE_UNTYPED_SURFACE_READ: 274 return "untyped_surface_read"; 275 case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: 276 return "untyped_surface_read_logical"; 277 case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: 278 return "untyped_surface_write"; 279 case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: 280 return "untyped_surface_write_logical"; 281 case SHADER_OPCODE_TYPED_ATOMIC: 282 return "typed_atomic"; 283 case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: 284 return "typed_atomic_logical"; 285 case SHADER_OPCODE_TYPED_SURFACE_READ: 286 return "typed_surface_read"; 287 case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: 288 return "typed_surface_read_logical"; 289 case SHADER_OPCODE_TYPED_SURFACE_WRITE: 290 return "typed_surface_write"; 291 case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: 292 return "typed_surface_write_logical"; 293 case SHADER_OPCODE_MEMORY_FENCE: 294 return "memory_fence"; 295 296 case SHADER_OPCODE_BYTE_SCATTERED_READ: 297 return "byte_scattered_read"; 298 case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: 299 return "byte_scattered_read_logical"; 300 case SHADER_OPCODE_BYTE_SCATTERED_WRITE: 301 return "byte_scattered_write"; 302 case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: 303 return "byte_scattered_write_logical"; 304 305 case SHADER_OPCODE_LOAD_PAYLOAD: 306 return "load_payload"; 307 case FS_OPCODE_PACK: 308 return "pack"; 309 310 case SHADER_OPCODE_GEN4_SCRATCH_READ: 311 return "gen4_scratch_read"; 312 case SHADER_OPCODE_GEN4_SCRATCH_WRITE: 313 return "gen4_scratch_write"; 314 case SHADER_OPCODE_GEN7_SCRATCH_READ: 315 return "gen7_scratch_read"; 316 case SHADER_OPCODE_URB_WRITE_SIMD8: 317 return "gen8_urb_write_simd8"; 318 case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT: 319 return "gen8_urb_write_simd8_per_slot"; 320 case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED: 321 return "gen8_urb_write_simd8_masked"; 322 case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT: 323 return "gen8_urb_write_simd8_masked_per_slot"; 324 case SHADER_OPCODE_URB_READ_SIMD8: 325 return "urb_read_simd8"; 326 case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT: 327 return "urb_read_simd8_per_slot"; 328 329 case SHADER_OPCODE_FIND_LIVE_CHANNEL: 330 return "find_live_channel"; 331 case SHADER_OPCODE_BROADCAST: 332 return "broadcast"; 333 334 case SHADER_OPCODE_GET_BUFFER_SIZE: 335 return "get_buffer_size"; 336 337 case VEC4_OPCODE_MOV_BYTES: 338 return "mov_bytes"; 339 case VEC4_OPCODE_PACK_BYTES: 340 return "pack_bytes"; 341 case VEC4_OPCODE_UNPACK_UNIFORM: 342 return "unpack_uniform"; 343 case VEC4_OPCODE_DOUBLE_TO_F32: 344 return "double_to_f32"; 345 case VEC4_OPCODE_DOUBLE_TO_D32: 346 return "double_to_d32"; 347 case VEC4_OPCODE_DOUBLE_TO_U32: 348 return "double_to_u32"; 349 case VEC4_OPCODE_TO_DOUBLE: 350 return "single_to_double"; 351 case VEC4_OPCODE_PICK_LOW_32BIT: 352 return "pick_low_32bit"; 353 case VEC4_OPCODE_PICK_HIGH_32BIT: 354 return "pick_high_32bit"; 355 case VEC4_OPCODE_SET_LOW_32BIT: 356 return "set_low_32bit"; 357 case VEC4_OPCODE_SET_HIGH_32BIT: 358 return "set_high_32bit"; 359 360 case FS_OPCODE_DDX_COARSE: 361 return "ddx_coarse"; 362 case FS_OPCODE_DDX_FINE: 363 return "ddx_fine"; 364 case FS_OPCODE_DDY_COARSE: 365 return "ddy_coarse"; 366 case FS_OPCODE_DDY_FINE: 367 return "ddy_fine"; 368 369 case FS_OPCODE_CINTERP: 370 return "cinterp"; 371 case FS_OPCODE_LINTERP: 372 return "linterp"; 373 374 case FS_OPCODE_PIXEL_X: 375 return "pixel_x"; 376 case FS_OPCODE_PIXEL_Y: 377 return "pixel_y"; 378 379 case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: 380 return "uniform_pull_const"; 381 case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7: 382 return "uniform_pull_const_gen7"; 383 case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4: 384 return "varying_pull_const_gen4"; 385 case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: 386 return "varying_pull_const_gen7"; 387 case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: 388 return "varying_pull_const_logical"; 389 390 case FS_OPCODE_MOV_DISPATCH_TO_FLAGS: 391 return "mov_dispatch_to_flags"; 392 case FS_OPCODE_DISCARD_JUMP: 393 return "discard_jump"; 394 395 case FS_OPCODE_SET_SAMPLE_ID: 396 return "set_sample_id"; 397 398 case FS_OPCODE_PACK_HALF_2x16_SPLIT: 399 return "pack_half_2x16_split"; 400 case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X: 401 return "unpack_half_2x16_split_x"; 402 case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y: 403 return "unpack_half_2x16_split_y"; 404 405 case FS_OPCODE_PLACEHOLDER_HALT: 406 return "placeholder_halt"; 407 408 case FS_OPCODE_INTERPOLATE_AT_SAMPLE: 409 return "interp_sample"; 410 case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: 411 return "interp_shared_offset"; 412 case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: 413 return "interp_per_slot_offset"; 414 415 case VS_OPCODE_URB_WRITE: 416 return "vs_urb_write"; 417 case VS_OPCODE_PULL_CONSTANT_LOAD: 418 return "pull_constant_load"; 419 case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: 420 return "pull_constant_load_gen7"; 421 422 case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9: 423 return "set_simd4x2_header_gen9"; 424 425 case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: 426 return "unpack_flags_simd4x2"; 427 428 case GS_OPCODE_URB_WRITE: 429 return "gs_urb_write"; 430 case GS_OPCODE_URB_WRITE_ALLOCATE: 431 return "gs_urb_write_allocate"; 432 case GS_OPCODE_THREAD_END: 433 return "gs_thread_end"; 434 case GS_OPCODE_SET_WRITE_OFFSET: 435 return "set_write_offset"; 436 case GS_OPCODE_SET_VERTEX_COUNT: 437 return "set_vertex_count"; 438 case GS_OPCODE_SET_DWORD_2: 439 return "set_dword_2"; 440 case GS_OPCODE_PREPARE_CHANNEL_MASKS: 441 return "prepare_channel_masks"; 442 case GS_OPCODE_SET_CHANNEL_MASKS: 443 return "set_channel_masks"; 444 case GS_OPCODE_GET_INSTANCE_ID: 445 return "get_instance_id"; 446 case GS_OPCODE_FF_SYNC: 447 return "ff_sync"; 448 case GS_OPCODE_SET_PRIMITIVE_ID: 449 return "set_primitive_id"; 450 case GS_OPCODE_SVB_WRITE: 451 return "gs_svb_write"; 452 case GS_OPCODE_SVB_SET_DST_INDEX: 453 return "gs_svb_set_dst_index"; 454 case GS_OPCODE_FF_SYNC_SET_PRIMITIVES: 455 return "gs_ff_sync_set_primitives"; 456 case CS_OPCODE_CS_TERMINATE: 457 return "cs_terminate"; 458 case SHADER_OPCODE_BARRIER: 459 return "barrier"; 460 case SHADER_OPCODE_MULH: 461 return "mulh"; 462 case SHADER_OPCODE_MOV_INDIRECT: 463 return "mov_indirect"; 464 465 case VEC4_OPCODE_URB_READ: 466 return "urb_read"; 467 case TCS_OPCODE_GET_INSTANCE_ID: 468 return "tcs_get_instance_id"; 469 case TCS_OPCODE_URB_WRITE: 470 return "tcs_urb_write"; 471 case TCS_OPCODE_SET_INPUT_URB_OFFSETS: 472 return "tcs_set_input_urb_offsets"; 473 case TCS_OPCODE_SET_OUTPUT_URB_OFFSETS: 474 return "tcs_set_output_urb_offsets"; 475 case TCS_OPCODE_GET_PRIMITIVE_ID: 476 return "tcs_get_primitive_id"; 477 case TCS_OPCODE_CREATE_BARRIER_HEADER: 478 return "tcs_create_barrier_header"; 479 case TCS_OPCODE_SRC0_010_IS_ZERO: 480 return "tcs_src0<0,1,0>_is_zero"; 481 case TCS_OPCODE_RELEASE_INPUT: 482 return "tcs_release_input"; 483 case TCS_OPCODE_THREAD_END: 484 return "tcs_thread_end"; 485 case TES_OPCODE_CREATE_INPUT_READ_HEADER: 486 return "tes_create_input_read_header"; 487 case TES_OPCODE_ADD_INDIRECT_URB_OFFSET: 488 return "tes_add_indirect_urb_offset"; 489 case TES_OPCODE_GET_PRIMITIVE_ID: 490 return "tes_get_primitive_id"; 491 492 case SHADER_OPCODE_RND_MODE: 493 return "rnd_mode"; 494 } 495 496 unreachable("not reached"); 497 } 498 499 bool 500 brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg) 501 { 502 union { 503 unsigned ud; 504 int d; 505 float f; 506 double df; 507 } imm, sat_imm = { 0 }; 508 509 const unsigned size = type_sz(type); 510 511 /* We want to either do a 32-bit or 64-bit data copy, the type is otherwise 512 * irrelevant, so just check the size of the type and copy from/to an 513 * appropriately sized field. 514 */ 515 if (size < 8) 516 imm.ud = reg->ud; 517 else 518 imm.df = reg->df; 519 520 switch (type) { 521 case BRW_REGISTER_TYPE_UD: 522 case BRW_REGISTER_TYPE_D: 523 case BRW_REGISTER_TYPE_UW: 524 case BRW_REGISTER_TYPE_W: 525 case BRW_REGISTER_TYPE_UQ: 526 case BRW_REGISTER_TYPE_Q: 527 /* Nothing to do. */ 528 return false; 529 case BRW_REGISTER_TYPE_F: 530 sat_imm.f = CLAMP(imm.f, 0.0f, 1.0f); 531 break; 532 case BRW_REGISTER_TYPE_DF: 533 sat_imm.df = CLAMP(imm.df, 0.0, 1.0); 534 break; 535 case BRW_REGISTER_TYPE_UB: 536 case BRW_REGISTER_TYPE_B: 537 unreachable("no UB/B immediates"); 538 case BRW_REGISTER_TYPE_V: 539 case BRW_REGISTER_TYPE_UV: 540 case BRW_REGISTER_TYPE_VF: 541 unreachable("unimplemented: saturate vector immediate"); 542 case BRW_REGISTER_TYPE_HF: 543 unreachable("unimplemented: saturate HF immediate"); 544 } 545 546 if (size < 8) { 547 if (imm.ud != sat_imm.ud) { 548 reg->ud = sat_imm.ud; 549 return true; 550 } 551 } else { 552 if (imm.df != sat_imm.df) { 553 reg->df = sat_imm.df; 554 return true; 555 } 556 } 557 return false; 558 } 559 560 bool 561 brw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg) 562 { 563 switch (type) { 564 case BRW_REGISTER_TYPE_D: 565 case BRW_REGISTER_TYPE_UD: 566 reg->d = -reg->d; 567 return true; 568 case BRW_REGISTER_TYPE_W: 569 case BRW_REGISTER_TYPE_UW: 570 reg->d = -(int16_t)reg->ud; 571 return true; 572 case BRW_REGISTER_TYPE_F: 573 reg->f = -reg->f; 574 return true; 575 case BRW_REGISTER_TYPE_VF: 576 reg->ud ^= 0x80808080; 577 return true; 578 case BRW_REGISTER_TYPE_DF: 579 reg->df = -reg->df; 580 return true; 581 case BRW_REGISTER_TYPE_UQ: 582 case BRW_REGISTER_TYPE_Q: 583 reg->d64 = -reg->d64; 584 return true; 585 case BRW_REGISTER_TYPE_UB: 586 case BRW_REGISTER_TYPE_B: 587 unreachable("no UB/B immediates"); 588 case BRW_REGISTER_TYPE_UV: 589 case BRW_REGISTER_TYPE_V: 590 assert(!"unimplemented: negate UV/V immediate"); 591 case BRW_REGISTER_TYPE_HF: 592 assert(!"unimplemented: negate HF immediate"); 593 } 594 595 return false; 596 } 597 598 bool 599 brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg) 600 { 601 switch (type) { 602 case BRW_REGISTER_TYPE_D: 603 reg->d = abs(reg->d); 604 return true; 605 case BRW_REGISTER_TYPE_W: 606 reg->d = abs((int16_t)reg->ud); 607 return true; 608 case BRW_REGISTER_TYPE_F: 609 reg->f = fabsf(reg->f); 610 return true; 611 case BRW_REGISTER_TYPE_DF: 612 reg->df = fabs(reg->df); 613 return true; 614 case BRW_REGISTER_TYPE_VF: 615 reg->ud &= ~0x80808080; 616 return true; 617 case BRW_REGISTER_TYPE_Q: 618 reg->d64 = imaxabs(reg->d64); 619 return true; 620 case BRW_REGISTER_TYPE_UB: 621 case BRW_REGISTER_TYPE_B: 622 unreachable("no UB/B immediates"); 623 case BRW_REGISTER_TYPE_UQ: 624 case BRW_REGISTER_TYPE_UD: 625 case BRW_REGISTER_TYPE_UW: 626 case BRW_REGISTER_TYPE_UV: 627 /* Presumably the absolute value modifier on an unsigned source is a 628 * nop, but it would be nice to confirm. 629 */ 630 assert(!"unimplemented: abs unsigned immediate"); 631 case BRW_REGISTER_TYPE_V: 632 assert(!"unimplemented: abs V immediate"); 633 case BRW_REGISTER_TYPE_HF: 634 assert(!"unimplemented: abs HF immediate"); 635 } 636 637 return false; 638 } 639 640 backend_shader::backend_shader(const struct brw_compiler *compiler, 641 void *log_data, 642 void *mem_ctx, 643 const nir_shader *shader, 644 struct brw_stage_prog_data *stage_prog_data) 645 : compiler(compiler), 646 log_data(log_data), 647 devinfo(compiler->devinfo), 648 nir(shader), 649 stage_prog_data(stage_prog_data), 650 mem_ctx(mem_ctx), 651 cfg(NULL), 652 stage(shader->info.stage) 653 { 654 debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage); 655 stage_name = _mesa_shader_stage_to_string(stage); 656 stage_abbrev = _mesa_shader_stage_to_abbrev(stage); 657 } 658 659 backend_shader::~backend_shader() 660 { 661 } 662 663 bool 664 backend_reg::equals(const backend_reg &r) const 665 { 666 return brw_regs_equal(this, &r) && offset == r.offset; 667 } 668 669 bool 670 backend_reg::is_zero() const 671 { 672 if (file != IMM) 673 return false; 674 675 switch (type) { 676 case BRW_REGISTER_TYPE_F: 677 return f == 0; 678 case BRW_REGISTER_TYPE_DF: 679 return df == 0; 680 case BRW_REGISTER_TYPE_D: 681 case BRW_REGISTER_TYPE_UD: 682 return d == 0; 683 case BRW_REGISTER_TYPE_UQ: 684 case BRW_REGISTER_TYPE_Q: 685 return u64 == 0; 686 default: 687 return false; 688 } 689 } 690 691 bool 692 backend_reg::is_one() const 693 { 694 if (file != IMM) 695 return false; 696 697 switch (type) { 698 case BRW_REGISTER_TYPE_F: 699 return f == 1.0f; 700 case BRW_REGISTER_TYPE_DF: 701 return df == 1.0; 702 case BRW_REGISTER_TYPE_D: 703 case BRW_REGISTER_TYPE_UD: 704 return d == 1; 705 case BRW_REGISTER_TYPE_UQ: 706 case BRW_REGISTER_TYPE_Q: 707 return u64 == 1; 708 default: 709 return false; 710 } 711 } 712 713 bool 714 backend_reg::is_negative_one() const 715 { 716 if (file != IMM) 717 return false; 718 719 switch (type) { 720 case BRW_REGISTER_TYPE_F: 721 return f == -1.0; 722 case BRW_REGISTER_TYPE_DF: 723 return df == -1.0; 724 case BRW_REGISTER_TYPE_D: 725 return d == -1; 726 case BRW_REGISTER_TYPE_Q: 727 return d64 == -1; 728 default: 729 return false; 730 } 731 } 732 733 bool 734 backend_reg::is_null() const 735 { 736 return file == ARF && nr == BRW_ARF_NULL; 737 } 738 739 740 bool 741 backend_reg::is_accumulator() const 742 { 743 return file == ARF && nr == BRW_ARF_ACCUMULATOR; 744 } 745 746 bool 747 backend_instruction::is_commutative() const 748 { 749 switch (opcode) { 750 case BRW_OPCODE_AND: 751 case BRW_OPCODE_OR: 752 case BRW_OPCODE_XOR: 753 case BRW_OPCODE_ADD: 754 case BRW_OPCODE_MUL: 755 case SHADER_OPCODE_MULH: 756 return true; 757 case BRW_OPCODE_SEL: 758 /* MIN and MAX are commutative. */ 759 if (conditional_mod == BRW_CONDITIONAL_GE || 760 conditional_mod == BRW_CONDITIONAL_L) { 761 return true; 762 } 763 /* fallthrough */ 764 default: 765 return false; 766 } 767 } 768 769 bool 770 backend_instruction::is_3src(const struct gen_device_info *devinfo) const 771 { 772 return ::is_3src(devinfo, opcode); 773 } 774 775 bool 776 backend_instruction::is_tex() const 777 { 778 return (opcode == SHADER_OPCODE_TEX || 779 opcode == FS_OPCODE_TXB || 780 opcode == SHADER_OPCODE_TXD || 781 opcode == SHADER_OPCODE_TXF || 782 opcode == SHADER_OPCODE_TXF_LZ || 783 opcode == SHADER_OPCODE_TXF_CMS || 784 opcode == SHADER_OPCODE_TXF_CMS_W || 785 opcode == SHADER_OPCODE_TXF_UMS || 786 opcode == SHADER_OPCODE_TXF_MCS || 787 opcode == SHADER_OPCODE_TXL || 788 opcode == SHADER_OPCODE_TXL_LZ || 789 opcode == SHADER_OPCODE_TXS || 790 opcode == SHADER_OPCODE_LOD || 791 opcode == SHADER_OPCODE_TG4 || 792 opcode == SHADER_OPCODE_TG4_OFFSET || 793 opcode == SHADER_OPCODE_SAMPLEINFO); 794 } 795 796 bool 797 backend_instruction::is_math() const 798 { 799 return (opcode == SHADER_OPCODE_RCP || 800 opcode == SHADER_OPCODE_RSQ || 801 opcode == SHADER_OPCODE_SQRT || 802 opcode == SHADER_OPCODE_EXP2 || 803 opcode == SHADER_OPCODE_LOG2 || 804 opcode == SHADER_OPCODE_SIN || 805 opcode == SHADER_OPCODE_COS || 806 opcode == SHADER_OPCODE_INT_QUOTIENT || 807 opcode == SHADER_OPCODE_INT_REMAINDER || 808 opcode == SHADER_OPCODE_POW); 809 } 810 811 bool 812 backend_instruction::is_control_flow() const 813 { 814 switch (opcode) { 815 case BRW_OPCODE_DO: 816 case BRW_OPCODE_WHILE: 817 case BRW_OPCODE_IF: 818 case BRW_OPCODE_ELSE: 819 case BRW_OPCODE_ENDIF: 820 case BRW_OPCODE_BREAK: 821 case BRW_OPCODE_CONTINUE: 822 return true; 823 default: 824 return false; 825 } 826 } 827 828 bool 829 backend_instruction::can_do_source_mods() const 830 { 831 switch (opcode) { 832 case BRW_OPCODE_ADDC: 833 case BRW_OPCODE_BFE: 834 case BRW_OPCODE_BFI1: 835 case BRW_OPCODE_BFI2: 836 case BRW_OPCODE_BFREV: 837 case BRW_OPCODE_CBIT: 838 case BRW_OPCODE_FBH: 839 case BRW_OPCODE_FBL: 840 case BRW_OPCODE_SUBB: 841 case SHADER_OPCODE_BROADCAST: 842 case SHADER_OPCODE_MOV_INDIRECT: 843 return false; 844 default: 845 return true; 846 } 847 } 848 849 bool 850 backend_instruction::can_do_saturate() const 851 { 852 switch (opcode) { 853 case BRW_OPCODE_ADD: 854 case BRW_OPCODE_ASR: 855 case BRW_OPCODE_AVG: 856 case BRW_OPCODE_DP2: 857 case BRW_OPCODE_DP3: 858 case BRW_OPCODE_DP4: 859 case BRW_OPCODE_DPH: 860 case BRW_OPCODE_F16TO32: 861 case BRW_OPCODE_F32TO16: 862 case BRW_OPCODE_LINE: 863 case BRW_OPCODE_LRP: 864 case BRW_OPCODE_MAC: 865 case BRW_OPCODE_MAD: 866 case BRW_OPCODE_MATH: 867 case BRW_OPCODE_MOV: 868 case BRW_OPCODE_MUL: 869 case SHADER_OPCODE_MULH: 870 case BRW_OPCODE_PLN: 871 case BRW_OPCODE_RNDD: 872 case BRW_OPCODE_RNDE: 873 case BRW_OPCODE_RNDU: 874 case BRW_OPCODE_RNDZ: 875 case BRW_OPCODE_SEL: 876 case BRW_OPCODE_SHL: 877 case BRW_OPCODE_SHR: 878 case FS_OPCODE_LINTERP: 879 case SHADER_OPCODE_COS: 880 case SHADER_OPCODE_EXP2: 881 case SHADER_OPCODE_LOG2: 882 case SHADER_OPCODE_POW: 883 case SHADER_OPCODE_RCP: 884 case SHADER_OPCODE_RSQ: 885 case SHADER_OPCODE_SIN: 886 case SHADER_OPCODE_SQRT: 887 return true; 888 default: 889 return false; 890 } 891 } 892 893 bool 894 backend_instruction::can_do_cmod() const 895 { 896 switch (opcode) { 897 case BRW_OPCODE_ADD: 898 case BRW_OPCODE_ADDC: 899 case BRW_OPCODE_AND: 900 case BRW_OPCODE_ASR: 901 case BRW_OPCODE_AVG: 902 case BRW_OPCODE_CMP: 903 case BRW_OPCODE_CMPN: 904 case BRW_OPCODE_DP2: 905 case BRW_OPCODE_DP3: 906 case BRW_OPCODE_DP4: 907 case BRW_OPCODE_DPH: 908 case BRW_OPCODE_F16TO32: 909 case BRW_OPCODE_F32TO16: 910 case BRW_OPCODE_FRC: 911 case BRW_OPCODE_LINE: 912 case BRW_OPCODE_LRP: 913 case BRW_OPCODE_LZD: 914 case BRW_OPCODE_MAC: 915 case BRW_OPCODE_MACH: 916 case BRW_OPCODE_MAD: 917 case BRW_OPCODE_MOV: 918 case BRW_OPCODE_MUL: 919 case BRW_OPCODE_NOT: 920 case BRW_OPCODE_OR: 921 case BRW_OPCODE_PLN: 922 case BRW_OPCODE_RNDD: 923 case BRW_OPCODE_RNDE: 924 case BRW_OPCODE_RNDU: 925 case BRW_OPCODE_RNDZ: 926 case BRW_OPCODE_SAD2: 927 case BRW_OPCODE_SADA2: 928 case BRW_OPCODE_SHL: 929 case BRW_OPCODE_SHR: 930 case BRW_OPCODE_SUBB: 931 case BRW_OPCODE_XOR: 932 case FS_OPCODE_CINTERP: 933 case FS_OPCODE_LINTERP: 934 return true; 935 default: 936 return false; 937 } 938 } 939 940 bool 941 backend_instruction::reads_accumulator_implicitly() const 942 { 943 switch (opcode) { 944 case BRW_OPCODE_MAC: 945 case BRW_OPCODE_MACH: 946 case BRW_OPCODE_SADA2: 947 return true; 948 default: 949 return false; 950 } 951 } 952 953 bool 954 backend_instruction::writes_accumulator_implicitly(const struct gen_device_info *devinfo) const 955 { 956 return writes_accumulator || 957 (devinfo->gen < 6 && 958 ((opcode >= BRW_OPCODE_ADD && opcode < BRW_OPCODE_NOP) || 959 (opcode >= FS_OPCODE_DDX_COARSE && opcode <= FS_OPCODE_LINTERP && 960 opcode != FS_OPCODE_CINTERP))); 961 } 962 963 bool 964 backend_instruction::has_side_effects() const 965 { 966 switch (opcode) { 967 case SHADER_OPCODE_UNTYPED_ATOMIC: 968 case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: 969 case SHADER_OPCODE_GEN4_SCRATCH_WRITE: 970 case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: 971 case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: 972 case SHADER_OPCODE_BYTE_SCATTERED_WRITE: 973 case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: 974 case SHADER_OPCODE_TYPED_ATOMIC: 975 case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: 976 case SHADER_OPCODE_TYPED_SURFACE_WRITE: 977 case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: 978 case SHADER_OPCODE_MEMORY_FENCE: 979 case SHADER_OPCODE_URB_WRITE_SIMD8: 980 case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT: 981 case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED: 982 case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT: 983 case FS_OPCODE_FB_WRITE: 984 case FS_OPCODE_FB_WRITE_LOGICAL: 985 case SHADER_OPCODE_BARRIER: 986 case TCS_OPCODE_URB_WRITE: 987 case TCS_OPCODE_RELEASE_INPUT: 988 case SHADER_OPCODE_RND_MODE: 989 return true; 990 default: 991 return eot; 992 } 993 } 994 995 bool 996 backend_instruction::is_volatile() const 997 { 998 switch (opcode) { 999 case SHADER_OPCODE_UNTYPED_SURFACE_READ: 1000 case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: 1001 case SHADER_OPCODE_TYPED_SURFACE_READ: 1002 case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: 1003 case SHADER_OPCODE_BYTE_SCATTERED_READ: 1004 case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: 1005 case SHADER_OPCODE_URB_READ_SIMD8: 1006 case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT: 1007 case VEC4_OPCODE_URB_READ: 1008 return true; 1009 default: 1010 return false; 1011 } 1012 } 1013 1014 #ifndef NDEBUG 1015 static bool 1016 inst_is_in_block(const bblock_t *block, const backend_instruction *inst) 1017 { 1018 bool found = false; 1019 foreach_inst_in_block (backend_instruction, i, block) { 1020 if (inst == i) { 1021 found = true; 1022 } 1023 } 1024 return found; 1025 } 1026 #endif 1027 1028 static void 1029 adjust_later_block_ips(bblock_t *start_block, int ip_adjustment) 1030 { 1031 for (bblock_t *block_iter = start_block->next(); 1032 block_iter; 1033 block_iter = block_iter->next()) { 1034 block_iter->start_ip += ip_adjustment; 1035 block_iter->end_ip += ip_adjustment; 1036 } 1037 } 1038 1039 void 1040 backend_instruction::insert_after(bblock_t *block, backend_instruction *inst) 1041 { 1042 assert(this != inst); 1043 1044 if (!this->is_head_sentinel()) 1045 assert(inst_is_in_block(block, this) || !"Instruction not in block"); 1046 1047 block->end_ip++; 1048 1049 adjust_later_block_ips(block, 1); 1050 1051 exec_node::insert_after(inst); 1052 } 1053 1054 void 1055 backend_instruction::insert_before(bblock_t *block, backend_instruction *inst) 1056 { 1057 assert(this != inst); 1058 1059 if (!this->is_tail_sentinel()) 1060 assert(inst_is_in_block(block, this) || !"Instruction not in block"); 1061 1062 block->end_ip++; 1063 1064 adjust_later_block_ips(block, 1); 1065 1066 exec_node::insert_before(inst); 1067 } 1068 1069 void 1070 backend_instruction::insert_before(bblock_t *block, exec_list *list) 1071 { 1072 assert(inst_is_in_block(block, this) || !"Instruction not in block"); 1073 1074 unsigned num_inst = list->length(); 1075 1076 block->end_ip += num_inst; 1077 1078 adjust_later_block_ips(block, num_inst); 1079 1080 exec_node::insert_before(list); 1081 } 1082 1083 void 1084 backend_instruction::remove(bblock_t *block) 1085 { 1086 assert(inst_is_in_block(block, this) || !"Instruction not in block"); 1087 1088 adjust_later_block_ips(block, -1); 1089 1090 if (block->start_ip == block->end_ip) { 1091 block->cfg->remove_block(block); 1092 } else { 1093 block->end_ip--; 1094 } 1095 1096 exec_node::remove(); 1097 } 1098 1099 void 1100 backend_shader::dump_instructions() 1101 { 1102 dump_instructions(NULL); 1103 } 1104 1105 void 1106 backend_shader::dump_instructions(const char *name) 1107 { 1108 FILE *file = stderr; 1109 if (name && geteuid() != 0) { 1110 file = fopen(name, "w"); 1111 if (!file) 1112 file = stderr; 1113 } 1114 1115 if (cfg) { 1116 int ip = 0; 1117 foreach_block_and_inst(block, backend_instruction, inst, cfg) { 1118 if (!unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) 1119 fprintf(file, "%4d: ", ip++); 1120 dump_instruction(inst, file); 1121 } 1122 } else { 1123 int ip = 0; 1124 foreach_in_list(backend_instruction, inst, &instructions) { 1125 if (!unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) 1126 fprintf(file, "%4d: ", ip++); 1127 dump_instruction(inst, file); 1128 } 1129 } 1130 1131 if (file != stderr) { 1132 fclose(file); 1133 } 1134 } 1135 1136 void 1137 backend_shader::calculate_cfg() 1138 { 1139 if (this->cfg) 1140 return; 1141 cfg = new(mem_ctx) cfg_t(&this->instructions); 1142 } 1143 1144 extern "C" const unsigned * 1145 brw_compile_tes(const struct brw_compiler *compiler, 1146 void *log_data, 1147 void *mem_ctx, 1148 const struct brw_tes_prog_key *key, 1149 const struct brw_vue_map *input_vue_map, 1150 struct brw_tes_prog_data *prog_data, 1151 const nir_shader *src_shader, 1152 struct gl_program *prog, 1153 int shader_time_index, 1154 char **error_str) 1155 { 1156 const struct gen_device_info *devinfo = compiler->devinfo; 1157 const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_EVAL]; 1158 const unsigned *assembly; 1159 1160 nir_shader *nir = nir_shader_clone(mem_ctx, src_shader); 1161 nir->info.inputs_read = key->inputs_read; 1162 nir->info.patch_inputs_read = key->patch_inputs_read; 1163 1164 nir = brw_nir_apply_sampler_key(nir, compiler, &key->tex, is_scalar); 1165 brw_nir_lower_tes_inputs(nir, input_vue_map); 1166 brw_nir_lower_vue_outputs(nir, is_scalar); 1167 nir = brw_postprocess_nir(nir, compiler, is_scalar); 1168 1169 brw_compute_vue_map(devinfo, &prog_data->base.vue_map, 1170 nir->info.outputs_written, 1171 nir->info.separate_shader); 1172 1173 unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4; 1174 1175 assert(output_size_bytes >= 1); 1176 if (output_size_bytes > GEN7_MAX_DS_URB_ENTRY_SIZE_BYTES) { 1177 if (error_str) 1178 *error_str = ralloc_strdup(mem_ctx, "DS outputs exceed maximum size"); 1179 return NULL; 1180 } 1181 1182 prog_data->base.clip_distance_mask = 1183 ((1 << nir->info.clip_distance_array_size) - 1); 1184 prog_data->base.cull_distance_mask = 1185 ((1 << nir->info.cull_distance_array_size) - 1) << 1186 nir->info.clip_distance_array_size; 1187 1188 /* URB entry sizes are stored as a multiple of 64 bytes. */ 1189 prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64; 1190 1191 /* On Cannonlake software shall not program an allocation size that 1192 * specifies a size that is a multiple of 3 64B (512-bit) cachelines. 1193 */ 1194 if (devinfo->gen == 10 && 1195 prog_data->base.urb_entry_size % 3 == 0) 1196 prog_data->base.urb_entry_size++; 1197 1198 prog_data->base.urb_read_length = 0; 1199 1200 STATIC_ASSERT(BRW_TESS_PARTITIONING_INTEGER == TESS_SPACING_EQUAL - 1); 1201 STATIC_ASSERT(BRW_TESS_PARTITIONING_ODD_FRACTIONAL == 1202 TESS_SPACING_FRACTIONAL_ODD - 1); 1203 STATIC_ASSERT(BRW_TESS_PARTITIONING_EVEN_FRACTIONAL == 1204 TESS_SPACING_FRACTIONAL_EVEN - 1); 1205 1206 prog_data->partitioning = 1207 (enum brw_tess_partitioning) (nir->info.tess.spacing - 1); 1208 1209 switch (nir->info.tess.primitive_mode) { 1210 case GL_QUADS: 1211 prog_data->domain = BRW_TESS_DOMAIN_QUAD; 1212 break; 1213 case GL_TRIANGLES: 1214 prog_data->domain = BRW_TESS_DOMAIN_TRI; 1215 break; 1216 case GL_ISOLINES: 1217 prog_data->domain = BRW_TESS_DOMAIN_ISOLINE; 1218 break; 1219 default: 1220 unreachable("invalid domain shader primitive mode"); 1221 } 1222 1223 if (nir->info.tess.point_mode) { 1224 prog_data->output_topology = BRW_TESS_OUTPUT_TOPOLOGY_POINT; 1225 } else if (nir->info.tess.primitive_mode == GL_ISOLINES) { 1226 prog_data->output_topology = BRW_TESS_OUTPUT_TOPOLOGY_LINE; 1227 } else { 1228 /* Hardware winding order is backwards from OpenGL */ 1229 prog_data->output_topology = 1230 nir->info.tess.ccw ? BRW_TESS_OUTPUT_TOPOLOGY_TRI_CW 1231 : BRW_TESS_OUTPUT_TOPOLOGY_TRI_CCW; 1232 } 1233 1234 if (unlikely(INTEL_DEBUG & DEBUG_TES)) { 1235 fprintf(stderr, "TES Input "); 1236 brw_print_vue_map(stderr, input_vue_map); 1237 fprintf(stderr, "TES Output "); 1238 brw_print_vue_map(stderr, &prog_data->base.vue_map); 1239 } 1240 1241 if (is_scalar) { 1242 fs_visitor v(compiler, log_data, mem_ctx, (void *) key, 1243 &prog_data->base.base, NULL, nir, 8, 1244 shader_time_index, input_vue_map); 1245 if (!v.run_tes()) { 1246 if (error_str) 1247 *error_str = ralloc_strdup(mem_ctx, v.fail_msg); 1248 return NULL; 1249 } 1250 1251 prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs; 1252 prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; 1253 1254 fs_generator g(compiler, log_data, mem_ctx, (void *) key, 1255 &prog_data->base.base, v.promoted_constants, false, 1256 MESA_SHADER_TESS_EVAL); 1257 if (unlikely(INTEL_DEBUG & DEBUG_TES)) { 1258 g.enable_debug(ralloc_asprintf(mem_ctx, 1259 "%s tessellation evaluation shader %s", 1260 nir->info.label ? nir->info.label 1261 : "unnamed", 1262 nir->info.name)); 1263 } 1264 1265 g.generate_code(v.cfg, 8); 1266 1267 assembly = g.get_assembly(&prog_data->base.base.program_size); 1268 } else { 1269 brw::vec4_tes_visitor v(compiler, log_data, key, prog_data, 1270 nir, mem_ctx, shader_time_index); 1271 if (!v.run()) { 1272 if (error_str) 1273 *error_str = ralloc_strdup(mem_ctx, v.fail_msg); 1274 return NULL; 1275 } 1276 1277 if (unlikely(INTEL_DEBUG & DEBUG_TES)) 1278 v.dump_instructions(); 1279 1280 assembly = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir, 1281 &prog_data->base, v.cfg, 1282 &prog_data->base.base.program_size); 1283 } 1284 1285 return assembly; 1286 } 1287