1 /* 2 * Copyright 2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24 /** @file brw_eu_validate.c 25 * 26 * This file implements a pass that validates shader assembly. 27 */ 28 29 #include "brw_eu.h" 30 31 /* We're going to do lots of string concatenation, so this should help. 
*/ 32 struct string { 33 char *str; 34 size_t len; 35 }; 36 37 static void 38 cat(struct string *dest, const struct string src) 39 { 40 dest->str = realloc(dest->str, dest->len + src.len + 1); 41 memcpy(dest->str + dest->len, src.str, src.len); 42 dest->str[dest->len + src.len] = '\0'; 43 dest->len = dest->len + src.len; 44 } 45 #define CAT(dest, src) cat(&dest, (struct string){src, strlen(src)}) 46 47 static bool 48 contains(const struct string haystack, const struct string needle) 49 { 50 return haystack.str && memmem(haystack.str, haystack.len, 51 needle.str, needle.len) != NULL; 52 } 53 #define CONTAINS(haystack, needle) \ 54 contains(haystack, (struct string){needle, strlen(needle)}) 55 56 #define error(str) "\tERROR: " str "\n" 57 #define ERROR_INDENT "\t " 58 59 #define ERROR(msg) ERROR_IF(true, msg) 60 #define ERROR_IF(cond, msg) \ 61 do { \ 62 if ((cond) && !CONTAINS(error_msg, error(msg))) { \ 63 CAT(error_msg, error(msg)); \ 64 } \ 65 } while(0) 66 67 #define CHECK(func, args...) \ 68 do { \ 69 struct string __msg = func(devinfo, inst, ##args); \ 70 if (__msg.str) { \ 71 cat(&error_msg, __msg); \ 72 free(__msg.str); \ 73 } \ 74 } while (0) 75 76 #define STRIDE(stride) (stride != 0 ? 
1 << ((stride) - 1) : 0) 77 #define WIDTH(width) (1 << (width)) 78 79 static bool 80 inst_is_send(const struct gen_device_info *devinfo, const brw_inst *inst) 81 { 82 switch (brw_inst_opcode(devinfo, inst)) { 83 case BRW_OPCODE_SEND: 84 case BRW_OPCODE_SENDC: 85 case BRW_OPCODE_SENDS: 86 case BRW_OPCODE_SENDSC: 87 return true; 88 default: 89 return false; 90 } 91 } 92 93 static unsigned 94 signed_type(unsigned type) 95 { 96 switch (type) { 97 case BRW_REGISTER_TYPE_UD: return BRW_REGISTER_TYPE_D; 98 case BRW_REGISTER_TYPE_UW: return BRW_REGISTER_TYPE_W; 99 case BRW_REGISTER_TYPE_UB: return BRW_REGISTER_TYPE_B; 100 case BRW_REGISTER_TYPE_UQ: return BRW_REGISTER_TYPE_Q; 101 default: return type; 102 } 103 } 104 105 static bool 106 inst_is_raw_move(const struct gen_device_info *devinfo, const brw_inst *inst) 107 { 108 unsigned dst_type = signed_type(brw_inst_dst_type(devinfo, inst)); 109 unsigned src_type = signed_type(brw_inst_src0_type(devinfo, inst)); 110 111 if (brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) { 112 /* FIXME: not strictly true */ 113 if (brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_VF || 114 brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UV || 115 brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_V) { 116 return false; 117 } 118 } else if (brw_inst_src0_negate(devinfo, inst) || 119 brw_inst_src0_abs(devinfo, inst)) { 120 return false; 121 } 122 123 return brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MOV && 124 brw_inst_saturate(devinfo, inst) == 0 && 125 dst_type == src_type; 126 } 127 128 static bool 129 dst_is_null(const struct gen_device_info *devinfo, const brw_inst *inst) 130 { 131 return brw_inst_dst_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && 132 brw_inst_dst_da_reg_nr(devinfo, inst) == BRW_ARF_NULL; 133 } 134 135 static bool 136 src0_is_null(const struct gen_device_info *devinfo, const brw_inst *inst) 137 { 138 return brw_inst_src0_reg_file(devinfo, inst) == 
BRW_ARCHITECTURE_REGISTER_FILE && 139 brw_inst_src0_da_reg_nr(devinfo, inst) == BRW_ARF_NULL; 140 } 141 142 static bool 143 src1_is_null(const struct gen_device_info *devinfo, const brw_inst *inst) 144 { 145 return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && 146 brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL; 147 } 148 149 static bool 150 src0_is_grf(const struct gen_device_info *devinfo, const brw_inst *inst) 151 { 152 return brw_inst_src0_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE; 153 } 154 155 static bool 156 src0_has_scalar_region(const struct gen_device_info *devinfo, const brw_inst *inst) 157 { 158 return brw_inst_src0_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 && 159 brw_inst_src0_width(devinfo, inst) == BRW_WIDTH_1 && 160 brw_inst_src0_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0; 161 } 162 163 static bool 164 src1_has_scalar_region(const struct gen_device_info *devinfo, const brw_inst *inst) 165 { 166 return brw_inst_src1_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 && 167 brw_inst_src1_width(devinfo, inst) == BRW_WIDTH_1 && 168 brw_inst_src1_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0; 169 } 170 171 static unsigned 172 num_sources_from_inst(const struct gen_device_info *devinfo, 173 const brw_inst *inst) 174 { 175 const struct opcode_desc *desc = 176 brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst)); 177 unsigned math_function; 178 179 if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MATH) { 180 math_function = brw_inst_math_function(devinfo, inst); 181 } else if (devinfo->gen < 6 && 182 brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND) { 183 if (brw_inst_sfid(devinfo, inst) == BRW_SFID_MATH) { 184 /* src1 must be a descriptor (including the information to determine 185 * that the SEND is doing an extended math operation), but src0 can 186 * actually be null since it serves as the source of the implicit GRF 187 * to MRF move. 
188 * 189 * If we stop using that functionality, we'll have to revisit this. 190 */ 191 return 2; 192 } else { 193 /* Send instructions are allowed to have null sources since they use 194 * the base_mrf field to specify which message register source. 195 */ 196 return 0; 197 } 198 } else { 199 assert(desc->nsrc < 4); 200 return desc->nsrc; 201 } 202 203 switch (math_function) { 204 case BRW_MATH_FUNCTION_INV: 205 case BRW_MATH_FUNCTION_LOG: 206 case BRW_MATH_FUNCTION_EXP: 207 case BRW_MATH_FUNCTION_SQRT: 208 case BRW_MATH_FUNCTION_RSQ: 209 case BRW_MATH_FUNCTION_SIN: 210 case BRW_MATH_FUNCTION_COS: 211 case BRW_MATH_FUNCTION_SINCOS: 212 case GEN8_MATH_FUNCTION_INVM: 213 case GEN8_MATH_FUNCTION_RSQRTM: 214 return 1; 215 case BRW_MATH_FUNCTION_FDIV: 216 case BRW_MATH_FUNCTION_POW: 217 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER: 218 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT: 219 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER: 220 return 2; 221 default: 222 unreachable("not reached"); 223 } 224 } 225 226 static struct string 227 sources_not_null(const struct gen_device_info *devinfo, 228 const brw_inst *inst) 229 { 230 unsigned num_sources = num_sources_from_inst(devinfo, inst); 231 struct string error_msg = { .str = NULL, .len = 0 }; 232 233 /* Nothing to test. 3-src instructions can only have GRF sources, and 234 * there's no bit to control the file. 
235 */ 236 if (num_sources == 3) 237 return (struct string){}; 238 239 if (num_sources >= 1) 240 ERROR_IF(src0_is_null(devinfo, inst), "src0 is null"); 241 242 if (num_sources == 2) 243 ERROR_IF(src1_is_null(devinfo, inst), "src1 is null"); 244 245 return error_msg; 246 } 247 248 static struct string 249 send_restrictions(const struct gen_device_info *devinfo, 250 const brw_inst *inst) 251 { 252 struct string error_msg = { .str = NULL, .len = 0 }; 253 254 if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND) { 255 ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT, 256 "send must use direct addressing"); 257 258 if (devinfo->gen >= 7) { 259 ERROR_IF(!src0_is_grf(devinfo, inst), "send from non-GRF"); 260 ERROR_IF(brw_inst_eot(devinfo, inst) && 261 brw_inst_src0_da_reg_nr(devinfo, inst) < 112, 262 "send with EOT must use g112-g127"); 263 } 264 } 265 266 return error_msg; 267 } 268 269 static bool 270 is_unsupported_inst(const struct gen_device_info *devinfo, 271 const brw_inst *inst) 272 { 273 return brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst)) == NULL; 274 } 275 276 static enum brw_reg_type 277 execution_type_for_type(enum brw_reg_type type) 278 { 279 switch (type) { 280 case BRW_REGISTER_TYPE_DF: 281 case BRW_REGISTER_TYPE_F: 282 case BRW_REGISTER_TYPE_HF: 283 return type; 284 285 case BRW_REGISTER_TYPE_VF: 286 return BRW_REGISTER_TYPE_F; 287 288 case BRW_REGISTER_TYPE_Q: 289 case BRW_REGISTER_TYPE_UQ: 290 return BRW_REGISTER_TYPE_Q; 291 292 case BRW_REGISTER_TYPE_D: 293 case BRW_REGISTER_TYPE_UD: 294 return BRW_REGISTER_TYPE_D; 295 296 case BRW_REGISTER_TYPE_W: 297 case BRW_REGISTER_TYPE_UW: 298 case BRW_REGISTER_TYPE_B: 299 case BRW_REGISTER_TYPE_UB: 300 case BRW_REGISTER_TYPE_V: 301 case BRW_REGISTER_TYPE_UV: 302 return BRW_REGISTER_TYPE_W; 303 } 304 unreachable("not reached"); 305 } 306 307 /** 308 * Returns the execution type of an instruction \p inst 309 */ 310 static enum brw_reg_type 311 execution_type(const struct 
gen_device_info *devinfo, const brw_inst *inst) 312 { 313 unsigned num_sources = num_sources_from_inst(devinfo, inst); 314 enum brw_reg_type src0_exec_type, src1_exec_type; 315 316 /* Execution data type is independent of destination data type, except in 317 * mixed F/HF instructions on CHV and SKL+. 318 */ 319 enum brw_reg_type dst_exec_type = brw_inst_dst_type(devinfo, inst); 320 321 src0_exec_type = execution_type_for_type(brw_inst_src0_type(devinfo, inst)); 322 if (num_sources == 1) { 323 if ((devinfo->gen >= 9 || devinfo->is_cherryview) && 324 src0_exec_type == BRW_REGISTER_TYPE_HF) { 325 return dst_exec_type; 326 } 327 return src0_exec_type; 328 } 329 330 src1_exec_type = execution_type_for_type(brw_inst_src1_type(devinfo, inst)); 331 if (src0_exec_type == src1_exec_type) 332 return src0_exec_type; 333 334 /* Mixed operand types where one is float is float on Gen < 6 335 * (and not allowed on later platforms) 336 */ 337 if (devinfo->gen < 6 && 338 (src0_exec_type == BRW_REGISTER_TYPE_F || 339 src1_exec_type == BRW_REGISTER_TYPE_F)) 340 return BRW_REGISTER_TYPE_F; 341 342 if (src0_exec_type == BRW_REGISTER_TYPE_Q || 343 src1_exec_type == BRW_REGISTER_TYPE_Q) 344 return BRW_REGISTER_TYPE_Q; 345 346 if (src0_exec_type == BRW_REGISTER_TYPE_D || 347 src1_exec_type == BRW_REGISTER_TYPE_D) 348 return BRW_REGISTER_TYPE_D; 349 350 if (src0_exec_type == BRW_REGISTER_TYPE_W || 351 src1_exec_type == BRW_REGISTER_TYPE_W) 352 return BRW_REGISTER_TYPE_W; 353 354 if (src0_exec_type == BRW_REGISTER_TYPE_DF || 355 src1_exec_type == BRW_REGISTER_TYPE_DF) 356 return BRW_REGISTER_TYPE_DF; 357 358 if (devinfo->gen >= 9 || devinfo->is_cherryview) { 359 if (dst_exec_type == BRW_REGISTER_TYPE_F || 360 src0_exec_type == BRW_REGISTER_TYPE_F || 361 src1_exec_type == BRW_REGISTER_TYPE_F) { 362 return BRW_REGISTER_TYPE_F; 363 } else { 364 return BRW_REGISTER_TYPE_HF; 365 } 366 } 367 368 assert(src0_exec_type == BRW_REGISTER_TYPE_F); 369 return BRW_REGISTER_TYPE_F; 370 } 371 372 /** 373 * 
Returns whether a region is packed 374 * 375 * A region is packed if its elements are adjacent in memory, with no 376 * intervening space, no overlap, and no replicated values. 377 */ 378 static bool 379 is_packed(unsigned vstride, unsigned width, unsigned hstride) 380 { 381 if (vstride == width) { 382 if (vstride == 1) { 383 return hstride == 0; 384 } else { 385 return hstride == 1; 386 } 387 } 388 389 return false; 390 } 391 392 /** 393 * Checks restrictions listed in "General Restrictions Based on Operand Types" 394 * in the "Register Region Restrictions" section. 395 */ 396 static struct string 397 general_restrictions_based_on_operand_types(const struct gen_device_info *devinfo, 398 const brw_inst *inst) 399 { 400 const struct opcode_desc *desc = 401 brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst)); 402 unsigned num_sources = num_sources_from_inst(devinfo, inst); 403 unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst); 404 struct string error_msg = { .str = NULL, .len = 0 }; 405 406 if (num_sources == 3) 407 return (struct string){}; 408 409 if (inst_is_send(devinfo, inst)) 410 return (struct string){}; 411 412 if (exec_size == 1) 413 return (struct string){}; 414 415 if (desc->ndst == 0) 416 return (struct string){}; 417 418 /* The PRMs say: 419 * 420 * Where n is the largest element size in bytes for any source or 421 * destination operand type, ExecSize * n must be <= 64. 422 * 423 * But we do not attempt to enforce it, because it is implied by other 424 * rules: 425 * 426 * - that the destination stride must match the execution data type 427 * - sources may not span more than two adjacent GRF registers 428 * - destination may not span more than two adjacent GRF registers 429 * 430 * In fact, checking it would weaken testing of the other rules. 
431 */ 432 433 unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); 434 enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); 435 bool dst_type_is_byte = 436 brw_inst_dst_type(devinfo, inst) == BRW_REGISTER_TYPE_B || 437 brw_inst_dst_type(devinfo, inst) == BRW_REGISTER_TYPE_UB; 438 439 if (dst_type_is_byte) { 440 if (is_packed(exec_size * dst_stride, exec_size, dst_stride)) { 441 if (!inst_is_raw_move(devinfo, inst)) { 442 ERROR("Only raw MOV supports a packed-byte destination"); 443 return error_msg; 444 } else { 445 return (struct string){}; 446 } 447 } 448 } 449 450 unsigned exec_type = execution_type(devinfo, inst); 451 unsigned exec_type_size = brw_reg_type_to_size(exec_type); 452 unsigned dst_type_size = brw_reg_type_to_size(dst_type); 453 454 /* On IVB/BYT, region parameters and execution size for DF are in terms of 455 * 32-bit elements, so they are doubled. For evaluating the validity of an 456 * instruction, we halve them. 457 */ 458 if (devinfo->gen == 7 && !devinfo->is_haswell && 459 exec_type_size == 8 && dst_type_size == 4) 460 dst_type_size = 8; 461 462 if (exec_type_size > dst_type_size) { 463 ERROR_IF(dst_stride * dst_type_size != exec_type_size, 464 "Destination stride must be equal to the ratio of the sizes of " 465 "the execution data type to the destination type"); 466 467 unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); 468 469 if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 && 470 brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) { 471 /* The i965 PRM says: 472 * 473 * Implementation Restriction: The relaxed alignment rule for byte 474 * destination (#10.5) is not supported. 
475 */ 476 if ((devinfo->gen > 4 || devinfo->is_g4x) && dst_type_is_byte) { 477 ERROR_IF(subreg % exec_type_size != 0 && 478 subreg % exec_type_size != 1, 479 "Destination subreg must be aligned to the size of the " 480 "execution data type (or to the next lowest byte for byte " 481 "destinations)"); 482 } else { 483 ERROR_IF(subreg % exec_type_size != 0, 484 "Destination subreg must be aligned to the size of the " 485 "execution data type"); 486 } 487 } 488 } 489 490 return error_msg; 491 } 492 493 /** 494 * Checks restrictions listed in "General Restrictions on Regioning Parameters" 495 * in the "Register Region Restrictions" section. 496 */ 497 static struct string 498 general_restrictions_on_region_parameters(const struct gen_device_info *devinfo, 499 const brw_inst *inst) 500 { 501 const struct opcode_desc *desc = 502 brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst)); 503 unsigned num_sources = num_sources_from_inst(devinfo, inst); 504 unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst); 505 struct string error_msg = { .str = NULL, .len = 0 }; 506 507 if (num_sources == 3) 508 return (struct string){}; 509 510 if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16) { 511 if (desc->ndst != 0 && !dst_is_null(devinfo, inst)) 512 ERROR_IF(brw_inst_dst_hstride(devinfo, inst) != BRW_HORIZONTAL_STRIDE_1, 513 "Destination Horizontal Stride must be 1"); 514 515 if (num_sources >= 1) { 516 if (devinfo->is_haswell || devinfo->gen >= 8) { 517 ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE && 518 brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 && 519 brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 && 520 brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, 521 "In Align16 mode, only VertStride of 0, 2, or 4 is allowed"); 522 } else { 523 ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE && 524 brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 && 525 
brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, 526 "In Align16 mode, only VertStride of 0 or 4 is allowed"); 527 } 528 } 529 530 if (num_sources == 2) { 531 if (devinfo->is_haswell || devinfo->gen >= 8) { 532 ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE && 533 brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 && 534 brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 && 535 brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, 536 "In Align16 mode, only VertStride of 0, 2, or 4 is allowed"); 537 } else { 538 ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE && 539 brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 && 540 brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, 541 "In Align16 mode, only VertStride of 0 or 4 is allowed"); 542 } 543 } 544 545 return error_msg; 546 } 547 548 for (unsigned i = 0; i < num_sources; i++) { 549 unsigned vstride, width, hstride, element_size, subreg; 550 enum brw_reg_type type; 551 552 #define DO_SRC(n) \ 553 if (brw_inst_src ## n ## _reg_file(devinfo, inst) == \ 554 BRW_IMMEDIATE_VALUE) \ 555 continue; \ 556 \ 557 vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst)); \ 558 width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst)); \ 559 hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \ 560 type = brw_inst_src ## n ## _type(devinfo, inst); \ 561 element_size = brw_reg_type_to_size(type); \ 562 subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst) 563 564 if (i == 0) { 565 DO_SRC(0); 566 } else { 567 DO_SRC(1); 568 } 569 #undef DO_SRC 570 571 /* On IVB/BYT, region parameters and execution size for DF are in terms of 572 * 32-bit elements, so they are doubled. For evaluating the validity of an 573 * instruction, we halve them. 
574 */ 575 if (devinfo->gen == 7 && !devinfo->is_haswell && 576 element_size == 8) 577 element_size = 4; 578 579 /* ExecSize must be greater than or equal to Width. */ 580 ERROR_IF(exec_size < width, "ExecSize must be greater than or equal " 581 "to Width"); 582 583 /* If ExecSize = Width and HorzStride 0, 584 * VertStride must be set to Width * HorzStride. 585 */ 586 if (exec_size == width && hstride != 0) { 587 ERROR_IF(vstride != width * hstride, 588 "If ExecSize = Width and HorzStride 0, " 589 "VertStride must be set to Width * HorzStride"); 590 } 591 592 /* If Width = 1, HorzStride must be 0 regardless of the values of 593 * ExecSize and VertStride. 594 */ 595 if (width == 1) { 596 ERROR_IF(hstride != 0, 597 "If Width = 1, HorzStride must be 0 regardless " 598 "of the values of ExecSize and VertStride"); 599 } 600 601 /* If ExecSize = Width = 1, both VertStride and HorzStride must be 0. */ 602 if (exec_size == 1 && width == 1) { 603 ERROR_IF(vstride != 0 || hstride != 0, 604 "If ExecSize = Width = 1, both VertStride " 605 "and HorzStride must be 0"); 606 } 607 608 /* If VertStride = HorzStride = 0, Width must be 1 regardless of the 609 * value of ExecSize. 610 */ 611 if (vstride == 0 && hstride == 0) { 612 ERROR_IF(width != 1, 613 "If VertStride = HorzStride = 0, Width must be " 614 "1 regardless of the value of ExecSize"); 615 } 616 617 /* VertStride must be used to cross GRF register boundaries. This rule 618 * implies that elements within a 'Width' cannot cross GRF boundaries. 
619 */ 620 const uint64_t mask = (1ULL << element_size) - 1; 621 unsigned rowbase = subreg; 622 623 for (int y = 0; y < exec_size / width; y++) { 624 uint64_t access_mask = 0; 625 unsigned offset = rowbase; 626 627 for (int x = 0; x < width; x++) { 628 access_mask |= mask << offset; 629 offset += hstride * element_size; 630 } 631 632 rowbase += vstride * element_size; 633 634 if ((uint32_t)access_mask != 0 && (access_mask >> 32) != 0) { 635 ERROR("VertStride must be used to cross GRF register boundaries"); 636 break; 637 } 638 } 639 } 640 641 /* Dst.HorzStride must not be 0. */ 642 if (desc->ndst != 0 && !dst_is_null(devinfo, inst)) { 643 ERROR_IF(brw_inst_dst_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0, 644 "Destination Horizontal Stride must not be 0"); 645 } 646 647 return error_msg; 648 } 649 650 /** 651 * Creates an \p access_mask for an \p exec_size, \p element_size, and a region 652 * 653 * An \p access_mask is a 32-element array of uint64_t, where each uint64_t is 654 * a bitmask of bytes accessed by the region. 655 * 656 * For instance the access mask of the source gX.1<4,2,2>F in an exec_size = 4 657 * instruction would be 658 * 659 * access_mask[0] = 0x00000000000000F0 660 * access_mask[1] = 0x000000000000F000 661 * access_mask[2] = 0x0000000000F00000 662 * access_mask[3] = 0x00000000F0000000 663 * access_mask[4-31] = 0 664 * 665 * because the first execution channel accesses bytes 7-4 and the second 666 * execution channel accesses bytes 15-12, etc. 
667 */ 668 static void 669 align1_access_mask(uint64_t access_mask[static 32], 670 unsigned exec_size, unsigned element_size, unsigned subreg, 671 unsigned vstride, unsigned width, unsigned hstride) 672 { 673 const uint64_t mask = (1ULL << element_size) - 1; 674 unsigned rowbase = subreg; 675 unsigned element = 0; 676 677 for (int y = 0; y < exec_size / width; y++) { 678 unsigned offset = rowbase; 679 680 for (int x = 0; x < width; x++) { 681 access_mask[element++] = mask << offset; 682 offset += hstride * element_size; 683 } 684 685 rowbase += vstride * element_size; 686 } 687 688 assert(element == 0 || element == exec_size); 689 } 690 691 /** 692 * Returns the number of registers accessed according to the \p access_mask 693 */ 694 static int 695 registers_read(const uint64_t access_mask[static 32]) 696 { 697 int regs_read = 0; 698 699 for (unsigned i = 0; i < 32; i++) { 700 if (access_mask[i] > 0xFFFFFFFF) { 701 return 2; 702 } else if (access_mask[i]) { 703 regs_read = 1; 704 } 705 } 706 707 return regs_read; 708 } 709 710 /** 711 * Checks restrictions listed in "Region Alignment Rules" in the "Register 712 * Region Restrictions" section. 
*/
static struct string
region_alignment_rules(const struct gen_device_info *devinfo,
                       const brw_inst *inst)
{
   /* Overview: builds per-channel byte access masks for the destination and
    * for each directly-addressed, non-immediate source, then applies the
    * generation-specific rules below to those masks. 3-source, Align16 and
    * send instructions are not checked. Returns a heap-allocated message, or
    * an empty string struct if no violation was found.
    */
   const struct opcode_desc *desc =
      brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));
   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
   uint64_t dst_access_mask[32], src0_access_mask[32], src1_access_mask[32];
   struct string error_msg = { .str = NULL, .len = 0 };

   if (num_sources == 3)
      return (struct string){};

   if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16)
      return (struct string){};

   if (inst_is_send(devinfo, inst))
      return (struct string){};

   /* Masks of sources that are skipped below stay all-zero, which
    * registers_read() reports as 0 registers.
    */
   memset(dst_access_mask, 0, sizeof(dst_access_mask));
   memset(src0_access_mask, 0, sizeof(src0_access_mask));
   memset(src1_access_mask, 0, sizeof(src1_access_mask));

   for (unsigned i = 0; i < num_sources; i++) {
      unsigned vstride, width, hstride, element_size, subreg;
      enum brw_reg_type type;

      /* In Direct Addressing mode, a source cannot span more than 2 adjacent
       * GRF registers.
       */

      /* Decodes the region of source n into the locals above and fills
       * srcN_access_mask. The `continue` statements skip the rest of the
       * enclosing loop iteration for indirect and immediate sources.
       */
#define DO_SRC(n) \
      if (brw_inst_src ## n ## _address_mode(devinfo, inst) != \
          BRW_ADDRESS_DIRECT) \
         continue; \
      \
      if (brw_inst_src ## n ## _reg_file(devinfo, inst) == \
          BRW_IMMEDIATE_VALUE) \
         continue; \
      \
      vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst)); \
      width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst)); \
      hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \
      type = brw_inst_src ## n ## _type(devinfo, inst); \
      element_size = brw_reg_type_to_size(type); \
      subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst); \
      align1_access_mask(src ## n ## _access_mask, \
                         exec_size, element_size, subreg, \
                         vstride, width, hstride)

      if (i == 0) {
         DO_SRC(0);
      } else {
         DO_SRC(1);
      }
#undef DO_SRC

      /* Byte offset of the last element of the region. */
      unsigned num_vstride = exec_size / width;
      unsigned num_hstride = width;
      unsigned vstride_elements = (num_vstride - 1) * vstride;
      unsigned hstride_elements = (num_hstride - 1) * hstride;
      unsigned offset = (vstride_elements + hstride_elements) * element_size +
                        subreg;
      ERROR_IF(offset >= 64,
               "A source cannot span more than 2 adjacent GRF registers");
   }

   if (desc->ndst == 0 || dst_is_null(devinfo, inst))
      return error_msg;

   /* Byte offset of the last destination element. */
   unsigned stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
   unsigned element_size = brw_reg_type_to_size(dst_type);
   unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
   unsigned offset = ((exec_size - 1) * stride * element_size) + subreg;
   ERROR_IF(offset >= 64,
            "A destination cannot span more than 2 adjacent GRF registers");

   /* The remaining rules work on the access masks; stop here if any span
    * error was already recorded.
    */
   if (error_msg.str)
      return error_msg;

   /* On IVB/BYT, region parameters and execution size for DF are in terms of
    * 32-bit elements, so they are doubled. For evaluating the validity of an
    * instruction, we halve them.
    */
   if (devinfo->gen == 7 && !devinfo->is_haswell &&
       element_size == 8)
      element_size = 4;

   /* The destination is described as a single row of exec_size elements
    * (or as a scalar region when exec_size is 1).
    */
   align1_access_mask(dst_access_mask, exec_size, element_size, subreg,
                      exec_size == 1 ? 0 : exec_size * stride,
                      exec_size == 1 ? 1 : exec_size,
                      exec_size == 1 ? 0 : stride);

   unsigned dst_regs = registers_read(dst_access_mask);
   unsigned src0_regs = registers_read(src0_access_mask);
   unsigned src1_regs = registers_read(src1_access_mask);

   /* The SNB, IVB, HSW, BDW, and CHV PRMs say:
    *
    *    When an instruction has a source region spanning two registers and a
    *    destination region contained in one register, the number of elements
    *    must be the same between two sources and one of the following must be
    *    true:
    *
    *       1. The destination region is entirely contained in the lower OWord
    *          of a register.
    *       2. The destination region is entirely contained in the upper OWord
    *          of a register.
    *       3. The destination elements are evenly split between the two OWords
    *          of a register.
    */
   if (devinfo->gen <= 8) {
      if (dst_regs == 1 && (src0_regs == 2 || src1_regs == 2)) {
         unsigned upper_oword_writes = 0, lower_oword_writes = 0;

         /* An element whose mask has a bit above bit 15 counts as an
          * upper-OWord write.
          */
         for (unsigned i = 0; i < exec_size; i++) {
            if (dst_access_mask[i] > 0x0000FFFF) {
               upper_oword_writes++;
            } else {
               assert(dst_access_mask[i] != 0);
               lower_oword_writes++;
            }
         }

         ERROR_IF(lower_oword_writes != 0 &&
                  upper_oword_writes != 0 &&
                  upper_oword_writes != lower_oword_writes,
                  "Writes must be to only one OWord or "
                  "evenly split between OWords");
      }
   }

   /* The IVB and HSW PRMs say:
    *
    *    When an instruction has a source region that spans two registers and
    *    the destination spans two registers, the destination elements must be
    *    evenly split between the two registers [...]
    *
    * The SNB PRM contains similar wording (but written in a much more
    * confusing manner).
    *
    * The BDW PRM says:
    *
    *    When destination spans two registers, the source may be one or two
    *    registers. The destination elements must be evenly split between the
    *    two registers.
    *
    * The SKL PRM says:
    *
    *    When destination of MATH instruction spans two registers, the
    *    destination elements must be evenly split between the two registers.
    *
    * It is not known whether this restriction applies to KBL or other Gens
    * after SKL.
    */
   if (devinfo->gen <= 8 ||
       brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MATH) {

      /* Nothing explicitly states that on Gen < 8 elements must be evenly
       * split between two destination registers in the two exceptional
       * source-region-spans-one-register cases, but since Broadwell requires
       * evenly split writes regardless of source region, we assume that it was
       * an oversight and require it.
       */
      if (dst_regs == 2) {
         unsigned upper_reg_writes = 0, lower_reg_writes = 0;

         /* An element whose mask has a bit above bit 31 counts as a write to
          * the second register.
          */
         for (unsigned i = 0; i < exec_size; i++) {
            if (dst_access_mask[i] > 0xFFFFFFFF) {
               upper_reg_writes++;
            } else {
               assert(dst_access_mask[i] != 0);
               lower_reg_writes++;
            }
         }

         ERROR_IF(upper_reg_writes != lower_reg_writes,
                  "Writes must be evenly split between the two "
                  "destination registers");
      }
   }

   /* The IVB and HSW PRMs say:
    *
    *    When an instruction has a source region that spans two registers and
    *    the destination spans two registers, the destination elements must be
    *    evenly split between the two registers and each destination register
    *    must be entirely derived from one source register.
    *
    *    Note: In such cases, the regioning parameters must ensure that the
    *    offset from the two source registers is the same.
    *
    * The SNB PRM contains similar wording (but written in a much more
    * confusing manner).
    *
    * There are effectively three rules stated here:
    *
    *    For an instruction with a source and a destination spanning two
    *    registers,
    *
    *       (1) destination elements must be evenly split between the two
    *           registers
    *       (2) all destination elements in a register must be derived
    *           from one source register
    *       (3) the offset (i.e. the starting location in each of the two
    *           registers spanned by a region) must be the same in the two
    *           registers spanned by a region
    *
    * It is impossible to violate rule (1) without violating (2) or (3), so we
    * do not attempt to validate it.
    */
   if (devinfo->gen <= 7 && dst_regs == 2) {
      for (unsigned i = 0; i < num_sources; i++) {
         /* For a source n that spans two registers: rule (2) compares, per
          * channel, whether destination and source fall in the same half of
          * their masks; rule (3) compares the source subregister with the
          * position of the first channel that lands in the second register.
          * The inner loops declare their own `i`, shadowing the outer one.
          */
#define DO_SRC(n) \
         if (src ## n ## _regs <= 1) \
            continue; \
         \
         for (unsigned i = 0; i < exec_size; i++) { \
            if ((dst_access_mask[i] > 0xFFFFFFFF) != \
                (src ## n ## _access_mask[i] > 0xFFFFFFFF)) { \
               ERROR("Each destination register must be entirely derived " \
                     "from one source register"); \
               break; \
            } \
         } \
         \
         unsigned offset_0 = \
            brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst); \
         unsigned offset_1 = offset_0; \
         \
         for (unsigned i = 0; i < exec_size; i++) { \
            if (src ## n ## _access_mask[i] > 0xFFFFFFFF) { \
               offset_1 = __builtin_ctzll(src ## n ## _access_mask[i]) - 32; \
               break; \
            } \
         } \
         \
         ERROR_IF(num_sources == 2 && offset_0 != offset_1, \
                  "The offset from the two source registers " \
                  "must be the same")

         if (i == 0) {
            DO_SRC(0);
         } else {
            DO_SRC(1);
         }
#undef DO_SRC
      }
   }

   /* The IVB and HSW PRMs say:
    *
    *    When destination spans two registers, the source MUST span two
    *    registers. The exception to the above rule:
    *        1. When source is scalar, the source registers are not
    *           incremented.
    *        2. When source is packed integer Word and destination is packed
    *           integer DWord, the source register is not incremented but the
    *           source sub register is incremented.
    *
    * The SNB PRM does not contain this rule, but the internal documentation
    * indicates that it applies to SNB as well. We assume that the rule applies
    * to Gen <= 5 although their PRMs do not state it.
    *
    * While the documentation explicitly says in exception (2) that the
    * destination must be an integer DWord, the hardware allows at least a
    * float destination type as well. We emit such instructions from
    *
    *    fs_visitor::emit_interpolation_setup_gen6
    *    fs_visitor::emit_fragcoord_interpolation
    *
    * and have for years with no ill effects.
    *
    * Additionally the simulator source code indicates that the real condition
    * is that the size of the destination type is 4 bytes.
    */
   if (devinfo->gen <= 7 && dst_regs == 2) {
      enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
      bool dst_is_packed_dword =
         is_packed(exec_size * stride, exec_size, stride) &&
         brw_reg_type_to_size(dst_type) == 4;

      for (unsigned i = 0; i < num_sources; i++) {
         /* Flags a single-register source n unless it is scalar or the
          * packed-word to packed-dword exception applies.
          */
#define DO_SRC(n) \
         unsigned vstride, width, hstride; \
         vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst)); \
         width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst)); \
         hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \
         bool src ## n ## _is_packed_word = \
            is_packed(vstride, width, hstride) && \
            (brw_inst_src ## n ## _type(devinfo, inst) == BRW_REGISTER_TYPE_W || \
             brw_inst_src ## n ## _type(devinfo, inst) == BRW_REGISTER_TYPE_UW); \
         \
         ERROR_IF(src ## n ## _regs == 1 && \
                  !src ## n ## _has_scalar_region(devinfo, inst) && \
                  !(dst_is_packed_dword && src ## n ## _is_packed_word), \
                  "When the destination spans two registers, the source must " \
                  "span two registers\n" ERROR_INDENT "(exceptions for scalar " \
                  "source and packed-word to packed-dword expansion)")

         if (i == 0) {
            DO_SRC(0);
         } else {
            DO_SRC(1);
         }
#undef DO_SRC
      }
   }

   return error_msg;
}

static struct string
vector_immediate_restrictions(const struct gen_device_info *devinfo,
                              const brw_inst *inst)
{
   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   struct string error_msg = { .str = NULL, .len = 0 };

   if (num_sources == 3 || num_sources == 0)
      return (struct string){};

   unsigned file = num_sources == 1 ?
                   brw_inst_src0_reg_file(devinfo, inst) :
                   brw_inst_src1_reg_file(devinfo, inst);
   if (file != BRW_IMMEDIATE_VALUE)
      return (struct string){};

   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
   unsigned dst_type_size = brw_reg_type_to_size(dst_type);
   unsigned dst_subreg = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 ?
                         brw_inst_dst_da1_subreg_nr(devinfo, inst) : 0;
   unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
   enum brw_reg_type type = num_sources == 1 ?
                            brw_inst_src0_type(devinfo, inst) :
                            brw_inst_src1_type(devinfo, inst);

   /* The PRMs say:
    *
    *    When an immediate vector is used in an instruction, the destination
    *    must be 128-bit aligned with destination horizontal stride equivalent
    *    to a word for an immediate integer vector (v) and equivalent to a
    *    DWord for an immediate float vector (vf).
    *
    * The text has not been updated for the addition of the immediate unsigned
    * integer vector type (uv) on SNB, but presumably the same restriction
    * applies.
1063 */ 1064 switch (type) { 1065 case BRW_REGISTER_TYPE_V: 1066 case BRW_REGISTER_TYPE_UV: 1067 case BRW_REGISTER_TYPE_VF: 1068 ERROR_IF(dst_subreg % (128 / 8) != 0, 1069 "Destination must be 128-bit aligned in order to use immediate " 1070 "vector types"); 1071 1072 if (type == BRW_REGISTER_TYPE_VF) { 1073 ERROR_IF(dst_type_size * dst_stride != 4, 1074 "Destination must have stride equivalent to dword in order " 1075 "to use the VF type"); 1076 } else { 1077 ERROR_IF(dst_type_size * dst_stride != 2, 1078 "Destination must have stride equivalent to word in order " 1079 "to use the V or UV type"); 1080 } 1081 break; 1082 default: 1083 break; 1084 } 1085 1086 return error_msg; 1087 } 1088 1089 static struct string 1090 special_requirements_for_handling_double_precision_data_types( 1091 const struct gen_device_info *devinfo, 1092 const brw_inst *inst) 1093 { 1094 unsigned num_sources = num_sources_from_inst(devinfo, inst); 1095 struct string error_msg = { .str = NULL, .len = 0 }; 1096 1097 if (num_sources == 3 || num_sources == 0) 1098 return (struct string){}; 1099 1100 enum brw_reg_type exec_type = execution_type(devinfo, inst); 1101 unsigned exec_type_size = brw_reg_type_to_size(exec_type); 1102 1103 enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, inst); 1104 enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); 1105 unsigned dst_type_size = brw_reg_type_to_size(dst_type); 1106 unsigned dst_hstride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); 1107 unsigned dst_reg = brw_inst_dst_da_reg_nr(devinfo, inst); 1108 unsigned dst_subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); 1109 unsigned dst_address_mode = brw_inst_dst_address_mode(devinfo, inst); 1110 1111 bool is_integer_dword_multiply = 1112 devinfo->gen >= 8 && 1113 brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MUL && 1114 (brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_D || 1115 brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UD) && 1116 (brw_inst_src1_type(devinfo, inst) 
== BRW_REGISTER_TYPE_D || 1117 brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_UD); 1118 1119 if (dst_type_size != 8 && exec_type_size != 8 && !is_integer_dword_multiply) 1120 return (struct string){}; 1121 1122 for (unsigned i = 0; i < num_sources; i++) { 1123 unsigned vstride, width, hstride, type_size, reg, subreg, address_mode; 1124 bool is_scalar_region; 1125 enum brw_reg_file file; 1126 enum brw_reg_type type; 1127 1128 #define DO_SRC(n) \ 1129 if (brw_inst_src ## n ## _reg_file(devinfo, inst) == \ 1130 BRW_IMMEDIATE_VALUE) \ 1131 continue; \ 1132 \ 1133 is_scalar_region = src ## n ## _has_scalar_region(devinfo, inst); \ 1134 vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst)); \ 1135 width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst)); \ 1136 hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \ 1137 file = brw_inst_src ## n ## _reg_file(devinfo, inst); \ 1138 type = brw_inst_src ## n ## _type(devinfo, inst); \ 1139 type_size = brw_reg_type_to_size(type); \ 1140 reg = brw_inst_src ## n ## _da_reg_nr(devinfo, inst); \ 1141 subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst); \ 1142 address_mode = brw_inst_src ## n ## _address_mode(devinfo, inst) 1143 1144 if (i == 0) { 1145 DO_SRC(0); 1146 } else { 1147 DO_SRC(1); 1148 } 1149 #undef DO_SRC 1150 1151 /* The PRMs say that for CHV, BXT: 1152 * 1153 * When source or destination datatype is 64b or operation is integer 1154 * DWord multiply, regioning in Align1 must follow these rules: 1155 * 1156 * 1. Source and Destination horizontal stride must be aligned to the 1157 * same qword. 1158 * 2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride. 1159 * 3. Source and Destination offset must be the same, except the case 1160 * of scalar source. 1161 * 1162 * We assume that the restriction applies to GLK as well. 
1163 */ 1164 if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 && 1165 (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) { 1166 unsigned src_stride = hstride * type_size; 1167 unsigned dst_stride = dst_hstride * dst_type_size; 1168 1169 ERROR_IF(!is_scalar_region && 1170 (src_stride % 8 != 0 || 1171 dst_stride % 8 != 0 || 1172 src_stride != dst_stride), 1173 "Source and destination horizontal stride must equal and a " 1174 "multiple of a qword when the execution type is 64-bit"); 1175 1176 ERROR_IF(vstride != width * hstride, 1177 "Vstride must be Width * Hstride when the execution type is " 1178 "64-bit"); 1179 1180 ERROR_IF(!is_scalar_region && dst_subreg != subreg, 1181 "Source and destination offset must be the same when the " 1182 "execution type is 64-bit"); 1183 } 1184 1185 /* The PRMs say that for CHV, BXT: 1186 * 1187 * When source or destination datatype is 64b or operation is integer 1188 * DWord multiply, indirect addressing must not be used. 1189 * 1190 * We assume that the restriction applies to GLK as well. 1191 */ 1192 if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) { 1193 ERROR_IF(BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == address_mode || 1194 BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == dst_address_mode, 1195 "Indirect addressing is not allowed when the execution type " 1196 "is 64-bit"); 1197 } 1198 1199 /* The PRMs say that for CHV, BXT: 1200 * 1201 * ARF registers must never be used with 64b datatype or when 1202 * operation is integer DWord multiply. 1203 * 1204 * We assume that the restriction applies to GLK as well. 1205 * 1206 * We assume that the restriction does not apply to the null register. 
1207 */ 1208 if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) { 1209 ERROR_IF(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MAC || 1210 brw_inst_acc_wr_control(devinfo, inst) || 1211 (BRW_ARCHITECTURE_REGISTER_FILE == file && 1212 reg != BRW_ARF_NULL) || 1213 (BRW_ARCHITECTURE_REGISTER_FILE == dst_file && 1214 dst_reg != BRW_ARF_NULL), 1215 "Architecture registers cannot be used when the execution " 1216 "type is 64-bit"); 1217 } 1218 } 1219 1220 /* The PRMs say that for BDW, SKL: 1221 * 1222 * If Align16 is required for an operation with QW destination and non-QW 1223 * source datatypes, the execution size cannot exceed 2. 1224 * 1225 * We assume that the restriction applies to all Gen8+ parts. 1226 */ 1227 if (devinfo->gen >= 8) { 1228 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 1229 enum brw_reg_type src1_type = 1230 num_sources > 1 ? brw_inst_src1_type(devinfo, inst) : src0_type; 1231 unsigned src0_type_size = brw_reg_type_to_size(src0_type); 1232 unsigned src1_type_size = brw_reg_type_to_size(src1_type); 1233 1234 ERROR_IF(brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16 && 1235 dst_type_size == 8 && 1236 (src0_type_size != 8 || src1_type_size != 8) && 1237 brw_inst_exec_size(devinfo, inst) > BRW_EXECUTE_2, 1238 "In Align16 exec size cannot exceed 2 with a QWord destination " 1239 "and a non-QWord source"); 1240 } 1241 1242 /* The PRMs say that for CHV, BXT: 1243 * 1244 * When source or destination datatype is 64b or operation is integer 1245 * DWord multiply, DepCtrl must not be used. 1246 * 1247 * We assume that the restriction applies to GLK as well. 
1248 */ 1249 if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) { 1250 ERROR_IF(brw_inst_no_dd_check(devinfo, inst) || 1251 brw_inst_no_dd_clear(devinfo, inst), 1252 "DepCtrl is not allowed when the execution type is 64-bit"); 1253 } 1254 1255 return error_msg; 1256 } 1257 1258 bool 1259 brw_validate_instructions(const struct gen_device_info *devinfo, 1260 const void *assembly, int start_offset, int end_offset, 1261 struct disasm_info *disasm) 1262 { 1263 bool valid = true; 1264 1265 for (int src_offset = start_offset; src_offset < end_offset;) { 1266 struct string error_msg = { .str = NULL, .len = 0 }; 1267 const brw_inst *inst = assembly + src_offset; 1268 bool is_compact = brw_inst_cmpt_control(devinfo, inst); 1269 brw_inst uncompacted; 1270 1271 if (is_compact) { 1272 brw_compact_inst *compacted = (void *)inst; 1273 brw_uncompact_instruction(devinfo, &uncompacted, compacted); 1274 inst = &uncompacted; 1275 } 1276 1277 if (is_unsupported_inst(devinfo, inst)) { 1278 ERROR("Instruction not supported on this Gen"); 1279 } else { 1280 CHECK(sources_not_null); 1281 CHECK(send_restrictions); 1282 CHECK(general_restrictions_based_on_operand_types); 1283 CHECK(general_restrictions_on_region_parameters); 1284 CHECK(region_alignment_rules); 1285 CHECK(vector_immediate_restrictions); 1286 CHECK(special_requirements_for_handling_double_precision_data_types); 1287 } 1288 1289 if (error_msg.str && disasm) { 1290 disasm_insert_error(disasm, src_offset, error_msg.str); 1291 } 1292 valid = valid && error_msg.len == 0; 1293 free(error_msg.str); 1294 1295 if (is_compact) { 1296 src_offset += sizeof(brw_compact_inst); 1297 } else { 1298 src_offset += sizeof(brw_inst); 1299 } 1300 } 1301 1302 return valid; 1303 } 1304