1 /* -*- c++ -*- */ 2 /* 3 * Copyright 2010-2015 Intel Corporation 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22 * IN THE SOFTWARE. 23 */ 24 25 #ifndef BRW_IR_FS_H 26 #define BRW_IR_FS_H 27 28 #include "brw_shader.h" 29 30 class fs_inst; 31 32 class fs_reg : public backend_reg { 33 public: 34 DECLARE_RALLOC_CXX_OPERATORS(fs_reg) 35 36 void init(); 37 38 fs_reg(); 39 fs_reg(struct ::brw_reg reg); 40 fs_reg(enum brw_reg_file file, int nr); 41 fs_reg(enum brw_reg_file file, int nr, enum brw_reg_type type); 42 43 bool equals(const fs_reg &r) const; 44 bool is_contiguous() const; 45 46 /** 47 * Return the size in bytes of a single logical component of the 48 * register assuming the given execution width. 49 */ 50 unsigned component_size(unsigned width) const; 51 52 /** Register region horizontal stride */ 53 uint8_t stride; 54 }; 55 56 static inline fs_reg 57 negate(fs_reg reg) 58 { 59 assert(reg.file != IMM); 60 reg.negate = !reg.negate; 61 return reg; 62 } 63 64 static inline fs_reg 65 retype(fs_reg reg, enum brw_reg_type type) 66 { 67 reg.type = type; 68 return reg; 69 } 70 71 static inline fs_reg 72 byte_offset(fs_reg reg, unsigned delta) 73 { 74 switch (reg.file) { 75 case BAD_FILE: 76 break; 77 case VGRF: 78 case ATTR: 79 case UNIFORM: 80 reg.offset += delta; 81 break; 82 case MRF: { 83 const unsigned suboffset = reg.offset + delta; 84 reg.nr += suboffset / REG_SIZE; 85 reg.offset = suboffset % REG_SIZE; 86 break; 87 } 88 case ARF: 89 case FIXED_GRF: { 90 const unsigned suboffset = reg.subnr + delta; 91 reg.nr += suboffset / REG_SIZE; 92 reg.subnr = suboffset % REG_SIZE; 93 break; 94 } 95 case IMM: 96 default: 97 assert(delta == 0); 98 } 99 return reg; 100 } 101 102 static inline fs_reg 103 horiz_offset(const fs_reg ®, unsigned delta) 104 { 105 switch (reg.file) { 106 case BAD_FILE: 107 case UNIFORM: 108 case IMM: 109 /* These only have a single component that is implicitly splatted. A 110 * horizontal offset should be a harmless no-op. 111 * XXX - Handle vector immediates correctly. 112 */ 113 return reg; 114 case VGRF: 115 case MRF: 116 case ATTR: 117 return byte_offset(reg, delta * reg.stride * type_sz(reg.type)); 118 case ARF: 119 case FIXED_GRF: 120 if (reg.is_null()) { 121 return reg; 122 } else { 123 const unsigned stride = reg.hstride ? 1 << (reg.hstride - 1) : 0; 124 return byte_offset(reg, delta * stride * type_sz(reg.type)); 125 } 126 } 127 unreachable("Invalid register file"); 128 } 129 130 static inline fs_reg 131 offset(fs_reg reg, unsigned width, unsigned delta) 132 { 133 switch (reg.file) { 134 case BAD_FILE: 135 break; 136 case ARF: 137 case FIXED_GRF: 138 case MRF: 139 case VGRF: 140 case ATTR: 141 case UNIFORM: 142 return byte_offset(reg, delta * reg.component_size(width)); 143 case IMM: 144 assert(delta == 0); 145 } 146 return reg; 147 } 148 149 /** 150 * Get the scalar channel of \p reg given by \p idx and replicate it to all 151 * channels of the result. 152 */ 153 static inline fs_reg 154 component(fs_reg reg, unsigned idx) 155 { 156 reg = horiz_offset(reg, idx); 157 reg.stride = 0; 158 return reg; 159 } 160 161 /** 162 * Return an integer identifying the discrete address space a register is 163 * contained in. A register is by definition fully contained in the single 164 * reg_space it belongs to, so two registers with different reg_space ids are 165 * guaranteed not to overlap. Most register files are a single reg_space of 166 * its own, only the VGRF file is composed of multiple discrete address 167 * spaces, one for each VGRF allocation. 168 */ 169 static inline uint32_t 170 reg_space(const fs_reg &r) 171 { 172 return r.file << 16 | (r.file == VGRF ? r.nr : 0); 173 } 174 175 /** 176 * Return the base offset in bytes of a register relative to the start of its 177 * reg_space(). 178 */ 179 static inline unsigned 180 reg_offset(const fs_reg &r) 181 { 182 return (r.file == VGRF || r.file == IMM ? 0 : r.nr) * 183 (r.file == UNIFORM ? 4 : REG_SIZE) + r.offset + 184 (r.file == ARF || r.file == FIXED_GRF ? r.subnr : 0); 185 } 186 187 /** 188 * Return the amount of padding in bytes left unused between individual 189 * components of register \p r due to a (horizontal) stride value greater than 190 * one, or zero if components are tightly packed in the register file. 191 */ 192 static inline unsigned 193 reg_padding(const fs_reg &r) 194 { 195 const unsigned stride = ((r.file != ARF && r.file != FIXED_GRF) ? r.stride : 196 r.hstride == 0 ? 0 : 197 1 << (r.hstride - 1)); 198 return (MAX2(1, stride) - 1) * type_sz(r.type); 199 } 200 201 /** 202 * Return whether the register region starting at \p r and spanning \p dr 203 * bytes could potentially overlap the register region starting at \p s and 204 * spanning \p ds bytes. 205 */ 206 static inline bool 207 regions_overlap(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds) 208 { 209 if (r.file == MRF && (r.nr & BRW_MRF_COMPR4)) { 210 fs_reg t = r; 211 t.nr &= ~BRW_MRF_COMPR4; 212 /* COMPR4 regions are translated by the hardware during decompression 213 * into two separate half-regions 4 MRFs apart from each other. 214 */ 215 return regions_overlap(t, dr / 2, s, ds) || 216 regions_overlap(byte_offset(t, 4 * REG_SIZE), dr / 2, s, ds); 217 218 } else if (s.file == MRF && (s.nr & BRW_MRF_COMPR4)) { 219 return regions_overlap(s, ds, r, dr); 220 221 } else { 222 return reg_space(r) == reg_space(s) && 223 !(reg_offset(r) + dr <= reg_offset(s) || 224 reg_offset(s) + ds <= reg_offset(r)); 225 } 226 } 227 228 /** 229 * Check that the register region given by r [r.offset, r.offset + dr[ 230 * is fully contained inside the register region given by s 231 * [s.offset, s.offset + ds[. 232 */ 233 static inline bool 234 region_contained_in(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds) 235 { 236 return reg_space(r) == reg_space(s) && 237 reg_offset(r) >= reg_offset(s) && 238 reg_offset(r) + dr <= reg_offset(s) + ds; 239 } 240 241 /** 242 * Return whether the given register region is n-periodic, i.e. whether the 243 * original region remains invariant after shifting it by \p n scalar 244 * channels. 245 */ 246 static inline bool 247 is_periodic(const fs_reg ®, unsigned n) 248 { 249 if (reg.file == BAD_FILE || reg.is_null()) { 250 return true; 251 252 } else if (reg.file == IMM) { 253 const unsigned period = (reg.type == BRW_REGISTER_TYPE_UV || 254 reg.type == BRW_REGISTER_TYPE_V ? 8 : 255 reg.type == BRW_REGISTER_TYPE_VF ? 4 : 256 1); 257 return n % period == 0; 258 259 } else if (reg.file == ARF || reg.file == FIXED_GRF) { 260 const unsigned period = (reg.hstride == 0 && reg.vstride == 0 ? 1 : 261 reg.vstride == 0 ? 1 << reg.width : 262 ~0); 263 return n % period == 0; 264 265 } else { 266 return reg.stride == 0; 267 } 268 } 269 270 static inline bool 271 is_uniform(const fs_reg ®) 272 { 273 return is_periodic(reg, 1); 274 } 275 276 /** 277 * Get the specified 8-component quarter of a register. 278 * XXX - Maybe come up with a less misleading name for this (e.g. quarter())? 279 */ 280 static inline fs_reg 281 half(const fs_reg ®, unsigned idx) 282 { 283 assert(idx < 2); 284 return horiz_offset(reg, 8 * idx); 285 } 286 287 /** 288 * Reinterpret each channel of register \p reg as a vector of values of the 289 * given smaller type and take the i-th subcomponent from each. 290 */ 291 static inline fs_reg 292 subscript(fs_reg reg, brw_reg_type type, unsigned i) 293 { 294 assert((i + 1) * type_sz(type) <= type_sz(reg.type)); 295 296 if (reg.file == ARF || reg.file == FIXED_GRF) { 297 /* The stride is encoded inconsistently for fixed GRF and ARF registers 298 * as the log2 of the actual vertical and horizontal strides. 299 */ 300 const int delta = _mesa_logbase2(type_sz(reg.type)) - 301 _mesa_logbase2(type_sz(type)); 302 reg.hstride += (reg.hstride ? delta : 0); 303 reg.vstride += (reg.vstride ? delta : 0); 304 305 } else if (reg.file == IMM) { 306 assert(reg.type == type); 307 308 } else { 309 reg.stride *= type_sz(reg.type) / type_sz(type); 310 } 311 312 return byte_offset(retype(reg, type), i * type_sz(type)); 313 } 314 315 static const fs_reg reg_undef; 316 317 class fs_inst : public backend_instruction { 318 fs_inst &operator=(const fs_inst &); 319 320 void init(enum opcode opcode, uint8_t exec_width, const fs_reg &dst, 321 const fs_reg *src, unsigned sources); 322 323 public: 324 DECLARE_RALLOC_CXX_OPERATORS(fs_inst) 325 326 fs_inst(); 327 fs_inst(enum opcode opcode, uint8_t exec_size); 328 fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst); 329 fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, 330 const fs_reg &src0); 331 fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, 332 const fs_reg &src0, const fs_reg &src1); 333 fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, 334 const fs_reg &src0, const fs_reg &src1, const fs_reg &src2); 335 fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, 336 const fs_reg src[], unsigned sources); 337 fs_inst(const fs_inst &that); 338 ~fs_inst(); 339 340 void resize_sources(uint8_t num_sources); 341 342 bool equals(fs_inst *inst) const; 343 bool is_send_from_grf() const; 344 bool is_partial_write() const; 345 bool is_copy_payload(const brw::simple_allocator &grf_alloc) const; 346 unsigned components_read(unsigned i) const; 347 unsigned size_read(int arg) const; 348 bool can_do_source_mods(const struct gen_device_info *devinfo); 349 bool can_change_types() const; 350 bool has_source_and_destination_hazard() const; 351 352 /** 353 * Return the subset of flag registers read by the instruction as a bitset 354 * with byte granularity. 355 */ 356 unsigned flags_read(const gen_device_info *devinfo) const; 357 358 /** 359 * Return the subset of flag registers updated by the instruction (either 360 * partially or fully) as a bitset with byte granularity. 361 */ 362 unsigned flags_written() const; 363 364 fs_reg dst; 365 fs_reg *src; 366 367 uint8_t sources; /**< Number of fs_reg sources. */ 368 369 bool pi_noperspective:1; /**< Pixel interpolator noperspective flag */ 370 }; 371 372 /** 373 * Make the execution of \p inst dependent on the evaluation of a possibly 374 * inverted predicate. 375 */ 376 static inline fs_inst * 377 set_predicate_inv(enum brw_predicate pred, bool inverse, 378 fs_inst *inst) 379 { 380 inst->predicate = pred; 381 inst->predicate_inverse = inverse; 382 return inst; 383 } 384 385 /** 386 * Make the execution of \p inst dependent on the evaluation of a predicate. 387 */ 388 static inline fs_inst * 389 set_predicate(enum brw_predicate pred, fs_inst *inst) 390 { 391 return set_predicate_inv(pred, false, inst); 392 } 393 394 /** 395 * Write the result of evaluating the condition given by \p mod to a flag 396 * register. 397 */ 398 static inline fs_inst * 399 set_condmod(enum brw_conditional_mod mod, fs_inst *inst) 400 { 401 inst->conditional_mod = mod; 402 return inst; 403 } 404 405 /** 406 * Clamp the result of \p inst to the saturation range of its destination 407 * datatype. 408 */ 409 static inline fs_inst * 410 set_saturate(bool saturate, fs_inst *inst) 411 { 412 inst->saturate = saturate; 413 return inst; 414 } 415 416 /** 417 * Return the number of dataflow registers written by the instruction (either 418 * fully or partially) counted from 'floor(reg_offset(inst->dst) / 419 * register_size)'. The somewhat arbitrary register size unit is 4B for the 420 * UNIFORM and IMM files and 32B for all other files. 421 */ 422 inline unsigned 423 regs_written(const fs_inst *inst) 424 { 425 assert(inst->dst.file != UNIFORM && inst->dst.file != IMM); 426 return DIV_ROUND_UP(reg_offset(inst->dst) % REG_SIZE + 427 inst->size_written - 428 MIN2(inst->size_written, reg_padding(inst->dst)), 429 REG_SIZE); 430 } 431 432 /** 433 * Return the number of dataflow registers read by the instruction (either 434 * fully or partially) counted from 'floor(reg_offset(inst->src[i]) / 435 * register_size)'. The somewhat arbitrary register size unit is 4B for the 436 * UNIFORM and IMM files and 32B for all other files. 437 */ 438 inline unsigned 439 regs_read(const fs_inst *inst, unsigned i) 440 { 441 const unsigned reg_size = 442 inst->src[i].file == UNIFORM || inst->src[i].file == IMM ? 4 : REG_SIZE; 443 return DIV_ROUND_UP(reg_offset(inst->src[i]) % reg_size + 444 inst->size_read(i) - 445 MIN2(inst->size_read(i), reg_padding(inst->src[i])), 446 reg_size); 447 } 448 449 static inline enum brw_reg_type 450 get_exec_type(const fs_inst *inst) 451 { 452 brw_reg_type exec_type = BRW_REGISTER_TYPE_B; 453 454 for (int i = 0; i < inst->sources; i++) { 455 if (inst->src[i].file != BAD_FILE) { 456 const brw_reg_type t = get_exec_type(inst->src[i].type); 457 if (type_sz(t) > type_sz(exec_type)) 458 exec_type = t; 459 else if (type_sz(t) == type_sz(exec_type) && 460 brw_reg_type_is_floating_point(t)) 461 exec_type = t; 462 } 463 } 464 465 if (exec_type == BRW_REGISTER_TYPE_B) 466 exec_type = inst->dst.type; 467 468 assert(exec_type != BRW_REGISTER_TYPE_B); 469 470 return exec_type; 471 } 472 473 static inline unsigned 474 get_exec_type_size(const fs_inst *inst) 475 { 476 return type_sz(get_exec_type(inst)); 477 } 478 479 #endif 480