1 /* -*- c++ -*- */ 2 /* 3 * Copyright 2011-2015 Intel Corporation 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22 * IN THE SOFTWARE. 23 */ 24 25 #ifndef BRW_IR_VEC4_H 26 #define BRW_IR_VEC4_H 27 28 #include "brw_shader.h" 29 30 namespace brw { 31 32 class dst_reg; 33 34 class src_reg : public backend_reg 35 { 36 public: 37 DECLARE_RALLOC_CXX_OPERATORS(src_reg) 38 39 void init(); 40 41 src_reg(enum brw_reg_file file, int nr, const glsl_type *type); 42 src_reg(); 43 src_reg(struct ::brw_reg reg); 44 45 bool equals(const src_reg &r) const; 46 47 src_reg(class vec4_visitor *v, const struct glsl_type *type); 48 src_reg(class vec4_visitor *v, const struct glsl_type *type, int size); 49 50 explicit src_reg(const dst_reg ®); 51 52 src_reg *reladdr; 53 }; 54 55 static inline src_reg 56 retype(src_reg reg, enum brw_reg_type type) 57 { 58 reg.type = type; 59 return reg; 60 } 61 62 namespace detail { 63 64 static inline void 65 add_byte_offset(backend_reg *reg, unsigned bytes) 66 { 67 switch (reg->file) { 68 case BAD_FILE: 69 break; 70 case VGRF: 71 case ATTR: 72 case UNIFORM: 73 reg->offset += bytes; 74 assert(reg->offset % 16 == 0); 75 break; 76 case MRF: { 77 const unsigned suboffset = reg->offset + bytes; 78 reg->nr += suboffset / REG_SIZE; 79 reg->offset = suboffset % REG_SIZE; 80 assert(reg->offset % 16 == 0); 81 break; 82 } 83 case ARF: 84 case FIXED_GRF: { 85 const unsigned suboffset = reg->subnr + bytes; 86 reg->nr += suboffset / REG_SIZE; 87 reg->subnr = suboffset % REG_SIZE; 88 assert(reg->subnr % 16 == 0); 89 break; 90 } 91 default: 92 assert(bytes == 0); 93 } 94 } 95 96 } /* namepace detail */ 97 98 static inline src_reg 99 byte_offset(src_reg reg, unsigned bytes) 100 { 101 detail::add_byte_offset(®, bytes); 102 return reg; 103 } 104 105 static inline src_reg 106 offset(src_reg reg, unsigned width, unsigned delta) 107 { 108 const unsigned stride = (reg.file == UNIFORM ? 0 : 4); 109 const unsigned num_components = MAX2(width / 4 * stride, 4); 110 return byte_offset(reg, num_components * type_sz(reg.type) * delta); 111 } 112 113 static inline src_reg 114 horiz_offset(src_reg reg, unsigned delta) 115 { 116 return byte_offset(reg, delta * type_sz(reg.type)); 117 } 118 119 /** 120 * Reswizzle a given source register. 121 * \sa brw_swizzle(). 122 */ 123 static inline src_reg 124 swizzle(src_reg reg, unsigned swizzle) 125 { 126 if (reg.file == IMM) 127 reg.ud = brw_swizzle_immediate(reg.type, reg.ud, swizzle); 128 else 129 reg.swizzle = brw_compose_swizzle(swizzle, reg.swizzle); 130 131 return reg; 132 } 133 134 static inline src_reg 135 negate(src_reg reg) 136 { 137 assert(reg.file != IMM); 138 reg.negate = !reg.negate; 139 return reg; 140 } 141 142 static inline bool 143 is_uniform(const src_reg ®) 144 { 145 return (reg.file == IMM || reg.file == UNIFORM || reg.is_null()) && 146 (!reg.reladdr || is_uniform(*reg.reladdr)); 147 } 148 149 class dst_reg : public backend_reg 150 { 151 public: 152 DECLARE_RALLOC_CXX_OPERATORS(dst_reg) 153 154 void init(); 155 156 dst_reg(); 157 dst_reg(enum brw_reg_file file, int nr); 158 dst_reg(enum brw_reg_file file, int nr, const glsl_type *type, 159 unsigned writemask); 160 dst_reg(enum brw_reg_file file, int nr, brw_reg_type type, 161 unsigned writemask); 162 dst_reg(struct ::brw_reg reg); 163 dst_reg(class vec4_visitor *v, const struct glsl_type *type); 164 165 explicit dst_reg(const src_reg ®); 166 167 bool equals(const dst_reg &r) const; 168 169 src_reg *reladdr; 170 }; 171 172 static inline dst_reg 173 retype(dst_reg reg, enum brw_reg_type type) 174 { 175 reg.type = type; 176 return reg; 177 } 178 179 static inline dst_reg 180 byte_offset(dst_reg reg, unsigned bytes) 181 { 182 detail::add_byte_offset(®, bytes); 183 return reg; 184 } 185 186 static inline dst_reg 187 offset(dst_reg reg, unsigned width, unsigned delta) 188 { 189 const unsigned stride = (reg.file == UNIFORM ? 0 : 4); 190 const unsigned num_components = MAX2(width / 4 * stride, 4); 191 return byte_offset(reg, num_components * type_sz(reg.type) * delta); 192 } 193 194 static inline dst_reg 195 horiz_offset(const dst_reg ®, unsigned delta) 196 { 197 if (is_uniform(src_reg(reg))) 198 return reg; 199 else 200 return byte_offset(reg, delta * type_sz(reg.type)); 201 } 202 203 static inline dst_reg 204 writemask(dst_reg reg, unsigned mask) 205 { 206 assert(reg.file != IMM); 207 assert((reg.writemask & mask) != 0); 208 reg.writemask &= mask; 209 return reg; 210 } 211 212 /** 213 * Return an integer identifying the discrete address space a register is 214 * contained in. A register is by definition fully contained in the single 215 * reg_space it belongs to, so two registers with different reg_space ids are 216 * guaranteed not to overlap. Most register files are a single reg_space of 217 * its own, only the VGRF file is composed of multiple discrete address 218 * spaces, one for each VGRF allocation. 219 */ 220 static inline uint32_t 221 reg_space(const backend_reg &r) 222 { 223 return r.file << 16 | (r.file == VGRF ? r.nr : 0); 224 } 225 226 /** 227 * Return the base offset in bytes of a register relative to the start of its 228 * reg_space(). 229 */ 230 static inline unsigned 231 reg_offset(const backend_reg &r) 232 { 233 return (r.file == VGRF || r.file == IMM ? 0 : r.nr) * 234 (r.file == UNIFORM ? 16 : REG_SIZE) + r.offset + 235 (r.file == ARF || r.file == FIXED_GRF ? r.subnr : 0); 236 } 237 238 /** 239 * Return whether the register region starting at \p r and spanning \p dr 240 * bytes could potentially overlap the register region starting at \p s and 241 * spanning \p ds bytes. 242 */ 243 static inline bool 244 regions_overlap(const backend_reg &r, unsigned dr, 245 const backend_reg &s, unsigned ds) 246 { 247 if (r.file == MRF && (r.nr & BRW_MRF_COMPR4)) { 248 /* COMPR4 regions are translated by the hardware during decompression 249 * into two separate half-regions 4 MRFs apart from each other. 250 */ 251 backend_reg t0 = r; 252 t0.nr &= ~BRW_MRF_COMPR4; 253 backend_reg t1 = t0; 254 t1.offset += 4 * REG_SIZE; 255 return regions_overlap(t0, dr / 2, s, ds) || 256 regions_overlap(t1, dr / 2, s, ds); 257 258 } else if (s.file == MRF && (s.nr & BRW_MRF_COMPR4)) { 259 return regions_overlap(s, ds, r, dr); 260 261 } else { 262 return reg_space(r) == reg_space(s) && 263 !(reg_offset(r) + dr <= reg_offset(s) || 264 reg_offset(s) + ds <= reg_offset(r)); 265 } 266 } 267 268 class vec4_instruction : public backend_instruction { 269 public: 270 DECLARE_RALLOC_CXX_OPERATORS(vec4_instruction) 271 272 vec4_instruction(enum opcode opcode, 273 const dst_reg &dst = dst_reg(), 274 const src_reg &src0 = src_reg(), 275 const src_reg &src1 = src_reg(), 276 const src_reg &src2 = src_reg()); 277 278 dst_reg dst; 279 src_reg src[3]; 280 281 enum brw_urb_write_flags urb_write_flags; 282 283 unsigned sol_binding; /**< gen6: SOL binding table index */ 284 bool sol_final_write; /**< gen6: send commit message */ 285 unsigned sol_vertex; /**< gen6: used for setting dst index in SVB header */ 286 287 bool is_send_from_grf(); 288 unsigned size_read(unsigned arg) const; 289 bool can_reswizzle(const struct gen_device_info *devinfo, int dst_writemask, 290 int swizzle, int swizzle_mask); 291 void reswizzle(int dst_writemask, int swizzle); 292 bool can_do_source_mods(const struct gen_device_info *devinfo); 293 bool can_do_writemask(const struct gen_device_info *devinfo); 294 bool can_change_types() const; 295 bool has_source_and_destination_hazard() const; 296 297 bool is_align1_partial_write() 298 { 299 return opcode == VEC4_OPCODE_SET_LOW_32BIT || 300 opcode == VEC4_OPCODE_SET_HIGH_32BIT; 301 } 302 303 bool reads_flag() 304 { 305 return predicate || opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2; 306 } 307 308 bool reads_flag(unsigned c) 309 { 310 if (opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2) 311 return true; 312 313 switch (predicate) { 314 case BRW_PREDICATE_NONE: 315 return false; 316 case BRW_PREDICATE_ALIGN16_REPLICATE_X: 317 return c == 0; 318 case BRW_PREDICATE_ALIGN16_REPLICATE_Y: 319 return c == 1; 320 case BRW_PREDICATE_ALIGN16_REPLICATE_Z: 321 return c == 2; 322 case BRW_PREDICATE_ALIGN16_REPLICATE_W: 323 return c == 3; 324 default: 325 return true; 326 } 327 } 328 329 bool writes_flag() 330 { 331 return (conditional_mod && (opcode != BRW_OPCODE_SEL && 332 opcode != BRW_OPCODE_IF && 333 opcode != BRW_OPCODE_WHILE)); 334 } 335 336 bool reads_g0_implicitly() const 337 { 338 switch (opcode) { 339 case SHADER_OPCODE_TEX: 340 case SHADER_OPCODE_TXL: 341 case SHADER_OPCODE_TXD: 342 case SHADER_OPCODE_TXF: 343 case SHADER_OPCODE_TXF_CMS_W: 344 case SHADER_OPCODE_TXF_CMS: 345 case SHADER_OPCODE_TXF_MCS: 346 case SHADER_OPCODE_TXS: 347 case SHADER_OPCODE_TG4: 348 case SHADER_OPCODE_TG4_OFFSET: 349 case SHADER_OPCODE_SAMPLEINFO: 350 case VS_OPCODE_PULL_CONSTANT_LOAD: 351 case GS_OPCODE_SET_PRIMITIVE_ID: 352 case GS_OPCODE_GET_INSTANCE_ID: 353 case SHADER_OPCODE_GEN4_SCRATCH_READ: 354 case SHADER_OPCODE_GEN4_SCRATCH_WRITE: 355 return true; 356 default: 357 return false; 358 } 359 } 360 }; 361 362 /** 363 * Make the execution of \p inst dependent on the evaluation of a possibly 364 * inverted predicate. 365 */ 366 inline vec4_instruction * 367 set_predicate_inv(enum brw_predicate pred, bool inverse, 368 vec4_instruction *inst) 369 { 370 inst->predicate = pred; 371 inst->predicate_inverse = inverse; 372 return inst; 373 } 374 375 /** 376 * Make the execution of \p inst dependent on the evaluation of a predicate. 377 */ 378 inline vec4_instruction * 379 set_predicate(enum brw_predicate pred, vec4_instruction *inst) 380 { 381 return set_predicate_inv(pred, false, inst); 382 } 383 384 /** 385 * Write the result of evaluating the condition given by \p mod to a flag 386 * register. 387 */ 388 inline vec4_instruction * 389 set_condmod(enum brw_conditional_mod mod, vec4_instruction *inst) 390 { 391 inst->conditional_mod = mod; 392 return inst; 393 } 394 395 /** 396 * Clamp the result of \p inst to the saturation range of its destination 397 * datatype. 398 */ 399 inline vec4_instruction * 400 set_saturate(bool saturate, vec4_instruction *inst) 401 { 402 inst->saturate = saturate; 403 return inst; 404 } 405 406 /** 407 * Return the number of dataflow registers written by the instruction (either 408 * fully or partially) counted from 'floor(reg_offset(inst->dst) / 409 * register_size)'. The somewhat arbitrary register size unit is 16B for the 410 * UNIFORM and IMM files and 32B for all other files. 411 */ 412 inline unsigned 413 regs_written(const vec4_instruction *inst) 414 { 415 assert(inst->dst.file != UNIFORM && inst->dst.file != IMM); 416 return DIV_ROUND_UP(reg_offset(inst->dst) % REG_SIZE + inst->size_written, 417 REG_SIZE); 418 } 419 420 /** 421 * Return the number of dataflow registers read by the instruction (either 422 * fully or partially) counted from 'floor(reg_offset(inst->src[i]) / 423 * register_size)'. The somewhat arbitrary register size unit is 16B for the 424 * UNIFORM and IMM files and 32B for all other files. 425 */ 426 inline unsigned 427 regs_read(const vec4_instruction *inst, unsigned i) 428 { 429 const unsigned reg_size = 430 inst->src[i].file == UNIFORM || inst->src[i].file == IMM ? 16 : REG_SIZE; 431 return DIV_ROUND_UP(reg_offset(inst->src[i]) % reg_size + inst->size_read(i), 432 reg_size); 433 } 434 435 static inline enum brw_reg_type 436 get_exec_type(const vec4_instruction *inst) 437 { 438 enum brw_reg_type exec_type = BRW_REGISTER_TYPE_B; 439 440 for (int i = 0; i < 3; i++) { 441 if (inst->src[i].file != BAD_FILE) { 442 const brw_reg_type t = get_exec_type(brw_reg_type(inst->src[i].type)); 443 if (type_sz(t) > type_sz(exec_type)) 444 exec_type = t; 445 else if (type_sz(t) == type_sz(exec_type) && 446 brw_reg_type_is_floating_point(t)) 447 exec_type = t; 448 } 449 } 450 451 if (exec_type == BRW_REGISTER_TYPE_B) 452 exec_type = inst->dst.type; 453 454 /* TODO: We need to handle half-float conversions. */ 455 assert(exec_type != BRW_REGISTER_TYPE_HF || 456 inst->dst.type == BRW_REGISTER_TYPE_HF); 457 assert(exec_type != BRW_REGISTER_TYPE_B); 458 459 return exec_type; 460 } 461 462 static inline unsigned 463 get_exec_type_size(const vec4_instruction *inst) 464 { 465 return type_sz(get_exec_type(inst)); 466 } 467 468 } /* namespace brw */ 469 470 #endif 471