1 /* -*- c++ -*- */ 2 /* 3 * Copyright 2011-2015 Intel Corporation 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22 * IN THE SOFTWARE. 23 */ 24 25 #ifndef BRW_IR_VEC4_H 26 #define BRW_IR_VEC4_H 27 28 #include "brw_shader.h" 29 #include "brw_context.h" 30 31 namespace brw { 32 33 class dst_reg; 34 35 class src_reg : public backend_reg 36 { 37 public: 38 DECLARE_RALLOC_CXX_OPERATORS(src_reg) 39 40 void init(); 41 42 src_reg(enum brw_reg_file file, int nr, const glsl_type *type); 43 src_reg(); 44 src_reg(struct ::brw_reg reg); 45 46 bool equals(const src_reg &r) const; 47 48 src_reg(class vec4_visitor *v, const struct glsl_type *type); 49 src_reg(class vec4_visitor *v, const struct glsl_type *type, int size); 50 51 explicit src_reg(const dst_reg ®); 52 53 src_reg *reladdr; 54 }; 55 56 static inline src_reg 57 retype(src_reg reg, enum brw_reg_type type) 58 { 59 reg.type = type; 60 return reg; 61 } 62 63 namespace detail { 64 65 static inline void 66 add_byte_offset(backend_reg *reg, unsigned bytes) 67 { 68 switch (reg->file) { 69 case BAD_FILE: 70 break; 71 case VGRF: 72 case ATTR: 73 case UNIFORM: 74 reg->offset += bytes; 75 assert(reg->offset % 16 == 0); 76 break; 77 case MRF: { 78 const unsigned suboffset = reg->offset + bytes; 79 reg->nr += suboffset / REG_SIZE; 80 reg->offset = suboffset % REG_SIZE; 81 assert(reg->offset % 16 == 0); 82 break; 83 } 84 case ARF: 85 case FIXED_GRF: { 86 const unsigned suboffset = reg->subnr + bytes; 87 reg->nr += suboffset / REG_SIZE; 88 reg->subnr = suboffset % REG_SIZE; 89 assert(reg->subnr % 16 == 0); 90 break; 91 } 92 default: 93 assert(bytes == 0); 94 } 95 } 96 97 } /* namepace detail */ 98 99 static inline src_reg 100 byte_offset(src_reg reg, unsigned bytes) 101 { 102 detail::add_byte_offset(®, bytes); 103 return reg; 104 } 105 106 static inline src_reg 107 offset(src_reg reg, unsigned width, unsigned delta) 108 { 109 const unsigned stride = (reg.file == UNIFORM ? 0 : 4); 110 const unsigned num_components = MAX2(width / 4 * stride, 4); 111 return byte_offset(reg, num_components * type_sz(reg.type) * delta); 112 } 113 114 static inline src_reg 115 horiz_offset(src_reg reg, unsigned delta) 116 { 117 return byte_offset(reg, delta * type_sz(reg.type)); 118 } 119 120 /** 121 * Reswizzle a given source register. 122 * \sa brw_swizzle(). 123 */ 124 static inline src_reg 125 swizzle(src_reg reg, unsigned swizzle) 126 { 127 if (reg.file == IMM) 128 reg.ud = brw_swizzle_immediate(reg.type, reg.ud, swizzle); 129 else 130 reg.swizzle = brw_compose_swizzle(swizzle, reg.swizzle); 131 132 return reg; 133 } 134 135 static inline src_reg 136 negate(src_reg reg) 137 { 138 assert(reg.file != IMM); 139 reg.negate = !reg.negate; 140 return reg; 141 } 142 143 static inline bool 144 is_uniform(const src_reg ®) 145 { 146 return (reg.file == IMM || reg.file == UNIFORM || reg.is_null()) && 147 (!reg.reladdr || is_uniform(*reg.reladdr)); 148 } 149 150 class dst_reg : public backend_reg 151 { 152 public: 153 DECLARE_RALLOC_CXX_OPERATORS(dst_reg) 154 155 void init(); 156 157 dst_reg(); 158 dst_reg(enum brw_reg_file file, int nr); 159 dst_reg(enum brw_reg_file file, int nr, const glsl_type *type, 160 unsigned writemask); 161 dst_reg(enum brw_reg_file file, int nr, brw_reg_type type, 162 unsigned writemask); 163 dst_reg(struct ::brw_reg reg); 164 dst_reg(class vec4_visitor *v, const struct glsl_type *type); 165 166 explicit dst_reg(const src_reg ®); 167 168 bool equals(const dst_reg &r) const; 169 170 src_reg *reladdr; 171 }; 172 173 static inline dst_reg 174 retype(dst_reg reg, enum brw_reg_type type) 175 { 176 reg.type = type; 177 return reg; 178 } 179 180 static inline dst_reg 181 byte_offset(dst_reg reg, unsigned bytes) 182 { 183 detail::add_byte_offset(®, bytes); 184 return reg; 185 } 186 187 static inline dst_reg 188 offset(dst_reg reg, unsigned width, unsigned delta) 189 { 190 const unsigned stride = (reg.file == UNIFORM ? 0 : 4); 191 const unsigned num_components = MAX2(width / 4 * stride, 4); 192 return byte_offset(reg, num_components * type_sz(reg.type) * delta); 193 } 194 195 static inline dst_reg 196 horiz_offset(dst_reg reg, unsigned delta) 197 { 198 return byte_offset(reg, delta * type_sz(reg.type)); 199 } 200 201 static inline dst_reg 202 writemask(dst_reg reg, unsigned mask) 203 { 204 assert(reg.file != IMM); 205 assert((reg.writemask & mask) != 0); 206 reg.writemask &= mask; 207 return reg; 208 } 209 210 /** 211 * Return an integer identifying the discrete address space a register is 212 * contained in. A register is by definition fully contained in the single 213 * reg_space it belongs to, so two registers with different reg_space ids are 214 * guaranteed not to overlap. Most register files are a single reg_space of 215 * its own, only the VGRF file is composed of multiple discrete address 216 * spaces, one for each VGRF allocation. 217 */ 218 static inline uint32_t 219 reg_space(const backend_reg &r) 220 { 221 return r.file << 16 | (r.file == VGRF ? r.nr : 0); 222 } 223 224 /** 225 * Return the base offset in bytes of a register relative to the start of its 226 * reg_space(). 227 */ 228 static inline unsigned 229 reg_offset(const backend_reg &r) 230 { 231 return (r.file == VGRF || r.file == IMM ? 0 : r.nr) * 232 (r.file == UNIFORM ? 16 : REG_SIZE) + r.offset + 233 (r.file == ARF || r.file == FIXED_GRF ? r.subnr : 0); 234 } 235 236 /** 237 * Return whether the register region starting at \p r and spanning \p dr 238 * bytes could potentially overlap the register region starting at \p s and 239 * spanning \p ds bytes. 240 */ 241 static inline bool 242 regions_overlap(const backend_reg &r, unsigned dr, 243 const backend_reg &s, unsigned ds) 244 { 245 if (r.file == MRF && (r.nr & BRW_MRF_COMPR4)) { 246 /* COMPR4 regions are translated by the hardware during decompression 247 * into two separate half-regions 4 MRFs apart from each other. 248 */ 249 backend_reg t0 = r; 250 t0.nr &= ~BRW_MRF_COMPR4; 251 backend_reg t1 = t0; 252 t1.offset += 4 * REG_SIZE; 253 return regions_overlap(t0, dr / 2, s, ds) || 254 regions_overlap(t1, dr / 2, s, ds); 255 256 } else if (s.file == MRF && (s.nr & BRW_MRF_COMPR4)) { 257 return regions_overlap(s, ds, r, dr); 258 259 } else { 260 return reg_space(r) == reg_space(s) && 261 !(reg_offset(r) + dr <= reg_offset(s) || 262 reg_offset(s) + ds <= reg_offset(r)); 263 } 264 } 265 266 class vec4_instruction : public backend_instruction { 267 public: 268 DECLARE_RALLOC_CXX_OPERATORS(vec4_instruction) 269 270 vec4_instruction(enum opcode opcode, 271 const dst_reg &dst = dst_reg(), 272 const src_reg &src0 = src_reg(), 273 const src_reg &src1 = src_reg(), 274 const src_reg &src2 = src_reg()); 275 276 dst_reg dst; 277 src_reg src[3]; 278 279 enum brw_urb_write_flags urb_write_flags; 280 281 unsigned sol_binding; /**< gen6: SOL binding table index */ 282 bool sol_final_write; /**< gen6: send commit message */ 283 unsigned sol_vertex; /**< gen6: used for setting dst index in SVB header */ 284 285 bool is_send_from_grf(); 286 unsigned size_read(unsigned arg) const; 287 bool can_reswizzle(const struct gen_device_info *devinfo, int dst_writemask, 288 int swizzle, int swizzle_mask); 289 void reswizzle(int dst_writemask, int swizzle); 290 bool can_do_source_mods(const struct gen_device_info *devinfo); 291 bool can_do_writemask(const struct gen_device_info *devinfo); 292 bool can_change_types() const; 293 bool has_source_and_destination_hazard() const; 294 295 bool is_align1_partial_write() 296 { 297 return opcode == VEC4_OPCODE_SET_LOW_32BIT || 298 opcode == VEC4_OPCODE_SET_HIGH_32BIT; 299 } 300 301 bool reads_flag() 302 { 303 return predicate || opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2; 304 } 305 306 bool reads_flag(unsigned c) 307 { 308 if (opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2) 309 return true; 310 311 switch (predicate) { 312 case BRW_PREDICATE_NONE: 313 return false; 314 case BRW_PREDICATE_ALIGN16_REPLICATE_X: 315 return c == 0; 316 case BRW_PREDICATE_ALIGN16_REPLICATE_Y: 317 return c == 1; 318 case BRW_PREDICATE_ALIGN16_REPLICATE_Z: 319 return c == 2; 320 case BRW_PREDICATE_ALIGN16_REPLICATE_W: 321 return c == 3; 322 default: 323 return true; 324 } 325 } 326 327 bool writes_flag() 328 { 329 return (conditional_mod && (opcode != BRW_OPCODE_SEL && 330 opcode != BRW_OPCODE_IF && 331 opcode != BRW_OPCODE_WHILE)); 332 } 333 }; 334 335 /** 336 * Make the execution of \p inst dependent on the evaluation of a possibly 337 * inverted predicate. 338 */ 339 inline vec4_instruction * 340 set_predicate_inv(enum brw_predicate pred, bool inverse, 341 vec4_instruction *inst) 342 { 343 inst->predicate = pred; 344 inst->predicate_inverse = inverse; 345 return inst; 346 } 347 348 /** 349 * Make the execution of \p inst dependent on the evaluation of a predicate. 350 */ 351 inline vec4_instruction * 352 set_predicate(enum brw_predicate pred, vec4_instruction *inst) 353 { 354 return set_predicate_inv(pred, false, inst); 355 } 356 357 /** 358 * Write the result of evaluating the condition given by \p mod to a flag 359 * register. 360 */ 361 inline vec4_instruction * 362 set_condmod(enum brw_conditional_mod mod, vec4_instruction *inst) 363 { 364 inst->conditional_mod = mod; 365 return inst; 366 } 367 368 /** 369 * Clamp the result of \p inst to the saturation range of its destination 370 * datatype. 371 */ 372 inline vec4_instruction * 373 set_saturate(bool saturate, vec4_instruction *inst) 374 { 375 inst->saturate = saturate; 376 return inst; 377 } 378 379 /** 380 * Return the number of dataflow registers written by the instruction (either 381 * fully or partially) counted from 'floor(reg_offset(inst->dst) / 382 * register_size)'. The somewhat arbitrary register size unit is 16B for the 383 * UNIFORM and IMM files and 32B for all other files. 384 */ 385 inline unsigned 386 regs_written(const vec4_instruction *inst) 387 { 388 assert(inst->dst.file != UNIFORM && inst->dst.file != IMM); 389 return DIV_ROUND_UP(reg_offset(inst->dst) % REG_SIZE + inst->size_written, 390 REG_SIZE); 391 } 392 393 /** 394 * Return the number of dataflow registers read by the instruction (either 395 * fully or partially) counted from 'floor(reg_offset(inst->src[i]) / 396 * register_size)'. The somewhat arbitrary register size unit is 16B for the 397 * UNIFORM and IMM files and 32B for all other files. 398 */ 399 inline unsigned 400 regs_read(const vec4_instruction *inst, unsigned i) 401 { 402 const unsigned reg_size = 403 inst->src[i].file == UNIFORM || inst->src[i].file == IMM ? 16 : REG_SIZE; 404 return DIV_ROUND_UP(reg_offset(inst->src[i]) % reg_size + inst->size_read(i), 405 reg_size); 406 } 407 408 } /* namespace brw */ 409 410 #endif 411