Home | History | Annotate | Download | only in i965
      1 /* -*- c++ -*- */
      2 /*
      3  * Copyright © 2011-2015 Intel Corporation
      4  *
      5  * Permission is hereby granted, free of charge, to any person obtaining a
      6  * copy of this software and associated documentation files (the "Software"),
      7  * to deal in the Software without restriction, including without limitation
      8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      9  * and/or sell copies of the Software, and to permit persons to whom the
     10  * Software is furnished to do so, subject to the following conditions:
     11  *
     12  * The above copyright notice and this permission notice (including the next
     13  * paragraph) shall be included in all copies or substantial portions of the
     14  * Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     22  * IN THE SOFTWARE.
     23  */
     24 
     25 #ifndef BRW_IR_VEC4_H
     26 #define BRW_IR_VEC4_H
     27 
     28 #include "brw_shader.h"
     29 #include "brw_context.h"
     30 
     31 namespace brw {
     32 
     33 class dst_reg;
     34 
     35 class src_reg : public backend_reg
     36 {
     37 public:
     38    DECLARE_RALLOC_CXX_OPERATORS(src_reg)
     39 
     40    void init();
     41 
     42    src_reg(enum brw_reg_file file, int nr, const glsl_type *type);
     43    src_reg();
     44    src_reg(struct ::brw_reg reg);
     45 
     46    bool equals(const src_reg &r) const;
     47 
     48    src_reg(class vec4_visitor *v, const struct glsl_type *type);
     49    src_reg(class vec4_visitor *v, const struct glsl_type *type, int size);
     50 
     51    explicit src_reg(const dst_reg &reg);
     52 
     53    src_reg *reladdr;
     54 };
     55 
     56 static inline src_reg
     57 retype(src_reg reg, enum brw_reg_type type)
     58 {
     59    reg.type = type;
     60    return reg;
     61 }
     62 
     63 namespace detail {
     64 
     65 static inline void
     66 add_byte_offset(backend_reg *reg, unsigned bytes)
     67 {
     68    switch (reg->file) {
     69       case BAD_FILE:
     70          break;
     71       case VGRF:
     72       case ATTR:
     73       case UNIFORM:
     74          reg->offset += bytes;
     75          assert(reg->offset % 16 == 0);
     76          break;
     77       case MRF: {
     78          const unsigned suboffset = reg->offset + bytes;
     79          reg->nr += suboffset / REG_SIZE;
     80          reg->offset = suboffset % REG_SIZE;
     81          assert(reg->offset % 16 == 0);
     82          break;
     83       }
     84       case ARF:
     85       case FIXED_GRF: {
     86          const unsigned suboffset = reg->subnr + bytes;
     87          reg->nr += suboffset / REG_SIZE;
     88          reg->subnr = suboffset % REG_SIZE;
     89          assert(reg->subnr % 16 == 0);
     90          break;
     91       }
     92       default:
     93          assert(bytes == 0);
     94    }
     95 }
     96 
     97 } /* namepace detail */
     98 
     99 static inline src_reg
    100 byte_offset(src_reg reg, unsigned bytes)
    101 {
    102    detail::add_byte_offset(&reg, bytes);
    103    return reg;
    104 }
    105 
    106 static inline src_reg
    107 offset(src_reg reg, unsigned width, unsigned delta)
    108 {
    109    const unsigned stride = (reg.file == UNIFORM ? 0 : 4);
    110    const unsigned num_components = MAX2(width / 4 * stride, 4);
    111    return byte_offset(reg, num_components * type_sz(reg.type) * delta);
    112 }
    113 
    114 static inline src_reg
    115 horiz_offset(src_reg reg, unsigned delta)
    116 {
    117    return byte_offset(reg, delta * type_sz(reg.type));
    118 }
    119 
    120 /**
    121  * Reswizzle a given source register.
    122  * \sa brw_swizzle().
    123  */
    124 static inline src_reg
    125 swizzle(src_reg reg, unsigned swizzle)
    126 {
    127    if (reg.file == IMM)
    128       reg.ud = brw_swizzle_immediate(reg.type, reg.ud, swizzle);
    129    else
    130       reg.swizzle = brw_compose_swizzle(swizzle, reg.swizzle);
    131 
    132    return reg;
    133 }
    134 
    135 static inline src_reg
    136 negate(src_reg reg)
    137 {
    138    assert(reg.file != IMM);
    139    reg.negate = !reg.negate;
    140    return reg;
    141 }
    142 
    143 static inline bool
    144 is_uniform(const src_reg &reg)
    145 {
    146    return (reg.file == IMM || reg.file == UNIFORM || reg.is_null()) &&
    147           (!reg.reladdr || is_uniform(*reg.reladdr));
    148 }
    149 
    150 class dst_reg : public backend_reg
    151 {
    152 public:
    153    DECLARE_RALLOC_CXX_OPERATORS(dst_reg)
    154 
    155    void init();
    156 
    157    dst_reg();
    158    dst_reg(enum brw_reg_file file, int nr);
    159    dst_reg(enum brw_reg_file file, int nr, const glsl_type *type,
    160            unsigned writemask);
    161    dst_reg(enum brw_reg_file file, int nr, brw_reg_type type,
    162            unsigned writemask);
    163    dst_reg(struct ::brw_reg reg);
    164    dst_reg(class vec4_visitor *v, const struct glsl_type *type);
    165 
    166    explicit dst_reg(const src_reg &reg);
    167 
    168    bool equals(const dst_reg &r) const;
    169 
    170    src_reg *reladdr;
    171 };
    172 
    173 static inline dst_reg
    174 retype(dst_reg reg, enum brw_reg_type type)
    175 {
    176    reg.type = type;
    177    return reg;
    178 }
    179 
    180 static inline dst_reg
    181 byte_offset(dst_reg reg, unsigned bytes)
    182 {
    183    detail::add_byte_offset(&reg, bytes);
    184    return reg;
    185 }
    186 
    187 static inline dst_reg
    188 offset(dst_reg reg, unsigned width, unsigned delta)
    189 {
    190    const unsigned stride = (reg.file == UNIFORM ? 0 : 4);
    191    const unsigned num_components = MAX2(width / 4 * stride, 4);
    192    return byte_offset(reg, num_components * type_sz(reg.type) * delta);
    193 }
    194 
    195 static inline dst_reg
    196 horiz_offset(dst_reg reg, unsigned delta)
    197 {
    198    return byte_offset(reg, delta * type_sz(reg.type));
    199 }
    200 
    201 static inline dst_reg
    202 writemask(dst_reg reg, unsigned mask)
    203 {
    204    assert(reg.file != IMM);
    205    assert((reg.writemask & mask) != 0);
    206    reg.writemask &= mask;
    207    return reg;
    208 }
    209 
    210 /**
    211  * Return an integer identifying the discrete address space a register is
    212  * contained in.  A register is by definition fully contained in the single
    213  * reg_space it belongs to, so two registers with different reg_space ids are
    214  * guaranteed not to overlap.  Most register files are a single reg_space of
    215  * its own, only the VGRF file is composed of multiple discrete address
    216  * spaces, one for each VGRF allocation.
    217  */
    218 static inline uint32_t
    219 reg_space(const backend_reg &r)
    220 {
    221    return r.file << 16 | (r.file == VGRF ? r.nr : 0);
    222 }
    223 
    224 /**
    225  * Return the base offset in bytes of a register relative to the start of its
    226  * reg_space().
    227  */
    228 static inline unsigned
    229 reg_offset(const backend_reg &r)
    230 {
    231    return (r.file == VGRF || r.file == IMM ? 0 : r.nr) *
    232           (r.file == UNIFORM ? 16 : REG_SIZE) + r.offset +
    233           (r.file == ARF || r.file == FIXED_GRF ? r.subnr : 0);
    234 }
    235 
    236 /**
    237  * Return whether the register region starting at \p r and spanning \p dr
    238  * bytes could potentially overlap the register region starting at \p s and
    239  * spanning \p ds bytes.
    240  */
    241 static inline bool
    242 regions_overlap(const backend_reg &r, unsigned dr,
    243                 const backend_reg &s, unsigned ds)
    244 {
    245    if (r.file == MRF && (r.nr & BRW_MRF_COMPR4)) {
    246       /* COMPR4 regions are translated by the hardware during decompression
    247        * into two separate half-regions 4 MRFs apart from each other.
    248        */
    249       backend_reg t0 = r;
    250       t0.nr &= ~BRW_MRF_COMPR4;
    251       backend_reg t1 = t0;
    252       t1.offset += 4 * REG_SIZE;
    253       return regions_overlap(t0, dr / 2, s, ds) ||
    254              regions_overlap(t1, dr / 2, s, ds);
    255 
    256    } else if (s.file == MRF && (s.nr & BRW_MRF_COMPR4)) {
    257       return regions_overlap(s, ds, r, dr);
    258 
    259    } else {
    260       return reg_space(r) == reg_space(s) &&
    261              !(reg_offset(r) + dr <= reg_offset(s) ||
    262                reg_offset(s) + ds <= reg_offset(r));
    263    }
    264 }
    265 
    266 class vec4_instruction : public backend_instruction {
    267 public:
    268    DECLARE_RALLOC_CXX_OPERATORS(vec4_instruction)
    269 
    270    vec4_instruction(enum opcode opcode,
    271                     const dst_reg &dst = dst_reg(),
    272                     const src_reg &src0 = src_reg(),
    273                     const src_reg &src1 = src_reg(),
    274                     const src_reg &src2 = src_reg());
    275 
    276    dst_reg dst;
    277    src_reg src[3];
    278 
    279    enum brw_urb_write_flags urb_write_flags;
    280 
    281    unsigned sol_binding; /**< gen6: SOL binding table index */
    282    bool sol_final_write; /**< gen6: send commit message */
    283    unsigned sol_vertex; /**< gen6: used for setting dst index in SVB header */
    284 
    285    bool is_send_from_grf();
    286    unsigned size_read(unsigned arg) const;
    287    bool can_reswizzle(const struct gen_device_info *devinfo, int dst_writemask,
    288                       int swizzle, int swizzle_mask);
    289    void reswizzle(int dst_writemask, int swizzle);
    290    bool can_do_source_mods(const struct gen_device_info *devinfo);
    291    bool can_do_writemask(const struct gen_device_info *devinfo);
    292    bool can_change_types() const;
    293    bool has_source_and_destination_hazard() const;
    294 
    295    bool is_align1_partial_write()
    296    {
    297       return opcode == VEC4_OPCODE_SET_LOW_32BIT ||
    298              opcode == VEC4_OPCODE_SET_HIGH_32BIT;
    299    }
    300 
    301    bool reads_flag()
    302    {
    303       return predicate || opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2;
    304    }
    305 
    306    bool reads_flag(unsigned c)
    307    {
    308       if (opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2)
    309          return true;
    310 
    311       switch (predicate) {
    312       case BRW_PREDICATE_NONE:
    313          return false;
    314       case BRW_PREDICATE_ALIGN16_REPLICATE_X:
    315          return c == 0;
    316       case BRW_PREDICATE_ALIGN16_REPLICATE_Y:
    317          return c == 1;
    318       case BRW_PREDICATE_ALIGN16_REPLICATE_Z:
    319          return c == 2;
    320       case BRW_PREDICATE_ALIGN16_REPLICATE_W:
    321          return c == 3;
    322       default:
    323          return true;
    324       }
    325    }
    326 
    327    bool writes_flag()
    328    {
    329       return (conditional_mod && (opcode != BRW_OPCODE_SEL &&
    330                                   opcode != BRW_OPCODE_IF &&
    331                                   opcode != BRW_OPCODE_WHILE));
    332    }
    333 };
    334 
    335 /**
    336  * Make the execution of \p inst dependent on the evaluation of a possibly
    337  * inverted predicate.
    338  */
    339 inline vec4_instruction *
    340 set_predicate_inv(enum brw_predicate pred, bool inverse,
    341                   vec4_instruction *inst)
    342 {
    343    inst->predicate = pred;
    344    inst->predicate_inverse = inverse;
    345    return inst;
    346 }
    347 
    348 /**
    349  * Make the execution of \p inst dependent on the evaluation of a predicate.
    350  */
    351 inline vec4_instruction *
    352 set_predicate(enum brw_predicate pred, vec4_instruction *inst)
    353 {
    354    return set_predicate_inv(pred, false, inst);
    355 }
    356 
    357 /**
    358  * Write the result of evaluating the condition given by \p mod to a flag
    359  * register.
    360  */
    361 inline vec4_instruction *
    362 set_condmod(enum brw_conditional_mod mod, vec4_instruction *inst)
    363 {
    364    inst->conditional_mod = mod;
    365    return inst;
    366 }
    367 
    368 /**
    369  * Clamp the result of \p inst to the saturation range of its destination
    370  * datatype.
    371  */
    372 inline vec4_instruction *
    373 set_saturate(bool saturate, vec4_instruction *inst)
    374 {
    375    inst->saturate = saturate;
    376    return inst;
    377 }
    378 
    379 /**
    380  * Return the number of dataflow registers written by the instruction (either
    381  * fully or partially) counted from 'floor(reg_offset(inst->dst) /
    382  * register_size)'.  The somewhat arbitrary register size unit is 16B for the
    383  * UNIFORM and IMM files and 32B for all other files.
    384  */
    385 inline unsigned
    386 regs_written(const vec4_instruction *inst)
    387 {
    388    assert(inst->dst.file != UNIFORM && inst->dst.file != IMM);
    389    return DIV_ROUND_UP(reg_offset(inst->dst) % REG_SIZE + inst->size_written,
    390                        REG_SIZE);
    391 }
    392 
    393 /**
    394  * Return the number of dataflow registers read by the instruction (either
    395  * fully or partially) counted from 'floor(reg_offset(inst->src[i]) /
    396  * register_size)'.  The somewhat arbitrary register size unit is 16B for the
    397  * UNIFORM and IMM files and 32B for all other files.
    398  */
    399 inline unsigned
    400 regs_read(const vec4_instruction *inst, unsigned i)
    401 {
    402    const unsigned reg_size =
    403       inst->src[i].file == UNIFORM || inst->src[i].file == IMM ? 16 : REG_SIZE;
    404    return DIV_ROUND_UP(reg_offset(inst->src[i]) % reg_size + inst->size_read(i),
    405                        reg_size);
    406 }
    407 
    408 } /* namespace brw */
    409 
    410 #endif
    411