Home | History | Annotate | Download | only in i965
      1 /* -*- c++ -*- */
      2 /*
      3  * Copyright  2010-2015 Intel Corporation
      4  *
      5  * Permission is hereby granted, free of charge, to any person obtaining a
      6  * copy of this software and associated documentation files (the "Software"),
      7  * to deal in the Software without restriction, including without limitation
      8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      9  * and/or sell copies of the Software, and to permit persons to whom the
     10  * Software is furnished to do so, subject to the following conditions:
     11  *
     12  * The above copyright notice and this permission notice (including the next
     13  * paragraph) shall be included in all copies or substantial portions of the
     14  * Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     22  * IN THE SOFTWARE.
     23  */
     24 
     25 #ifndef BRW_IR_FS_H
     26 #define BRW_IR_FS_H
     27 
     28 #include "brw_shader.h"
     29 
/* Forward declaration; the full class definition appears later in this
 * header.
 */
class fs_inst;
     31 
/**
 * Register type used as the destination and source operands of fs_inst.
 * Extends backend_reg with a horizontal stride field.
 */
class fs_reg : public backend_reg {
public:
   DECLARE_RALLOC_CXX_OPERATORS(fs_reg)

   /* NOTE(review): defined out of line; presumably resets the register to
    * its default state -- confirm against the implementation.
    */
   void init();

   /* Constructors: default, from a struct brw_reg, and from a register file
    * plus register number with an optional explicit type.
    */
   fs_reg();
   fs_reg(struct ::brw_reg reg);
   fs_reg(enum brw_reg_file file, int nr);
   fs_reg(enum brw_reg_file file, int nr, enum brw_reg_type type);

   /* Comparison and layout queries; both are defined out of line. */
   bool equals(const fs_reg &r) const;
   bool is_contiguous() const;

   /**
    * Return the size in bytes of a single logical component of the
    * register assuming the given execution width.
    */
   unsigned component_size(unsigned width) const;

   /** Register region horizontal stride */
   uint8_t stride;
};
     55 
     56 static inline fs_reg
     57 negate(fs_reg reg)
     58 {
     59    assert(reg.file != IMM);
     60    reg.negate = !reg.negate;
     61    return reg;
     62 }
     63 
     64 static inline fs_reg
     65 retype(fs_reg reg, enum brw_reg_type type)
     66 {
     67    reg.type = type;
     68    return reg;
     69 }
     70 
     71 static inline fs_reg
     72 byte_offset(fs_reg reg, unsigned delta)
     73 {
     74    switch (reg.file) {
     75    case BAD_FILE:
     76       break;
     77    case VGRF:
     78    case ATTR:
     79    case UNIFORM:
     80       reg.offset += delta;
     81       break;
     82    case MRF: {
     83       const unsigned suboffset = reg.offset + delta;
     84       reg.nr += suboffset / REG_SIZE;
     85       reg.offset = suboffset % REG_SIZE;
     86       break;
     87    }
     88    case ARF:
     89    case FIXED_GRF: {
     90       const unsigned suboffset = reg.subnr + delta;
     91       reg.nr += suboffset / REG_SIZE;
     92       reg.subnr = suboffset % REG_SIZE;
     93       break;
     94    }
     95    case IMM:
     96    default:
     97       assert(delta == 0);
     98    }
     99    return reg;
    100 }
    101 
    102 static inline fs_reg
    103 horiz_offset(const fs_reg &reg, unsigned delta)
    104 {
    105    switch (reg.file) {
    106    case BAD_FILE:
    107    case UNIFORM:
    108    case IMM:
    109       /* These only have a single component that is implicitly splatted.  A
    110        * horizontal offset should be a harmless no-op.
    111        * XXX - Handle vector immediates correctly.
    112        */
    113       return reg;
    114    case VGRF:
    115    case MRF:
    116    case ATTR:
    117       return byte_offset(reg, delta * reg.stride * type_sz(reg.type));
    118    case ARF:
    119    case FIXED_GRF:
    120       if (reg.is_null()) {
    121          return reg;
    122       } else {
    123          const unsigned stride = reg.hstride ? 1 << (reg.hstride - 1) : 0;
    124          return byte_offset(reg, delta * stride * type_sz(reg.type));
    125       }
    126    }
    127    unreachable("Invalid register file");
    128 }
    129 
    130 static inline fs_reg
    131 offset(fs_reg reg, unsigned width, unsigned delta)
    132 {
    133    switch (reg.file) {
    134    case BAD_FILE:
    135       break;
    136    case ARF:
    137    case FIXED_GRF:
    138    case MRF:
    139    case VGRF:
    140    case ATTR:
    141    case UNIFORM:
    142       return byte_offset(reg, delta * reg.component_size(width));
    143    case IMM:
    144       assert(delta == 0);
    145    }
    146    return reg;
    147 }
    148 
    149 /**
    150  * Get the scalar channel of \p reg given by \p idx and replicate it to all
    151  * channels of the result.
    152  */
    153 static inline fs_reg
    154 component(fs_reg reg, unsigned idx)
    155 {
    156    reg = horiz_offset(reg, idx);
    157    reg.stride = 0;
    158    return reg;
    159 }
    160 
    161 /**
    162  * Return an integer identifying the discrete address space a register is
    163  * contained in.  A register is by definition fully contained in the single
    164  * reg_space it belongs to, so two registers with different reg_space ids are
    165  * guaranteed not to overlap.  Most register files are a single reg_space of
    166  * its own, only the VGRF file is composed of multiple discrete address
    167  * spaces, one for each VGRF allocation.
    168  */
    169 static inline uint32_t
    170 reg_space(const fs_reg &r)
    171 {
    172    return r.file << 16 | (r.file == VGRF ? r.nr : 0);
    173 }
    174 
    175 /**
    176  * Return the base offset in bytes of a register relative to the start of its
    177  * reg_space().
    178  */
    179 static inline unsigned
    180 reg_offset(const fs_reg &r)
    181 {
    182    return (r.file == VGRF || r.file == IMM ? 0 : r.nr) *
    183           (r.file == UNIFORM ? 4 : REG_SIZE) + r.offset +
    184           (r.file == ARF || r.file == FIXED_GRF ? r.subnr : 0);
    185 }
    186 
    187 /**
    188  * Return the amount of padding in bytes left unused between individual
    189  * components of register \p r due to a (horizontal) stride value greater than
    190  * one, or zero if components are tightly packed in the register file.
    191  */
    192 static inline unsigned
    193 reg_padding(const fs_reg &r)
    194 {
    195    const unsigned stride = ((r.file != ARF && r.file != FIXED_GRF) ? r.stride :
    196                             r.hstride == 0 ? 0 :
    197                             1 << (r.hstride - 1));
    198    return (MAX2(1, stride) - 1) * type_sz(r.type);
    199 }
    200 
    201 /**
    202  * Return whether the register region starting at \p r and spanning \p dr
    203  * bytes could potentially overlap the register region starting at \p s and
    204  * spanning \p ds bytes.
    205  */
    206 static inline bool
    207 regions_overlap(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds)
    208 {
    209    if (r.file == MRF && (r.nr & BRW_MRF_COMPR4)) {
    210       fs_reg t = r;
    211       t.nr &= ~BRW_MRF_COMPR4;
    212       /* COMPR4 regions are translated by the hardware during decompression
    213        * into two separate half-regions 4 MRFs apart from each other.
    214        */
    215       return regions_overlap(t, dr / 2, s, ds) ||
    216              regions_overlap(byte_offset(t, 4 * REG_SIZE), dr / 2, s, ds);
    217 
    218    } else if (s.file == MRF && (s.nr & BRW_MRF_COMPR4)) {
    219       return regions_overlap(s, ds, r, dr);
    220 
    221    } else {
    222       return reg_space(r) == reg_space(s) &&
    223              !(reg_offset(r) + dr <= reg_offset(s) ||
    224                reg_offset(s) + ds <= reg_offset(r));
    225    }
    226 }
    227 
    228 /**
    229  * Check that the register region given by r [r.offset, r.offset + dr[
    230  * is fully contained inside the register region given by s
    231  * [s.offset, s.offset + ds[.
    232  */
    233 static inline bool
    234 region_contained_in(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds)
    235 {
    236    return reg_space(r) == reg_space(s) &&
    237           reg_offset(r) >= reg_offset(s) &&
    238           reg_offset(r) + dr <= reg_offset(s) + ds;
    239 }
    240 
    241 /**
    242  * Return whether the given register region is n-periodic, i.e. whether the
    243  * original region remains invariant after shifting it by \p n scalar
    244  * channels.
    245  */
    246 static inline bool
    247 is_periodic(const fs_reg &reg, unsigned n)
    248 {
    249    if (reg.file == BAD_FILE || reg.is_null()) {
    250       return true;
    251 
    252    } else if (reg.file == IMM) {
    253       const unsigned period = (reg.type == BRW_REGISTER_TYPE_UV ||
    254                                reg.type == BRW_REGISTER_TYPE_V ? 8 :
    255                                reg.type == BRW_REGISTER_TYPE_VF ? 4 :
    256                                1);
    257       return n % period == 0;
    258 
    259    } else if (reg.file == ARF || reg.file == FIXED_GRF) {
    260       const unsigned period = (reg.hstride == 0 && reg.vstride == 0 ? 1 :
    261                                reg.vstride == 0 ? 1 << reg.width :
    262                                ~0);
    263       return n % period == 0;
    264 
    265    } else {
    266       return reg.stride == 0;
    267    }
    268 }
    269 
    270 static inline bool
    271 is_uniform(const fs_reg &reg)
    272 {
    273    return is_periodic(reg, 1);
    274 }
    275 
    276 /**
    277  * Get the specified 8-component quarter of a register.
    278  * XXX - Maybe come up with a less misleading name for this (e.g. quarter())?
    279  */
    280 static inline fs_reg
    281 half(const fs_reg &reg, unsigned idx)
    282 {
    283    assert(idx < 2);
    284    return horiz_offset(reg, 8 * idx);
    285 }
    286 
    287 /**
    288  * Reinterpret each channel of register \p reg as a vector of values of the
    289  * given smaller type and take the i-th subcomponent from each.
    290  */
    291 static inline fs_reg
    292 subscript(fs_reg reg, brw_reg_type type, unsigned i)
    293 {
    294    assert((i + 1) * type_sz(type) <= type_sz(reg.type));
    295 
    296    if (reg.file == ARF || reg.file == FIXED_GRF) {
    297       /* The stride is encoded inconsistently for fixed GRF and ARF registers
    298        * as the log2 of the actual vertical and horizontal strides.
    299        */
    300       const int delta = _mesa_logbase2(type_sz(reg.type)) -
    301                         _mesa_logbase2(type_sz(type));
    302       reg.hstride += (reg.hstride ? delta : 0);
    303       reg.vstride += (reg.vstride ? delta : 0);
    304 
    305    } else if (reg.file == IMM) {
    306       assert(reg.type == type);
    307 
    308    } else {
    309       reg.stride *= type_sz(reg.type) / type_sz(type);
    310    }
    311 
    312    return byte_offset(retype(reg, type), i * type_sz(type));
    313 }
    314 
/* Default-constructed register constant with internal linkage.
 * NOTE(review): presumably stands for an undefined/absent register --
 * confirm against the fs_reg default constructor.
 */
static const fs_reg reg_undef;
    316 
/**
 * Instruction type built on top of backend_instruction.  It adds a
 * destination register, an array of source registers and a few flags.
 */
class fs_inst : public backend_instruction {
   /* Declared private with no definition visible in this header.
    * NOTE(review): presumably to forbid assignment -- confirm.
    */
   fs_inst &operator=(const fs_inst &);

   /* Private initializer taking the same arguments as the most general
    * constructor below.
    */
   void init(enum opcode opcode, uint8_t exec_width, const fs_reg &dst,
             const fs_reg *src, unsigned sources);

public:
   DECLARE_RALLOC_CXX_OPERATORS(fs_inst)

   /* Constructors for zero to three explicit sources, for an array of
    * sources, and a copy constructor.
    */
   fs_inst();
   fs_inst(enum opcode opcode, uint8_t exec_size);
   fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst);
   fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
           const fs_reg &src0);
   fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
           const fs_reg &src0, const fs_reg &src1);
   fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
           const fs_reg &src0, const fs_reg &src1, const fs_reg &src2);
   fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
           const fs_reg src[], unsigned sources);
   fs_inst(const fs_inst &that);
   ~fs_inst();

   /* NOTE(review): presumably changes the length of the src array to
    * num_sources entries -- confirm against the implementation.
    */
   void resize_sources(uint8_t num_sources);

   /* Queries on the instruction; all are defined out of line. */
   bool equals(fs_inst *inst) const;
   bool is_send_from_grf() const;
   bool is_partial_write() const;
   bool is_copy_payload(const brw::simple_allocator &grf_alloc) const;
   unsigned components_read(unsigned i) const;
   /* The result is used as a byte count by regs_read(). */
   unsigned size_read(int arg) const;
   bool can_do_source_mods(const struct gen_device_info *devinfo);
   bool can_change_types() const;
   bool has_side_effects() const;
   bool has_source_and_destination_hazard() const;

   /**
    * Return the subset of flag registers read by the instruction as a bitset
    * with byte granularity.
    */
   unsigned flags_read(const gen_device_info *devinfo) const;

   /**
    * Return the subset of flag registers updated by the instruction (either
    * partially or fully) as a bitset with byte granularity.
    */
   unsigned flags_written() const;

   fs_reg dst;   /**< Destination register. */
   fs_reg *src;  /**< Source registers; the count is kept in \c sources. */

   uint8_t sources; /**< Number of fs_reg sources. */

   /* NOTE(review): presumably an end-of-thread marker -- confirm. */
   bool eot:1;
   bool pi_noperspective:1;   /**< Pixel interpolator noperspective flag */
};
    373 
    374 /**
    375  * Make the execution of \p inst dependent on the evaluation of a possibly
    376  * inverted predicate.
    377  */
    378 static inline fs_inst *
    379 set_predicate_inv(enum brw_predicate pred, bool inverse,
    380                   fs_inst *inst)
    381 {
    382    inst->predicate = pred;
    383    inst->predicate_inverse = inverse;
    384    return inst;
    385 }
    386 
    387 /**
    388  * Make the execution of \p inst dependent on the evaluation of a predicate.
    389  */
    390 static inline fs_inst *
    391 set_predicate(enum brw_predicate pred, fs_inst *inst)
    392 {
    393    return set_predicate_inv(pred, false, inst);
    394 }
    395 
    396 /**
    397  * Write the result of evaluating the condition given by \p mod to a flag
    398  * register.
    399  */
    400 static inline fs_inst *
    401 set_condmod(enum brw_conditional_mod mod, fs_inst *inst)
    402 {
    403    inst->conditional_mod = mod;
    404    return inst;
    405 }
    406 
    407 /**
    408  * Clamp the result of \p inst to the saturation range of its destination
    409  * datatype.
    410  */
    411 static inline fs_inst *
    412 set_saturate(bool saturate, fs_inst *inst)
    413 {
    414    inst->saturate = saturate;
    415    return inst;
    416 }
    417 
    418 /**
    419  * Return the number of dataflow registers written by the instruction (either
    420  * fully or partially) counted from 'floor(reg_offset(inst->dst) /
    421  * register_size)'.  The somewhat arbitrary register size unit is 4B for the
    422  * UNIFORM and IMM files and 32B for all other files.
    423  */
    424 inline unsigned
    425 regs_written(const fs_inst *inst)
    426 {
    427    assert(inst->dst.file != UNIFORM && inst->dst.file != IMM);
    428    return DIV_ROUND_UP(reg_offset(inst->dst) % REG_SIZE +
    429                        inst->size_written -
    430                        MIN2(inst->size_written, reg_padding(inst->dst)),
    431                        REG_SIZE);
    432 }
    433 
    434 /**
    435  * Return the number of dataflow registers read by the instruction (either
    436  * fully or partially) counted from 'floor(reg_offset(inst->src[i]) /
    437  * register_size)'.  The somewhat arbitrary register size unit is 4B for the
    438  * UNIFORM and IMM files and 32B for all other files.
    439  */
    440 inline unsigned
    441 regs_read(const fs_inst *inst, unsigned i)
    442 {
    443    const unsigned reg_size =
    444       inst->src[i].file == UNIFORM || inst->src[i].file == IMM ? 4 : REG_SIZE;
    445    return DIV_ROUND_UP(reg_offset(inst->src[i]) % reg_size +
    446                        inst->size_read(i) -
    447                        MIN2(inst->size_read(i), reg_padding(inst->src[i])),
    448                        reg_size);
    449 }
    450 
    451 #endif
    452