Home | History | Annotate | Download | only in compiler
      1 /* -*- c++ -*- */
      2 /*
      3  * Copyright  2011-2015 Intel Corporation
      4  *
      5  * Permission is hereby granted, free of charge, to any person obtaining a
      6  * copy of this software and associated documentation files (the "Software"),
      7  * to deal in the Software without restriction, including without limitation
      8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      9  * and/or sell copies of the Software, and to permit persons to whom the
     10  * Software is furnished to do so, subject to the following conditions:
     11  *
     12  * The above copyright notice and this permission notice (including the next
     13  * paragraph) shall be included in all copies or substantial portions of the
     14  * Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     22  * IN THE SOFTWARE.
     23  */
     24 
     25 #ifndef BRW_IR_VEC4_H
     26 #define BRW_IR_VEC4_H
     27 
     28 #include "brw_shader.h"
     29 
     30 namespace brw {
     31 
     32 class dst_reg;
     33 
     34 class src_reg : public backend_reg
     35 {
     36 public:
     37    DECLARE_RALLOC_CXX_OPERATORS(src_reg)
     38 
     39    void init();
     40 
     41    src_reg(enum brw_reg_file file, int nr, const glsl_type *type);
     42    src_reg();
     43    src_reg(struct ::brw_reg reg);
     44 
     45    bool equals(const src_reg &r) const;
     46 
     47    src_reg(class vec4_visitor *v, const struct glsl_type *type);
     48    src_reg(class vec4_visitor *v, const struct glsl_type *type, int size);
     49 
     50    explicit src_reg(const dst_reg &reg);
     51 
     52    src_reg *reladdr;
     53 };
     54 
     55 static inline src_reg
     56 retype(src_reg reg, enum brw_reg_type type)
     57 {
     58    reg.type = type;
     59    return reg;
     60 }
     61 
     62 namespace detail {
     63 
     64 static inline void
     65 add_byte_offset(backend_reg *reg, unsigned bytes)
     66 {
     67    switch (reg->file) {
     68       case BAD_FILE:
     69          break;
     70       case VGRF:
     71       case ATTR:
     72       case UNIFORM:
     73          reg->offset += bytes;
     74          assert(reg->offset % 16 == 0);
     75          break;
     76       case MRF: {
     77          const unsigned suboffset = reg->offset + bytes;
     78          reg->nr += suboffset / REG_SIZE;
     79          reg->offset = suboffset % REG_SIZE;
     80          assert(reg->offset % 16 == 0);
     81          break;
     82       }
     83       case ARF:
     84       case FIXED_GRF: {
     85          const unsigned suboffset = reg->subnr + bytes;
     86          reg->nr += suboffset / REG_SIZE;
     87          reg->subnr = suboffset % REG_SIZE;
     88          assert(reg->subnr % 16 == 0);
     89          break;
     90       }
     91       default:
     92          assert(bytes == 0);
     93    }
     94 }
     95 
     96 } /* namepace detail */
     97 
     98 static inline src_reg
     99 byte_offset(src_reg reg, unsigned bytes)
    100 {
    101    detail::add_byte_offset(&reg, bytes);
    102    return reg;
    103 }
    104 
    105 static inline src_reg
    106 offset(src_reg reg, unsigned width, unsigned delta)
    107 {
    108    const unsigned stride = (reg.file == UNIFORM ? 0 : 4);
    109    const unsigned num_components = MAX2(width / 4 * stride, 4);
    110    return byte_offset(reg, num_components * type_sz(reg.type) * delta);
    111 }
    112 
    113 static inline src_reg
    114 horiz_offset(src_reg reg, unsigned delta)
    115 {
    116    return byte_offset(reg, delta * type_sz(reg.type));
    117 }
    118 
    119 /**
    120  * Reswizzle a given source register.
    121  * \sa brw_swizzle().
    122  */
    123 static inline src_reg
    124 swizzle(src_reg reg, unsigned swizzle)
    125 {
    126    if (reg.file == IMM)
    127       reg.ud = brw_swizzle_immediate(reg.type, reg.ud, swizzle);
    128    else
    129       reg.swizzle = brw_compose_swizzle(swizzle, reg.swizzle);
    130 
    131    return reg;
    132 }
    133 
    134 static inline src_reg
    135 negate(src_reg reg)
    136 {
    137    assert(reg.file != IMM);
    138    reg.negate = !reg.negate;
    139    return reg;
    140 }
    141 
    142 static inline bool
    143 is_uniform(const src_reg &reg)
    144 {
    145    return (reg.file == IMM || reg.file == UNIFORM || reg.is_null()) &&
    146           (!reg.reladdr || is_uniform(*reg.reladdr));
    147 }
    148 
    149 class dst_reg : public backend_reg
    150 {
    151 public:
    152    DECLARE_RALLOC_CXX_OPERATORS(dst_reg)
    153 
    154    void init();
    155 
    156    dst_reg();
    157    dst_reg(enum brw_reg_file file, int nr);
    158    dst_reg(enum brw_reg_file file, int nr, const glsl_type *type,
    159            unsigned writemask);
    160    dst_reg(enum brw_reg_file file, int nr, brw_reg_type type,
    161            unsigned writemask);
    162    dst_reg(struct ::brw_reg reg);
    163    dst_reg(class vec4_visitor *v, const struct glsl_type *type);
    164 
    165    explicit dst_reg(const src_reg &reg);
    166 
    167    bool equals(const dst_reg &r) const;
    168 
    169    src_reg *reladdr;
    170 };
    171 
    172 static inline dst_reg
    173 retype(dst_reg reg, enum brw_reg_type type)
    174 {
    175    reg.type = type;
    176    return reg;
    177 }
    178 
    179 static inline dst_reg
    180 byte_offset(dst_reg reg, unsigned bytes)
    181 {
    182    detail::add_byte_offset(&reg, bytes);
    183    return reg;
    184 }
    185 
    186 static inline dst_reg
    187 offset(dst_reg reg, unsigned width, unsigned delta)
    188 {
    189    const unsigned stride = (reg.file == UNIFORM ? 0 : 4);
    190    const unsigned num_components = MAX2(width / 4 * stride, 4);
    191    return byte_offset(reg, num_components * type_sz(reg.type) * delta);
    192 }
    193 
    194 static inline dst_reg
    195 horiz_offset(const dst_reg &reg, unsigned delta)
    196 {
    197    if (is_uniform(src_reg(reg)))
    198       return reg;
    199    else
    200       return byte_offset(reg, delta * type_sz(reg.type));
    201 }
    202 
    203 static inline dst_reg
    204 writemask(dst_reg reg, unsigned mask)
    205 {
    206    assert(reg.file != IMM);
    207    assert((reg.writemask & mask) != 0);
    208    reg.writemask &= mask;
    209    return reg;
    210 }
    211 
    212 /**
    213  * Return an integer identifying the discrete address space a register is
    214  * contained in.  A register is by definition fully contained in the single
    215  * reg_space it belongs to, so two registers with different reg_space ids are
    216  * guaranteed not to overlap.  Most register files are a single reg_space of
    217  * its own, only the VGRF file is composed of multiple discrete address
    218  * spaces, one for each VGRF allocation.
    219  */
    220 static inline uint32_t
    221 reg_space(const backend_reg &r)
    222 {
    223    return r.file << 16 | (r.file == VGRF ? r.nr : 0);
    224 }
    225 
    226 /**
    227  * Return the base offset in bytes of a register relative to the start of its
    228  * reg_space().
    229  */
    230 static inline unsigned
    231 reg_offset(const backend_reg &r)
    232 {
    233    return (r.file == VGRF || r.file == IMM ? 0 : r.nr) *
    234           (r.file == UNIFORM ? 16 : REG_SIZE) + r.offset +
    235           (r.file == ARF || r.file == FIXED_GRF ? r.subnr : 0);
    236 }
    237 
    238 /**
    239  * Return whether the register region starting at \p r and spanning \p dr
    240  * bytes could potentially overlap the register region starting at \p s and
    241  * spanning \p ds bytes.
    242  */
    243 static inline bool
    244 regions_overlap(const backend_reg &r, unsigned dr,
    245                 const backend_reg &s, unsigned ds)
    246 {
    247    if (r.file == MRF && (r.nr & BRW_MRF_COMPR4)) {
    248       /* COMPR4 regions are translated by the hardware during decompression
    249        * into two separate half-regions 4 MRFs apart from each other.
    250        */
    251       backend_reg t0 = r;
    252       t0.nr &= ~BRW_MRF_COMPR4;
    253       backend_reg t1 = t0;
    254       t1.offset += 4 * REG_SIZE;
    255       return regions_overlap(t0, dr / 2, s, ds) ||
    256              regions_overlap(t1, dr / 2, s, ds);
    257 
    258    } else if (s.file == MRF && (s.nr & BRW_MRF_COMPR4)) {
    259       return regions_overlap(s, ds, r, dr);
    260 
    261    } else {
    262       return reg_space(r) == reg_space(s) &&
    263              !(reg_offset(r) + dr <= reg_offset(s) ||
    264                reg_offset(s) + ds <= reg_offset(r));
    265    }
    266 }
    267 
    268 class vec4_instruction : public backend_instruction {
    269 public:
    270    DECLARE_RALLOC_CXX_OPERATORS(vec4_instruction)
    271 
    272    vec4_instruction(enum opcode opcode,
    273                     const dst_reg &dst = dst_reg(),
    274                     const src_reg &src0 = src_reg(),
    275                     const src_reg &src1 = src_reg(),
    276                     const src_reg &src2 = src_reg());
    277 
    278    dst_reg dst;
    279    src_reg src[3];
    280 
    281    enum brw_urb_write_flags urb_write_flags;
    282 
    283    unsigned sol_binding; /**< gen6: SOL binding table index */
    284    bool sol_final_write; /**< gen6: send commit message */
    285    unsigned sol_vertex; /**< gen6: used for setting dst index in SVB header */
    286 
    287    bool is_send_from_grf();
    288    unsigned size_read(unsigned arg) const;
    289    bool can_reswizzle(const struct gen_device_info *devinfo, int dst_writemask,
    290                       int swizzle, int swizzle_mask);
    291    void reswizzle(int dst_writemask, int swizzle);
    292    bool can_do_source_mods(const struct gen_device_info *devinfo);
    293    bool can_do_writemask(const struct gen_device_info *devinfo);
    294    bool can_change_types() const;
    295    bool has_source_and_destination_hazard() const;
    296 
    297    bool is_align1_partial_write()
    298    {
    299       return opcode == VEC4_OPCODE_SET_LOW_32BIT ||
    300              opcode == VEC4_OPCODE_SET_HIGH_32BIT;
    301    }
    302 
    303    bool reads_flag()
    304    {
    305       return predicate || opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2;
    306    }
    307 
    308    bool reads_flag(unsigned c)
    309    {
    310       if (opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2)
    311          return true;
    312 
    313       switch (predicate) {
    314       case BRW_PREDICATE_NONE:
    315          return false;
    316       case BRW_PREDICATE_ALIGN16_REPLICATE_X:
    317          return c == 0;
    318       case BRW_PREDICATE_ALIGN16_REPLICATE_Y:
    319          return c == 1;
    320       case BRW_PREDICATE_ALIGN16_REPLICATE_Z:
    321          return c == 2;
    322       case BRW_PREDICATE_ALIGN16_REPLICATE_W:
    323          return c == 3;
    324       default:
    325          return true;
    326       }
    327    }
    328 
    329    bool writes_flag()
    330    {
    331       return (conditional_mod && (opcode != BRW_OPCODE_SEL &&
    332                                   opcode != BRW_OPCODE_IF &&
    333                                   opcode != BRW_OPCODE_WHILE));
    334    }
    335 
    336    bool reads_g0_implicitly() const
    337    {
    338       switch (opcode) {
    339       case SHADER_OPCODE_TEX:
    340       case SHADER_OPCODE_TXL:
    341       case SHADER_OPCODE_TXD:
    342       case SHADER_OPCODE_TXF:
    343       case SHADER_OPCODE_TXF_CMS_W:
    344       case SHADER_OPCODE_TXF_CMS:
    345       case SHADER_OPCODE_TXF_MCS:
    346       case SHADER_OPCODE_TXS:
    347       case SHADER_OPCODE_TG4:
    348       case SHADER_OPCODE_TG4_OFFSET:
    349       case SHADER_OPCODE_SAMPLEINFO:
    350       case VS_OPCODE_PULL_CONSTANT_LOAD:
    351       case GS_OPCODE_SET_PRIMITIVE_ID:
    352       case GS_OPCODE_GET_INSTANCE_ID:
    353       case SHADER_OPCODE_GEN4_SCRATCH_READ:
    354       case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
    355          return true;
    356       default:
    357          return false;
    358       }
    359    }
    360 };
    361 
    362 /**
    363  * Make the execution of \p inst dependent on the evaluation of a possibly
    364  * inverted predicate.
    365  */
    366 inline vec4_instruction *
    367 set_predicate_inv(enum brw_predicate pred, bool inverse,
    368                   vec4_instruction *inst)
    369 {
    370    inst->predicate = pred;
    371    inst->predicate_inverse = inverse;
    372    return inst;
    373 }
    374 
    375 /**
    376  * Make the execution of \p inst dependent on the evaluation of a predicate.
    377  */
    378 inline vec4_instruction *
    379 set_predicate(enum brw_predicate pred, vec4_instruction *inst)
    380 {
    381    return set_predicate_inv(pred, false, inst);
    382 }
    383 
    384 /**
    385  * Write the result of evaluating the condition given by \p mod to a flag
    386  * register.
    387  */
    388 inline vec4_instruction *
    389 set_condmod(enum brw_conditional_mod mod, vec4_instruction *inst)
    390 {
    391    inst->conditional_mod = mod;
    392    return inst;
    393 }
    394 
    395 /**
    396  * Clamp the result of \p inst to the saturation range of its destination
    397  * datatype.
    398  */
    399 inline vec4_instruction *
    400 set_saturate(bool saturate, vec4_instruction *inst)
    401 {
    402    inst->saturate = saturate;
    403    return inst;
    404 }
    405 
    406 /**
    407  * Return the number of dataflow registers written by the instruction (either
    408  * fully or partially) counted from 'floor(reg_offset(inst->dst) /
    409  * register_size)'.  The somewhat arbitrary register size unit is 16B for the
    410  * UNIFORM and IMM files and 32B for all other files.
    411  */
    412 inline unsigned
    413 regs_written(const vec4_instruction *inst)
    414 {
    415    assert(inst->dst.file != UNIFORM && inst->dst.file != IMM);
    416    return DIV_ROUND_UP(reg_offset(inst->dst) % REG_SIZE + inst->size_written,
    417                        REG_SIZE);
    418 }
    419 
    420 /**
    421  * Return the number of dataflow registers read by the instruction (either
    422  * fully or partially) counted from 'floor(reg_offset(inst->src[i]) /
    423  * register_size)'.  The somewhat arbitrary register size unit is 16B for the
    424  * UNIFORM and IMM files and 32B for all other files.
    425  */
    426 inline unsigned
    427 regs_read(const vec4_instruction *inst, unsigned i)
    428 {
    429    const unsigned reg_size =
    430       inst->src[i].file == UNIFORM || inst->src[i].file == IMM ? 16 : REG_SIZE;
    431    return DIV_ROUND_UP(reg_offset(inst->src[i]) % reg_size + inst->size_read(i),
    432                        reg_size);
    433 }
    434 
    435 static inline enum brw_reg_type
    436 get_exec_type(const vec4_instruction *inst)
    437 {
    438    enum brw_reg_type exec_type = BRW_REGISTER_TYPE_B;
    439 
    440    for (int i = 0; i < 3; i++) {
    441       if (inst->src[i].file != BAD_FILE) {
    442          const brw_reg_type t = get_exec_type(brw_reg_type(inst->src[i].type));
    443          if (type_sz(t) > type_sz(exec_type))
    444             exec_type = t;
    445          else if (type_sz(t) == type_sz(exec_type) &&
    446                   brw_reg_type_is_floating_point(t))
    447             exec_type = t;
    448       }
    449    }
    450 
    451    if (exec_type == BRW_REGISTER_TYPE_B)
    452       exec_type = inst->dst.type;
    453 
    454    /* TODO: We need to handle half-float conversions. */
    455    assert(exec_type != BRW_REGISTER_TYPE_HF ||
    456           inst->dst.type == BRW_REGISTER_TYPE_HF);
    457    assert(exec_type != BRW_REGISTER_TYPE_B);
    458 
    459    return exec_type;
    460 }
    461 
    462 static inline unsigned
    463 get_exec_type_size(const vec4_instruction *inst)
    464 {
    465    return type_sz(get_exec_type(inst));
    466 }
    467 
    468 } /* namespace brw */
    469 
    470 #endif
    471