/* -*- c++ -*- */
/*
 * Copyright © 2010-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef BRW_FS_BUILDER_H
#define BRW_FS_BUILDER_H

#include "brw_ir_fs.h"
#include "brw_shader.h"

namespace brw {
   /**
    * Toolbox to assemble an FS IR program out of individual instructions.
    *
    * This object is meant to have an interface consistent with
    * brw::vec4_builder.  They cannot be fully interchangeable because
    * brw::fs_builder generates scalar code while brw::vec4_builder generates
    * vector code.
    */
   class fs_builder {
   public:
      /** Type used in this IR to represent a source of an instruction. */
      typedef fs_reg src_reg;

      /** Type used in this IR to represent the destination of an instruction. */
      typedef fs_reg dst_reg;

      /** Type used in this IR to represent an instruction. */
      typedef fs_inst instruction;

      /**
       * Construct an fs_builder that inserts instructions into \p shader.
       * \p dispatch_width gives the native execution width of the program.
       */
      fs_builder(backend_shader *shader,
                 unsigned dispatch_width) :
         shader(shader), block(NULL), cursor(NULL),
         _dispatch_width(dispatch_width),
         _group(0),
         force_writemask_all(false),
         annotation()
      {
      }

      /**
       * Construct an fs_builder that inserts instructions into \p shader
       * before instruction \p inst in basic block \p block.  The default
       * execution controls and debug annotation are initialized from the
       * instruction passed as argument.
       */
      fs_builder(backend_shader *shader, bblock_t *block, fs_inst *inst) :
         shader(shader), block(block), cursor(inst),
         _dispatch_width(inst->exec_size),
         _group(inst->group),
         force_writemask_all(inst->force_writemask_all)
      {
         annotation.str = inst->annotation;
         annotation.ir = inst->ir;
      }

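      /* Illustrative usage sketch (hypothetical names, not part of the
       * interface): a pass typically constructs one builder for the shader
       * and derives more specialized builders from it.  Assuming a SIMD16
       * program owned by the backend_shader `s`:
       *
       *    const fs_builder bld(s, 16);
       *    const fs_builder abld = bld.at_end().annotate("my pass");
       *    const dst_reg tmp = abld.vgrf(BRW_REGISTER_TYPE_F);
       *    abld.MOV(tmp, brw_imm_f(0.0f));
       *
       * at_end(), annotate(), vgrf() and the ALU helpers are all defined
       * further down in this class.
       */
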
      /**
       * Construct an fs_builder that inserts instructions before \p cursor in
       * basic block \p block, inheriting other code generation parameters
       * from this.
       */
      fs_builder
      at(bblock_t *block, exec_node *cursor) const
      {
         fs_builder bld = *this;
         bld.block = block;
         bld.cursor = cursor;
         return bld;
      }

      /**
       * Construct an fs_builder appending instructions at the end of the
       * instruction list of the shader, inheriting other code generation
       * parameters from this.
       */
      fs_builder
      at_end() const
      {
         return at(NULL, (exec_node *)&shader->instructions.tail_sentinel);
      }

      /**
       * Construct a builder specifying the default SIMD width and group of
       * channel enable signals, inheriting other code generation parameters
       * from this.
       *
       * \p n gives the default SIMD width, \p i gives the slot group used for
       * predication and control flow masking in multiples of \p n channels.
       */
      fs_builder
      group(unsigned n, unsigned i) const
      {
         assert(force_writemask_all ||
                (n <= dispatch_width() && i < dispatch_width() / n));
         fs_builder bld = *this;
         bld._dispatch_width = n;
         bld._group += i * n;
         return bld;
      }

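      /* Illustrative sketch (hypothetical names): starting from a SIMD16
       * builder with group 0, group(8, 1) yields a SIMD8 builder whose
       * instructions act on the second half of the channels:
       *
       *    const fs_builder hbld = bld.group(8, 1);
       *    assert(hbld.dispatch_width() == 8 && hbld.group() == 8);
       */
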
      /**
       * Alias for group() with width equal to eight.
       */
      fs_builder
      half(unsigned i) const
      {
         return group(8, i);
      }

      /**
       * Construct a builder with per-channel control flow execution masking
       * disabled if \p b is true.  If control flow execution masking is
       * already disabled this has no effect.
       */
      fs_builder
      exec_all(bool b = true) const
      {
         fs_builder bld = *this;
         if (b)
            bld.force_writemask_all = true;
         return bld;
      }

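      /* Illustrative sketch (hypothetical names): exec_all() is commonly
       * combined with group() to emit setup code that must run regardless of
       * which channels are enabled, e.g. writing a single dword of a message
       * header:
       *
       *    const fs_builder ubld = bld.exec_all().group(1, 0);
       *    ubld.MOV(header_dword, brw_imm_ud(0));
       *
       * brw_imm_ud() is the unsigned counterpart of the brw_imm_d() and
       * brw_imm_f() helpers used elsewhere in this file.
       */
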
      /**
       * Construct a builder with the given debug annotation info.
       */
      fs_builder
      annotate(const char *str, const void *ir = NULL) const
      {
         fs_builder bld = *this;
         bld.annotation.str = str;
         bld.annotation.ir = ir;
         return bld;
      }

      /**
       * Get the SIMD width in use.
       */
      unsigned
      dispatch_width() const
      {
         return _dispatch_width;
      }

      /**
       * Get the channel group in use.
       */
      unsigned
      group() const
      {
         return _group;
      }

      /**
       * Allocate a virtual register of natural vector size (one for this IR)
       * and SIMD width.  \p n gives the amount of space to allocate in
       * dispatch_width units (which is just enough space for one logical
       * component in this IR).
       */
      dst_reg
      vgrf(enum brw_reg_type type, unsigned n = 1) const
      {
         assert(dispatch_width() <= 32);

         if (n > 0)
            return dst_reg(VGRF, shader->alloc.allocate(
                              DIV_ROUND_UP(n * type_sz(type) * dispatch_width(),
                                           REG_SIZE)),
                           type);
         else
            return retype(null_reg_ud(), type);
      }

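      /* Illustrative sketch (hypothetical names): allocate a one-component
       * float temporary and a two-component UD temporary at the current
       * dispatch width.  With n == 0 the call returns a null register of the
       * requested type, so writes to it are simply discarded.
       *
       *    const dst_reg tmp  = bld.vgrf(BRW_REGISTER_TYPE_F);
       *    const dst_reg pair = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
       */
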
      /**
       * Create a null register of floating type.
       */
      dst_reg
      null_reg_f() const
      {
         return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_F));
      }

      dst_reg
      null_reg_df() const
      {
         return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_DF));
      }

      /**
       * Create a null register of signed integer type.
       */
      dst_reg
      null_reg_d() const
      {
         return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      }

      /**
       * Create a null register of unsigned integer type.
       */
      dst_reg
      null_reg_ud() const
      {
         return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
      }

      /**
       * Get the mask of SIMD channels enabled by dispatch and not yet
       * disabled by discard.
       */
      src_reg
      sample_mask_reg() const
      {
         assert(shader->stage != MESA_SHADER_FRAGMENT ||
                group() + dispatch_width() <= 16);
         if (shader->stage != MESA_SHADER_FRAGMENT) {
            return brw_imm_d(0xffffffff);
         } else if (brw_wm_prog_data(shader->stage_prog_data)->uses_kill) {
            return brw_flag_reg(0, 1);
         } else {
            return retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD);
         }
      }

      /**
       * Insert an instruction into the program.
       */
      instruction *
      emit(const instruction &inst) const
      {
         return emit(new(shader->mem_ctx) instruction(inst));
      }

      /**
       * Create and insert a nullary control instruction into the program.
       */
      instruction *
      emit(enum opcode opcode) const
      {
         return emit(instruction(opcode, dispatch_width()));
      }

      /**
       * Create and insert a nullary instruction into the program.
       */
      instruction *
      emit(enum opcode opcode, const dst_reg &dst) const
      {
         return emit(instruction(opcode, dispatch_width(), dst));
      }

      /**
       * Create and insert a unary instruction into the program.
       */
      instruction *
      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0) const
      {
         switch (opcode) {
         case SHADER_OPCODE_RCP:
         case SHADER_OPCODE_RSQ:
         case SHADER_OPCODE_SQRT:
         case SHADER_OPCODE_EXP2:
         case SHADER_OPCODE_LOG2:
         case SHADER_OPCODE_SIN:
         case SHADER_OPCODE_COS:
            return emit(instruction(opcode, dispatch_width(), dst,
                                    fix_math_operand(src0)));

         default:
            return emit(instruction(opcode, dispatch_width(), dst, src0));
         }
      }

      /**
       * Create and insert a binary instruction into the program.
       */
      instruction *
      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
           const src_reg &src1) const
      {
         switch (opcode) {
         case SHADER_OPCODE_POW:
         case SHADER_OPCODE_INT_QUOTIENT:
         case SHADER_OPCODE_INT_REMAINDER:
            return emit(instruction(opcode, dispatch_width(), dst,
                                    fix_math_operand(src0),
                                    fix_math_operand(src1)));

         default:
            return emit(instruction(opcode, dispatch_width(), dst, src0, src1));
         }
      }

      /**
       * Create and insert a ternary instruction into the program.
       */
      instruction *
      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
           const src_reg &src1, const src_reg &src2) const
      {
         switch (opcode) {
         case BRW_OPCODE_BFE:
         case BRW_OPCODE_BFI2:
         case BRW_OPCODE_MAD:
         case BRW_OPCODE_LRP:
            return emit(instruction(opcode, dispatch_width(), dst,
                                    fix_3src_operand(src0),
                                    fix_3src_operand(src1),
                                    fix_3src_operand(src2)));

         default:
            return emit(instruction(opcode, dispatch_width(), dst,
                                    src0, src1, src2));
         }
      }

      /**
       * Create and insert an instruction with a variable number of sources
       * into the program.
       */
      instruction *
      emit(enum opcode opcode, const dst_reg &dst, const src_reg srcs[],
           unsigned n) const
      {
         return emit(instruction(opcode, dispatch_width(), dst, srcs, n));
      }

      /**
       * Insert a preallocated instruction into the program.
       */
      instruction *
      emit(instruction *inst) const
      {
         assert(inst->exec_size <= 32);
         assert(inst->exec_size == dispatch_width() ||
                force_writemask_all);

         inst->group = _group;
         inst->force_writemask_all = force_writemask_all;
         inst->annotation = annotation.str;
         inst->ir = annotation.ir;

         if (block)
            static_cast<instruction *>(cursor)->insert_before(block, inst);
         else
            cursor->insert_before(inst);

         return inst;
      }

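      /* Illustrative sketch (hypothetical names): every emit() overload
       * returns the inserted instruction, so execution controls can be
       * adjusted on the result, either directly or through helpers such as
       * set_predicate() and set_condmod() used elsewhere in this class:
       *
       *    instruction *mov = bld.emit(BRW_OPCODE_MOV, dst, src);
       *    set_predicate(BRW_PREDICATE_NORMAL, mov);
       */
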
      /**
       * Select \p src0 if the comparison of both sources with the given
       * conditional mod evaluates to true, otherwise select \p src1.
       *
       * Generally useful to get the minimum or maximum of two values.
       */
      instruction *
      emit_minmax(const dst_reg &dst, const src_reg &src0,
                  const src_reg &src1, brw_conditional_mod mod) const
      {
         assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L);

         return set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
                                     fix_unsigned_negate(src1)));
      }

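      /* Illustrative sketch (hypothetical names): per-channel maximum with
       * BRW_CONDITIONAL_GE, per-channel minimum with BRW_CONDITIONAL_L.
       *
       *    bld.emit_minmax(dst, a, b, BRW_CONDITIONAL_GE);
       */
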
      /**
       * Copy any live channel from \p src to the first channel of the result.
       */
      src_reg
      emit_uniformize(const src_reg &src) const
      {
         /* FIXME: We use a vector chan_index and dst to allow constant and
          * copy propagation to move the result all the way into the consuming
          * instruction (typically a surface index or sampler index for a
          * send).  This uses 1 or 3 extra hw registers in SIMD16 or SIMD32
          * dispatch.  Once we teach const/copy propagation about scalars we
          * should go back to scalar destinations here.
          */
         const fs_builder ubld = exec_all();
         const dst_reg chan_index = vgrf(BRW_REGISTER_TYPE_UD);
         const dst_reg dst = vgrf(src.type);

         ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index);
         ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, component(chan_index, 0));

         return src_reg(component(dst, 0));
      }

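      /* Illustrative sketch (hypothetical names): force a possibly divergent
       * surface index to a single value before it is consumed by a send, as
       * described in the FIXME above.
       *
       *    const src_reg surface = bld.emit_uniformize(divergent_surface);
       */
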
      /**
       * Assorted arithmetic ops.
       * @{
       */
#define ALU1(op)                                        \
      instruction *                                     \
      op(const dst_reg &dst, const src_reg &src0) const \
      {                                                 \
         return emit(BRW_OPCODE_##op, dst, src0);       \
      }

#define ALU2(op)                                                        \
      instruction *                                                     \
      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
      {                                                                 \
         return emit(BRW_OPCODE_##op, dst, src0, src1);                 \
      }

#define ALU2_ACC(op)                                                    \
      instruction *                                                     \
      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
      {                                                                 \
         instruction *inst = emit(BRW_OPCODE_##op, dst, src0, src1);    \
         inst->writes_accumulator = true;                               \
         return inst;                                                   \
      }

#define ALU3(op)                                                        \
      instruction *                                                     \
      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1,  \
         const src_reg &src2) const                                     \
      {                                                                 \
         return emit(BRW_OPCODE_##op, dst, src0, src1, src2);           \
      }

      ALU2(ADD)
      ALU2_ACC(ADDC)
      ALU2(AND)
      ALU2(ASR)
      ALU2(AVG)
      ALU3(BFE)
      ALU2(BFI1)
      ALU3(BFI2)
      ALU1(BFREV)
      ALU1(CBIT)
      ALU2(CMPN)
      ALU3(CSEL)
      ALU1(DIM)
      ALU2(DP2)
      ALU2(DP3)
      ALU2(DP4)
      ALU2(DPH)
      ALU1(F16TO32)
      ALU1(F32TO16)
      ALU1(FBH)
      ALU1(FBL)
      ALU1(FRC)
      ALU2(LINE)
      ALU1(LZD)
      ALU2(MAC)
      ALU2_ACC(MACH)
      ALU3(MAD)
      ALU1(MOV)
      ALU2(MUL)
      ALU1(NOT)
      ALU2(OR)
      ALU2(PLN)
      ALU1(RNDD)
      ALU1(RNDE)
      ALU1(RNDU)
      ALU1(RNDZ)
      ALU2(SAD2)
      ALU2_ACC(SADA2)
      ALU2(SEL)
      ALU2(SHL)
      ALU2(SHR)
      ALU2_ACC(SUBB)
      ALU2(XOR)

#undef ALU3
#undef ALU2_ACC
#undef ALU2
#undef ALU1
      /** @} */

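      /* Illustrative sketch (hypothetical names): the helpers above are thin
       * wrappers around emit(), so a multiply-add can be written either as
       * two instructions or as a single three-source MAD.  Note that the MAD
       * instruction computes src1 * src2 + src0, i.e. the addend comes first.
       *
       *    bld.MUL(tmp, a, b);
       *    bld.ADD(dst, tmp, c);
       *
       *    bld.MAD(dst, c, b, a);
       */
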
      /**
       * CMP: Sets the low bit of the destination channels with the result
       * of the comparison, while the upper bits are undefined, and updates
       * the flag register with the packed 16 bits of the result.
       */
      instruction *
      CMP(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
          brw_conditional_mod condition) const
      {
         /* Take the instruction:
          *
          * CMP null<d> src0<f> src1<f>
          *
          * Original gen4 does type conversion to the destination type
          * before comparison, producing garbage results for floating
          * point comparisons.
          *
          * The destination type doesn't matter on newer generations,
          * so we set the type to match src0 so we can compact the
          * instruction.
          */
         return set_condmod(condition,
                            emit(BRW_OPCODE_CMP, retype(dst, src0.type),
                                 fix_unsigned_negate(src0),
                                 fix_unsigned_negate(src1)));
      }

      /**
       * Gen4 predicated IF.
       */
      instruction *
      IF(brw_predicate predicate) const
      {
         return set_predicate(predicate, emit(BRW_OPCODE_IF));
      }

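      /* Illustrative sketch (hypothetical names): compare into the flag
       * register, then branch on it, roughly mirroring how structured control
       * flow is emitted elsewhere in the compiler:
       *
       *    bld.CMP(bld.null_reg_f(), a, b, BRW_CONDITIONAL_GE);
       *    bld.IF(BRW_PREDICATE_NORMAL);
       *    ...
       *    bld.emit(BRW_OPCODE_ENDIF);
       */
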
      /**
       * Emit a linear interpolation instruction.
       */
      instruction *
      LRP(const dst_reg &dst, const src_reg &x, const src_reg &y,
          const src_reg &a) const
      {
         if (shader->devinfo->gen >= 6) {
            /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
             * we need to reorder the operands.
             */
            return emit(BRW_OPCODE_LRP, dst, a, y, x);
         } else {
            /* We can't use the LRP instruction.  Emit x*(1-a) + y*a. */
            const dst_reg y_times_a = vgrf(dst.type);
            const dst_reg one_minus_a = vgrf(dst.type);
            const dst_reg x_times_one_minus_a = vgrf(dst.type);

            MUL(y_times_a, y, a);
            ADD(one_minus_a, negate(a), brw_imm_f(1.0f));
            MUL(x_times_one_minus_a, x, src_reg(one_minus_a));
            return ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a));
         }
      }

      /**
       * Collect a number of registers into a contiguous range.
       */
      instruction *
      LOAD_PAYLOAD(const dst_reg &dst, const src_reg *src,
                   unsigned sources, unsigned header_size) const
      {
         instruction *inst = emit(SHADER_OPCODE_LOAD_PAYLOAD, dst, src, sources);
         inst->header_size = header_size;
         inst->size_written = header_size * REG_SIZE;
         for (unsigned i = header_size; i < sources; i++) {
            inst->size_written +=
               ALIGN(dispatch_width() * type_sz(src[i].type) * dst.stride,
                     REG_SIZE);
         }

         return inst;
      }

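      /* Illustrative sketch (hypothetical names): gather a one-register
       * header followed by two per-channel data registers into a contiguous
       * payload for a send-like instruction; the last argument is the header
       * size in physical registers.
       *
       *    const src_reg srcs[] = { header, data0, data1 };
       *    bld.LOAD_PAYLOAD(payload, srcs, 3, 1);
       */
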
      backend_shader *shader;

   private:
      /**
       * Workaround for negation of UD registers.  See comment in
       * fs_generator::generate_code() for more details.
       */
      src_reg
      fix_unsigned_negate(const src_reg &src) const
      {
         if (src.type == BRW_REGISTER_TYPE_UD &&
             src.negate) {
            dst_reg temp = vgrf(BRW_REGISTER_TYPE_UD);
            MOV(temp, src);
            return src_reg(temp);
         } else {
            return src;
         }
      }

      /**
       * Workaround for source register modes not supported by the ternary
       * instruction encoding.
       */
      src_reg
      fix_3src_operand(const src_reg &src) const
      {
         if (src.file == VGRF || src.file == UNIFORM || src.stride > 1) {
            return src;
         } else {
            dst_reg expanded = vgrf(src.type);
            MOV(expanded, src);
            return expanded;
         }
      }

      /**
       * Workaround for source register modes not supported by the math
       * instruction.
       */
      src_reg
      fix_math_operand(const src_reg &src) const
      {
         /* Can't do hstride == 0 args on gen6 math, so expand them out.  We
          * might be able to do better by doing execsize = 1 math and then
          * expanding that result out, but we would need to be careful with
          * masking.
          *
          * Gen6 hardware ignores source modifiers (negate and abs) on math
          * instructions, so we also move to a temp to set those up.
          *
          * Gen7 relaxes most of the above restrictions, but still can't use
          * IMM operands to math.
          */
         if ((shader->devinfo->gen == 6 &&
              (src.file == IMM || src.file == UNIFORM ||
               src.abs || src.negate)) ||
             (shader->devinfo->gen == 7 && src.file == IMM)) {
            const dst_reg tmp = vgrf(src.type);
            MOV(tmp, src);
            return tmp;
         } else {
            return src;
         }
      }

      bblock_t *block;
      exec_node *cursor;

      unsigned _dispatch_width;
      unsigned _group;
      bool force_writemask_all;

      /** Debug annotation info. */
      struct {
         const char *str;
         const void *ir;
      } annotation;
   };
}

#endif