Home | History | Annotate | Download | only in compiler
      1 /*
      2  * Copyright © 2013-2015 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  */
     23 
     24 #include "brw_vec4_surface_builder.h"
     25 
     26 using namespace brw;
     27 
     28 namespace {
     29    namespace array_utils {
     30       /**
     31        * Copy one every \p src_stride logical components of the argument into
     32        * one every \p dst_stride logical components of the result.
     33        */
     34       static src_reg
     35       emit_stride(const vec4_builder &bld, const src_reg &src, unsigned size,
     36                   unsigned dst_stride, unsigned src_stride)
     37       {
     38          if (src_stride == 1 && dst_stride == 1) {
     39             return src;
     40          } else {
     41             const dst_reg dst = bld.vgrf(src.type,
     42                                          DIV_ROUND_UP(size * dst_stride, 4));
     43 
     44             for (unsigned i = 0; i < size; ++i)
     45                bld.MOV(writemask(offset(dst, 8, i * dst_stride / 4),
     46                                  1 << (i * dst_stride % 4)),
     47                        swizzle(offset(src, 8, i * src_stride / 4),
     48                                brw_swizzle_for_mask(1 << (i * src_stride % 4))));
     49 
     50             return src_reg(dst);
     51          }
     52       }
     53 
     54       /**
     55        * Convert a VEC4 into an array of registers with the layout expected by
     56        * the recipient shared unit.  If \p has_simd4x2 is true the argument is
     57        * left unmodified in SIMD4x2 form, otherwise it will be rearranged into
     58        * a SIMD8 vector.
     59        */
     60       static src_reg
     61       emit_insert(const vec4_builder &bld, const src_reg &src,
     62                   unsigned n, bool has_simd4x2)
     63       {
     64          if (src.file == BAD_FILE || n == 0) {
     65             return src_reg();
     66 
     67          } else {
     68             /* Pad unused components with zeroes. */
     69             const unsigned mask = (1 << n) - 1;
     70             const dst_reg tmp = bld.vgrf(src.type);
     71 
     72             bld.MOV(writemask(tmp, mask), src);
     73             if (n < 4)
     74                bld.MOV(writemask(tmp, ~mask), brw_imm_d(0));
     75 
     76             return emit_stride(bld, src_reg(tmp), n, has_simd4x2 ? 1 : 4, 1);
     77          }
     78       }
     79 
     80       /**
     81        * Convert an array of registers back into a VEC4 according to the
     82        * layout expected from some shared unit.  If \p has_simd4x2 is true the
     83        * argument is left unmodified in SIMD4x2 form, otherwise it will be
     84        * rearranged from SIMD8 form.
     85        */
     86       static src_reg
     87       emit_extract(const vec4_builder &bld, const src_reg src,
     88                    unsigned n, bool has_simd4x2)
     89       {
     90          if (src.file == BAD_FILE || n == 0) {
     91             return src_reg();
     92 
     93          } else {
     94             return emit_stride(bld, src, n, 1, has_simd4x2 ? 1 : 4);
     95          }
     96       }
     97    }
     98 }
     99 
    100 namespace brw {
    101    namespace surface_access {
    102       namespace {
    103          using namespace array_utils;
    104 
    105          /**
    106           * Generate a send opcode for a surface message and return the
    107           * result.
    108           */
    109          src_reg
    110          emit_send(const vec4_builder &bld, enum opcode op,
    111                    const src_reg &header,
    112                    const src_reg &addr, unsigned addr_sz,
    113                    const src_reg &src, unsigned src_sz,
    114                    const src_reg &surface,
    115                    unsigned arg, unsigned ret_sz,
    116                    brw_predicate pred = BRW_PREDICATE_NONE)
    117          {
    118             /* Calculate the total number of components of the payload. */
    119             const unsigned header_sz = (header.file == BAD_FILE ? 0 : 1);
    120             const unsigned sz = header_sz + addr_sz + src_sz;
    121 
    122             /* Construct the payload. */
    123             const dst_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz);
    124             unsigned n = 0;
    125 
    126             if (header_sz)
    127                bld.exec_all().MOV(offset(payload, 8, n++),
    128                                   retype(header, BRW_REGISTER_TYPE_UD));
    129 
    130             for (unsigned i = 0; i < addr_sz; i++)
    131                bld.MOV(offset(payload, 8, n++),
    132                        offset(retype(addr, BRW_REGISTER_TYPE_UD), 8, i));
    133 
    134             for (unsigned i = 0; i < src_sz; i++)
    135                bld.MOV(offset(payload, 8, n++),
    136                        offset(retype(src, BRW_REGISTER_TYPE_UD), 8, i));
    137 
    138             /* Reduce the dynamically uniform surface index to a single
    139              * scalar.
    140              */
    141             const src_reg usurface = bld.emit_uniformize(surface);
    142 
    143             /* Emit the message send instruction. */
    144             const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, ret_sz);
    145             vec4_instruction *inst =
    146                bld.emit(op, dst, src_reg(payload), usurface, brw_imm_ud(arg));
    147             inst->mlen = sz;
    148             inst->size_written = ret_sz * REG_SIZE;
    149             inst->header_size = header_sz;
    150             inst->predicate = pred;
    151 
    152             return src_reg(dst);
    153          }
    154       }
    155 
    156       /**
    157        * Emit an untyped surface read opcode.  \p dims determines the number
    158        * of components of the address and \p size the number of components of
    159        * the returned value.
    160        */
    161       src_reg
    162       emit_untyped_read(const vec4_builder &bld,
    163                         const src_reg &surface, const src_reg &addr,
    164                         unsigned dims, unsigned size,
    165                         brw_predicate pred)
    166       {
    167          return emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_READ, src_reg(),
    168                           emit_insert(bld, addr, dims, true), 1,
    169                           src_reg(), 0,
    170                           surface, size, 1, pred);
    171       }
    172 
    173       /**
    174        * Emit an untyped surface write opcode.  \p dims determines the number
    175        * of components of the address and \p size the number of components of
    176        * the argument.
    177        */
    178       void
    179       emit_untyped_write(const vec4_builder &bld, const src_reg &surface,
    180                          const src_reg &addr, const src_reg &src,
    181                          unsigned dims, unsigned size,
    182                          brw_predicate pred)
    183       {
    184          const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
    185                                    bld.shader->devinfo->is_haswell);
    186          emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_WRITE, src_reg(),
    187                    emit_insert(bld, addr, dims, has_simd4x2),
    188                    has_simd4x2 ? 1 : dims,
    189                    emit_insert(bld, src, size, has_simd4x2),
    190                    has_simd4x2 ? 1 : size,
    191                    surface, size, 0, pred);
    192       }
    193 
    194       /**
    195        * Emit an untyped surface atomic opcode.  \p dims determines the number
    196        * of components of the address and \p rsize the number of components of
    197        * the returned value (either zero or one).
    198        */
    199       src_reg
    200       emit_untyped_atomic(const vec4_builder &bld,
    201                           const src_reg &surface, const src_reg &addr,
    202                           const src_reg &src0, const src_reg &src1,
    203                           unsigned dims, unsigned rsize, unsigned op,
    204                           brw_predicate pred)
    205       {
    206          const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
    207                                    bld.shader->devinfo->is_haswell);
    208 
    209          /* Zip the components of both sources, they are represented as the X
    210           * and Y components of the same vector.
    211           */
    212          const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
    213          const dst_reg srcs = bld.vgrf(BRW_REGISTER_TYPE_UD);
    214 
    215          if (size >= 1) {
    216             bld.MOV(writemask(srcs, WRITEMASK_X),
    217                     swizzle(src0, BRW_SWIZZLE_XXXX));
    218          }
    219 
    220          if (size >= 2) {
    221             bld.MOV(writemask(srcs, WRITEMASK_Y),
    222                     swizzle(src1, BRW_SWIZZLE_XXXX));
    223          }
    224 
    225          return emit_send(bld, SHADER_OPCODE_UNTYPED_ATOMIC, src_reg(),
    226                           emit_insert(bld, addr, dims, has_simd4x2),
    227                           has_simd4x2 ? 1 : dims,
    228                           emit_insert(bld, src_reg(srcs), size, has_simd4x2),
    229                           has_simd4x2 && size ? 1 : size,
    230                           surface, op, rsize, pred);
    231       }
    232 
    233       namespace {
    234          /**
    235           * Initialize the header present in typed surface messages.
    236           */
    237          src_reg
    238          emit_typed_message_header(const vec4_builder &bld)
    239          {
    240             const vec4_builder ubld = bld.exec_all();
    241             const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD);
    242 
    243             ubld.MOV(dst, brw_imm_d(0));
    244 
    245             if (bld.shader->devinfo->gen == 7 &&
    246                 !bld.shader->devinfo->is_haswell) {
    247                /* The sample mask is used on IVB for the SIMD8 messages that
    248                 * have no SIMD4x2 variant.  We only use the two X channels
    249                 * in that case, mask everything else out.
    250                 */
    251                ubld.MOV(writemask(dst, WRITEMASK_W), brw_imm_d(0x11));
    252             }
    253 
    254             return src_reg(dst);
    255          }
    256       }
    257 
    258       /**
    259        * Emit a typed surface read opcode.  \p dims determines the number of
    260        * components of the address and \p size the number of components of the
    261        * returned value.
    262        */
    263       src_reg
    264       emit_typed_read(const vec4_builder &bld, const src_reg &surface,
    265                       const src_reg &addr, unsigned dims, unsigned size)
    266       {
    267          const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
    268                                    bld.shader->devinfo->is_haswell);
    269          const src_reg tmp =
    270             emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_READ,
    271                       emit_typed_message_header(bld),
    272                       emit_insert(bld, addr, dims, has_simd4x2),
    273                       has_simd4x2 ? 1 : dims,
    274                       src_reg(), 0,
    275                       surface, size,
    276                       has_simd4x2 ? 1 : size);
    277 
    278          return emit_extract(bld, tmp, size, has_simd4x2);
    279       }
    280 
    281       /**
    282        * Emit a typed surface write opcode.  \p dims determines the number of
    283        * components of the address and \p size the number of components of the
    284        * argument.
    285        */
    286       void
    287       emit_typed_write(const vec4_builder &bld, const src_reg &surface,
    288                        const src_reg &addr, const src_reg &src,
    289                        unsigned dims, unsigned size)
    290       {
    291          const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
    292                                    bld.shader->devinfo->is_haswell);
    293          emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_WRITE,
    294                    emit_typed_message_header(bld),
    295                    emit_insert(bld, addr, dims, has_simd4x2),
    296                    has_simd4x2 ? 1 : dims,
    297                    emit_insert(bld, src, size, has_simd4x2),
    298                    has_simd4x2 ? 1 : size,
    299                    surface, size, 0);
    300       }
    301 
    302       /**
    303        * Emit a typed surface atomic opcode.  \p dims determines the number of
    304        * components of the address and \p rsize the number of components of
    305        * the returned value (either zero or one).
    306        */
    307       src_reg
    308       emit_typed_atomic(const vec4_builder &bld,
    309                         const src_reg &surface, const src_reg &addr,
    310                         const src_reg &src0, const src_reg &src1,
    311                         unsigned dims, unsigned rsize, unsigned op,
    312                         brw_predicate pred)
    313       {
    314          const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
    315                                    bld.shader->devinfo->is_haswell);
    316 
    317          /* Zip the components of both sources, they are represented as the X
    318           * and Y components of the same vector.
    319           */
    320          const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
    321          const dst_reg srcs = bld.vgrf(BRW_REGISTER_TYPE_UD);
    322 
    323          if (size >= 1)
    324             bld.MOV(writemask(srcs, WRITEMASK_X), src0);
    325          if (size >= 2)
    326             bld.MOV(writemask(srcs, WRITEMASK_Y), src1);
    327 
    328          return emit_send(bld, SHADER_OPCODE_TYPED_ATOMIC,
    329                           emit_typed_message_header(bld),
    330                           emit_insert(bld, addr, dims, has_simd4x2),
    331                           has_simd4x2 ? 1 : dims,
    332                           emit_insert(bld, src_reg(srcs), size, has_simd4x2),
    333                           has_simd4x2 ? 1 : size,
    334                           surface, op, rsize, pred);
    335       }
    336    }
    337 }
    338