/*
 * Copyright © 2013 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file brw_vec4_tes.cpp
 *
 * Tessellation evaluation shader specific code derived from the vec4_visitor class.
 */

#include "brw_vec4_tes.h"
#include "brw_cfg.h"

namespace brw {

vec4_tes_visitor::vec4_tes_visitor(const struct brw_compiler *compiler,
                                   void *log_data,
                                   const struct brw_tes_prog_key *key,
                                   struct brw_tes_prog_data *prog_data,
                                   const nir_shader *shader,
                                   void *mem_ctx,
                                   int shader_time_index)
   : vec4_visitor(compiler, log_data, &key->tex, &prog_data->base,
                  shader, mem_ctx, false, shader_time_index)
{
}


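/* TES system values (gl_TessCoord, the tessellation levels and the
 * primitive ID) are produced directly in nir_emit_intrinsic() below, so no
 * dedicated system value register needs to be created here.
 */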
dst_reg *
vec4_tes_visitor::make_reg_for_system_value(int location)
{
   return NULL;
}

void
vec4_tes_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_tess_level_outer:
   case nir_intrinsic_load_tess_level_inner:
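      /* Tessellation levels are read as ordinary push attributes in
       * nir_emit_intrinsic(), so there is nothing to set up for them here.
       */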
      break;
   default:
      vec4_visitor::nir_setup_system_value_intrinsic(instr);
   }
}


void
vec4_tes_visitor::setup_payload()
{
   int reg = 0;

   /* The payload always contains important data in r0 and r1, which contain
    * the URB handles that are passed on to the URB write at the end
    * of the thread.
    */
   reg += 2;

   reg = setup_uniforms(reg);

   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
      for (int i = 0; i < 3; i++) {
         if (inst->src[i].file != ATTR)
            continue;

         bool is_64bit = type_sz(inst->src[i].type) == 8;

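         /* Pushed inputs are laid out as 16-byte vec4 slots with two slots
          * packed per GRF: slot / 2 selects the register and slot % 2
          * selects its low or high half.
          */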
         unsigned slot = inst->src[i].nr + inst->src[i].offset / 16;
         struct brw_reg grf = brw_vec4_grf(reg + slot / 2, 4 * (slot % 2));
         grf = stride(grf, 0, is_64bit ? 2 : 4, 1);
         grf.swizzle = inst->src[i].swizzle;
         grf.type = inst->src[i].type;
         grf.abs = inst->src[i].abs;
         grf.negate = inst->src[i].negate;

         /* For 64-bit attributes we can end up with components XY in the
          * second half of a register and components ZW in the first half
          * of the next. Fix it up here.
          */
         if (is_64bit && grf.subnr > 0) {
            /* We can't do swizzles that mix XY and ZW channels in this case.
             * Such cases should have been handled by the scalarization pass.
             */
            assert((brw_mask_for_swizzle(grf.swizzle) & 0x3) ^
                   (brw_mask_for_swizzle(grf.swizzle) & 0xc));
            if (brw_mask_for_swizzle(grf.swizzle) & 0xc) {
               grf.subnr = 0;
               grf.nr++;
               grf.swizzle -= BRW_SWIZZLE_ZZZZ;
            }
         }

         inst->src[i] = grf;
      }
   }

   reg += 8 * prog_data->urb_read_length;

   this->first_non_payload_grf = reg;
}


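/* Emit code that runs before the main shader body: build the header that
 * later URB read messages use to address this patch's input data.
 */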
void
vec4_tes_visitor::emit_prolog()
{
   input_read_header = src_reg(this, glsl_type::uvec4_type);
   emit(TES_OPCODE_CREATE_INPUT_READ_HEADER, dst_reg(input_read_header));

   this->current_annotation = NULL;
}


void
vec4_tes_visitor::emit_urb_write_header(int mrf)
{
   /* No need to do anything for DS; an implied write to this MRF will be
    * performed by VS_OPCODE_URB_WRITE.
    */
   (void) mrf;
}


vec4_instruction *
vec4_tes_visitor::emit_urb_write_opcode(bool complete)
{
   /* For DS, the URB writes end the thread. */
   if (complete) {
      if (INTEL_DEBUG & DEBUG_SHADER_TIME)
         emit_shader_time_end();
   }

   vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
   inst->urb_write_flags = complete ?
      BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS;

   return inst;
}

void
vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
   const struct brw_tes_prog_data *tes_prog_data =
      (const struct brw_tes_prog_data *) prog_data;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_tess_coord:
      /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */
      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
               src_reg(brw_vec8_grf(1, 0))));
      break;
   case nir_intrinsic_load_tess_level_outer:
      if (tes_prog_data->domain == BRW_TESS_DOMAIN_ISOLINE) {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
                          BRW_SWIZZLE_ZWZW)));
      } else {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
                          BRW_SWIZZLE_WZYX)));
      }
      break;
   case nir_intrinsic_load_tess_level_inner:
      if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 0, glsl_type::vec4_type),
                          BRW_SWIZZLE_WZYX)));
      } else {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  src_reg(ATTR, 1, glsl_type::float_type)));
      }
      break;
   case nir_intrinsic_load_primitive_id:
      emit(TES_OPCODE_GET_PRIMITIVE_ID,
           get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
      break;

   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_vertex_input: {
      src_reg indirect_offset = get_indirect_offset(instr);
      unsigned imm_offset = instr->const_index[0];
      src_reg header = input_read_header;
      bool is_64bit = nir_dest_bit_size(instr->dest) == 64;
      unsigned first_component = nir_intrinsic_component(instr);
      if (is_64bit)
         first_component /= 2;

      if (indirect_offset.file != BAD_FILE) {
         header = src_reg(this, glsl_type::uvec4_type);
         emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
              input_read_header, indirect_offset);
      } else {
         /* Arbitrarily only push up to 24 vec4 slots worth of data,
          * which is 12 registers (since each holds 2 vec4 slots).
          */
         const unsigned max_push_slots = 24;
         if (imm_offset < max_push_slots) {
            const glsl_type *src_glsl_type =
               is_64bit ? glsl_type::dvec4_type : glsl_type::ivec4_type;
            src_reg src = src_reg(ATTR, imm_offset, src_glsl_type);
            src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

            const brw_reg_type dst_reg_type =
               is_64bit ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_D;
            emit(MOV(get_nir_dest(instr->dest, dst_reg_type), src));

            prog_data->urb_read_length =
               MAX2(prog_data->urb_read_length,
                    DIV_ROUND_UP(imm_offset + (is_64bit ? 2 : 1), 2));
            break;
         }
      }

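      /* The input couldn't be pushed, so fall back to reading it from the
       * URB with an explicit (possibly indirectly offset) read message.
       */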
      if (!is_64bit) {
         dst_reg temp(this, glsl_type::ivec4_type);
         vec4_instruction *read =
            emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
         read->offset = imm_offset;
         read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;

         src_reg src = src_reg(temp);
         src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

         /* Copy to target.  We might end up with some funky writemasks landing
          * in here, but we really don't want them in the above pseudo-ops.
          */
         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
         dst.writemask = brw_writemask_for_size(instr->num_components);
         emit(MOV(dst, src));
      } else {
         /* For 64-bit we need to load twice as many 32-bit components, and for
          * dvec3/4 we need to emit 2 URB Read messages
          */
         dst_reg temp(this, glsl_type::dvec4_type);
         dst_reg temp_d = retype(temp, BRW_REGISTER_TYPE_D);

         vec4_instruction *read =
            emit(VEC4_OPCODE_URB_READ, temp_d, src_reg(header));
         read->offset = imm_offset;
         read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;

         if (instr->num_components > 2) {
            read = emit(VEC4_OPCODE_URB_READ, byte_offset(temp_d, REG_SIZE),
                        src_reg(header));
            read->offset = imm_offset + 1;
            read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
         }

         src_reg temp_as_src = src_reg(temp);
         temp_as_src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

         dst_reg shuffled(this, glsl_type::dvec4_type);
         shuffle_64bit_data(shuffled, temp_as_src, false);

         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_DF);
         dst.writemask = brw_writemask_for_size(instr->num_components);
         emit(MOV(dst, src_reg(shuffled)));
      }
      break;
   }
   default:
      vec4_visitor::nir_emit_intrinsic(instr);
   }
}


void
vec4_tes_visitor::emit_thread_end()
{
   /* For DS, we always end the thread by emitting a single vertex.
    * emit_urb_write_opcode() will take care of setting the eot flag on the
    * SEND instruction.
    */
   emit_vertex();
}

} /* namespace brw */