Home | History | Annotate | Download | only in core
      1 /*
      2  * Mesa 3-D graphics library
      3  *
      4  * Copyright (C) 2012-2015 LunarG, Inc.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice shall be included
     14  * in all copies or substantial portions of the Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     22  * DEALINGS IN THE SOFTWARE.
     23  *
     24  * Authors:
     25  *    Chia-I Wu <olv (at) lunarg.com>
     26  */
     27 
     28 #include "ilo_debug.h"
     29 #include "ilo_vma.h"
     30 #include "ilo_state_sol.h"
     31 
     32 static bool
     33 sol_stream_validate_gen7(const struct ilo_dev *dev,
     34                          const struct ilo_state_sol_stream_info *stream)
     35 {
     36    uint8_t i;
     37 
     38    ILO_DEV_ASSERT(dev, 7, 8);
     39 
     40    assert(stream->vue_read_base + stream->vue_read_count <=
     41          stream->cv_vue_attr_count);
     42 
     43    /*
     44     * From the Ivy Bridge PRM, volume 2 part 1, page 200:
     45     *
     46     *     "(Stream 0 Vertex Read Offset)
     47     *      Format: U1 count of 256-bit units
     48     *
     49     *      Specifies amount of data to skip over before reading back Stream 0
     50     *      vertex data. Must be zero if the GS is enabled and the Output
     51     *      Vertex Size field in 3DSTATE_GS is programmed to 0 (i.e., one 16B
     52     *      unit)."
     53     *
     54     *     "(Stream 0 Vertex Read Length)
     55     *      Format: U5-1 count of 256-bit units
     56     *
     57     *      Specifies amount of vertex data to read back for Stream 0 vertices,
     58     *      starting at the Stream 0 Vertex Read Offset location. Maximum
     59     *      readback is 17 256-bit units (34 128-bit vertex attributes). Read
     60     *      data past the end of the valid vertex data has undefined contents,
     61     *      and therefore shouldn't be used to source stream out data.  Must be
     62     *      zero (i.e., read length = 256b) if the GS is enabled and the Output
     63     *      Vertex Size field in 3DSTATE_GS is programmed to 0 (i.e., one 16B
     64     *      unit)."
     65     */
     66    assert(stream->vue_read_base == 0 || stream->vue_read_base == 2);
     67    assert(stream->vue_read_count <= 34);
     68 
     69    assert(stream->decl_count <= ILO_STATE_SOL_MAX_DECL_COUNT);
     70 
     71    for (i = 0; i < stream->decl_count; i++) {
     72       const struct ilo_state_sol_decl_info *decl = &stream->decls[i];
     73 
     74       assert(decl->is_hole || decl->attr < stream->vue_read_count);
     75 
     76       /*
     77        * From the Ivy Bridge PRM, volume 2 part 1, page 205:
     78        *
     79        *     "There is only enough internal storage for the 128-bit vertex
     80        *      header and 32 128-bit vertex attributes."
     81        */
     82       assert(decl->attr < 33);
     83 
     84       assert(decl->component_base < 4 &&
     85              decl->component_base + decl->component_count <= 4);
     86       assert(decl->buffer < ILO_STATE_SOL_MAX_BUFFER_COUNT);
     87    }
     88 
     89    return true;
     90 }
     91 
     92 static bool
     93 sol_validate_gen7(const struct ilo_dev *dev,
     94                   const struct ilo_state_sol_info *info)
     95 {
     96    uint8_t i;
     97 
     98    ILO_DEV_ASSERT(dev, 7, 8);
     99 
    100    /*
    101     * From the Ivy Bridge PRM, volume 2 part 1, page 198:
    102     *
    103     *     "This bit (Render Stream Select) is used even if SO Function Enable
    104     *      is DISABLED."
    105     *
    106     * From the Haswell PRM, volume 2b, page 796:
    107     *
    108     *     "SO Function Enable must also be ENABLED in order for thiis field
    109     *      (Render Stream Select) to select a stream for rendering. When SO
    110     *      Function Enable is DISABLED and Rendering Disable is cleared (i.e.,
    111     *      rendering is enabled), StreamID is ignored downstream of the SO
    112     *      stage, allowing any stream to be rendered."
    113     *
    114     * We want Gen7 behavior, but we have to require users to follow Gen7.5
    115     * behavior: info->sol_enable must be set for info->render_stream to work.
    116     */
    117 
    118    for (i = 0; i < ARRAY_SIZE(info->streams); i++) {
    119       if (!sol_stream_validate_gen7(dev, &info->streams[i]))
    120          return false;
    121    }
    122 
    123    /*
    124     * From the Ivy Bridge PRM, volume 2 part 1, page 208:
    125     *
    126     *     "(Surface Pitch)
    127     *      [0,2048]  Must be 0 or a multiple of 4 Bytes."
    128     */
    129    for (i = 0; i < ARRAY_SIZE(info->buffer_strides); i++) {
    130       assert(info->buffer_strides[i] <= 2048 &&
    131              info->buffer_strides[i] % 4 == 0);
    132    }
    133 
    134    return true;
    135 }
    136 
    137 static bool
    138 sol_set_gen7_3DSTATE_STREAMOUT(struct ilo_state_sol *sol,
    139                                const struct ilo_dev *dev,
    140                                const struct ilo_state_sol_info *info)
    141 {
    142    struct {
    143       uint8_t offset;
    144       uint8_t len;
    145    } vue_read[ILO_STATE_SOL_MAX_STREAM_COUNT];
    146    uint8_t i;
    147    uint32_t dw1, dw2;
    148 
    149    ILO_DEV_ASSERT(dev, 7, 8);
    150 
    151    if (!sol_validate_gen7(dev, info))
    152       return false;
    153 
    154    for (i = 0; i < ARRAY_SIZE(info->streams); i++) {
    155       const struct ilo_state_sol_stream_info *stream = &info->streams[i];
    156 
    157       vue_read[i].offset = stream->vue_read_base / 2;
    158       /*
    159        * In pairs minus 1.  URB entries are aligned to 512-bits.  There is no
    160        * need to worry about reading past entries.
    161        */
    162       vue_read[i].len = (stream->vue_read_count + 1) / 2;
    163       if (vue_read[i].len)
    164          vue_read[i].len--;
    165    }
    166 
    167    dw1 = info->render_stream << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT |
    168          info->tristrip_reorder << GEN7_SO_DW1_REORDER_MODE__SHIFT;
    169 
    170    if (info->sol_enable)
    171       dw1 |= GEN7_SO_DW1_SO_ENABLE;
    172 
    173    if (info->render_disable)
    174       dw1 |= GEN7_SO_DW1_RENDER_DISABLE;
    175 
    176    if (info->stats_enable)
    177       dw1 |= GEN7_SO_DW1_STATISTICS;
    178 
    179    if (ilo_dev_gen(dev) < ILO_GEN(8)) {
    180       const uint8_t buffer_enables = ((bool) info->buffer_strides[3]) << 3 |
    181                                      ((bool) info->buffer_strides[2]) << 2 |
    182                                      ((bool) info->buffer_strides[1]) << 1 |
    183                                      ((bool) info->buffer_strides[0]);
    184       dw1 |= buffer_enables << GEN7_SO_DW1_BUFFER_ENABLES__SHIFT;
    185    }
    186 
    187    dw2 = vue_read[3].offset << GEN7_SO_DW2_STREAM3_READ_OFFSET__SHIFT |
    188          vue_read[3].len << GEN7_SO_DW2_STREAM3_READ_LEN__SHIFT |
    189          vue_read[2].offset << GEN7_SO_DW2_STREAM2_READ_OFFSET__SHIFT |
    190          vue_read[2].len << GEN7_SO_DW2_STREAM2_READ_LEN__SHIFT |
    191          vue_read[1].offset << GEN7_SO_DW2_STREAM1_READ_OFFSET__SHIFT |
    192          vue_read[1].len << GEN7_SO_DW2_STREAM1_READ_LEN__SHIFT |
    193          vue_read[0].offset << GEN7_SO_DW2_STREAM0_READ_OFFSET__SHIFT |
    194          vue_read[0].len << GEN7_SO_DW2_STREAM0_READ_LEN__SHIFT;
    195 
    196    STATIC_ASSERT(ARRAY_SIZE(sol->streamout) >= 2);
    197    sol->streamout[0] = dw1;
    198    sol->streamout[1] = dw2;
    199 
    200    memcpy(sol->strides, info->buffer_strides, sizeof(sol->strides));
    201 
    202    return true;
    203 }
    204 
    205 static bool
    206 sol_set_gen7_3DSTATE_SO_DECL_LIST(struct ilo_state_sol *sol,
    207                                   const struct ilo_dev *dev,
    208                                   const struct ilo_state_sol_info *info,
    209                                   uint8_t max_decl_count)
    210 {
    211    uint64_t decl_list[ILO_STATE_SOL_MAX_DECL_COUNT];
    212    uint8_t decl_counts[ILO_STATE_SOL_MAX_STREAM_COUNT];
    213    uint8_t buffer_selects[ILO_STATE_SOL_MAX_STREAM_COUNT];
    214    uint32_t dw1, dw2;
    215    uint8_t i, j;
    216 
    217    ILO_DEV_ASSERT(dev, 7, 8);
    218 
    219    memset(decl_list, 0, sizeof(decl_list[0]) * max_decl_count);
    220 
    221    for (i = 0; i < ARRAY_SIZE(info->streams); i++) {
    222       const struct ilo_state_sol_stream_info *stream = &info->streams[i];
    223 
    224       assert(stream->decl_count <= max_decl_count);
    225       decl_counts[i] = stream->decl_count;
    226       buffer_selects[i] = 0;
    227 
    228       for (j = 0; j < stream->decl_count; j++) {
    229          const struct ilo_state_sol_decl_info *decl = &stream->decls[j];
    230          const uint8_t mask = ((1 << decl->component_count) - 1) <<
    231             decl->component_base;
    232          uint16_t val;
    233 
    234          val = decl->buffer << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
    235                mask << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
    236 
    237          if (decl->is_hole)
    238             val |= GEN7_SO_DECL_HOLE_FLAG;
    239          else
    240             val |= decl->attr << GEN7_SO_DECL_REG_INDEX__SHIFT;
    241 
    242          decl_list[j] |= (uint64_t) val << (16 * i);
    243          buffer_selects[i] |= 1 << decl->buffer;
    244       }
    245    }
    246 
    247    dw1 = buffer_selects[3] << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT |
    248          buffer_selects[2] << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT |
    249          buffer_selects[1] << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT |
    250          buffer_selects[0] << GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT;
    251    dw2 = decl_counts[3] << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT |
    252          decl_counts[2] << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT |
    253          decl_counts[1] << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT |
    254          decl_counts[0] << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT;
    255 
    256    STATIC_ASSERT(ARRAY_SIZE(sol->so_decl) >= 2);
    257    sol->so_decl[0] = dw1;
    258    sol->so_decl[1] = dw2;
    259 
    260    STATIC_ASSERT(ARRAY_SIZE(sol->decl[0]) == 2);
    261    memcpy(sol->decl, decl_list, sizeof(sol->decl[0]) * max_decl_count);
    262    sol->decl_count = max_decl_count;
    263 
    264    return true;
    265 }
    266 
    267 static bool
    268 sol_buffer_validate_gen7(const struct ilo_dev *dev,
    269                          const struct ilo_state_sol_buffer_info *info)
    270 {
    271    ILO_DEV_ASSERT(dev, 7, 8);
    272 
    273    /*
    274     * From the Ivy Bridge PRM, volume 2 part 1, page 208:
    275     *
    276     *     "(Surface Base Address) This field specifies the starting DWord
    277     *      address..."
    278     */
    279    assert(info->offset % 4 == 0);
    280 
    281    if (info->vma) {
    282       assert(info->vma->vm_alignment % 4 == 0);
    283       assert(info->size && info->offset + info->size <= info->vma->vm_size);
    284    }
    285 
    286    /* Gen8+ only */
    287    if (info->write_offset_load || info->write_offset_save) {
    288       assert(ilo_dev_gen(dev) >= ILO_GEN(8) && info->write_offset_vma);
    289       assert(info->write_offset_offset + sizeof(uint32_t) <=
    290             info->write_offset_vma->vm_size);
    291    }
    292 
    293    /*
    294     * From the Broadwell PRM, volume 2b, page 206:
    295     *
    296     *     "This field (Stream Offset) specifies the Offset in stream output
    297     *      buffer to start at, or whether to append to the end of an existing
    298     *      buffer. The Offset must be DWORD aligned."
    299     */
    300    if (info->write_offset_imm_enable) {
    301       assert(info->write_offset_load);
    302       assert(info->write_offset_imm % 4 == 0);
    303    }
    304 
    305    return true;
    306 }
    307 
    308 static uint32_t
    309 sol_buffer_get_gen6_size(const struct ilo_dev *dev,
    310                          const struct ilo_state_sol_buffer_info *info)
    311 {
    312    ILO_DEV_ASSERT(dev, 6, 8);
    313 
    314    /*
    315     * From the Ivy Bridge PRM, volume 2 part 1, page 208:
    316     *
    317     *     "(Surface End Address) This field specifies the ending DWord
    318     *      address..."
    319     */
    320    return (info->vma) ? info->size & ~3 : 0;
    321 }
    322 
    323 static bool
    324 sol_buffer_set_gen7_3dstate_so_buffer(struct ilo_state_sol_buffer *sb,
    325                                       const struct ilo_dev *dev,
    326                                       const struct ilo_state_sol_buffer_info *info)
    327 {
    328    const uint32_t size = sol_buffer_get_gen6_size(dev, info);
    329 
    330    ILO_DEV_ASSERT(dev, 7, 7.5);
    331 
    332    if (!sol_buffer_validate_gen7(dev, info))
    333       return false;
    334 
    335    STATIC_ASSERT(ARRAY_SIZE(sb->so_buf) >= 2);
    336    sb->so_buf[0] = info->offset;
    337    sb->so_buf[1] = (size) ? info->offset + size : 0;
    338 
    339    return true;
    340 }
    341 
    342 static bool
    343 sol_buffer_set_gen8_3dstate_so_buffer(struct ilo_state_sol_buffer *sb,
    344                                       const struct ilo_dev *dev,
    345                                       const struct ilo_state_sol_buffer_info *info)
    346 {
    347    const uint32_t size = sol_buffer_get_gen6_size(dev, info);
    348    uint32_t dw1;
    349 
    350    ILO_DEV_ASSERT(dev, 8, 8);
    351 
    352    if (!sol_buffer_validate_gen7(dev, info))
    353       return false;
    354 
    355    dw1 = 0;
    356 
    357    if (info->vma)
    358       dw1 |= GEN8_SO_BUF_DW1_ENABLE;
    359    if (info->write_offset_load)
    360       dw1 |= GEN8_SO_BUF_DW1_OFFSET_WRITE_ENABLE;
    361    if (info->write_offset_save)
    362       dw1 |= GEN8_SO_BUF_DW1_OFFSET_ENABLE;
    363 
    364    STATIC_ASSERT(ARRAY_SIZE(sb->so_buf) >= 4);
    365    sb->so_buf[0] = dw1;
    366    sb->so_buf[1] = info->offset;
    367 
    368    /*
    369     * From the Broadwell PRM, volume 2b, page 205:
    370     *
    371     *     "This field (Surface Size) specifies the size of buffer in number
    372     *      DWords minus 1 of the buffer in Graphics Memory."
    373     */
    374    sb->so_buf[2] = (size) ? size / 4 - 1 : 0;
    375 
    376    /* load from imm or sb->write_offset_bo */
    377    sb->so_buf[3] = (info->write_offset_imm_enable) ?
    378       info->write_offset_imm : ~0u;
    379 
    380    return true;
    381 }
    382 
    383 bool
    384 ilo_state_sol_init(struct ilo_state_sol *sol,
    385                    const struct ilo_dev *dev,
    386                    const struct ilo_state_sol_info *info)
    387 {
    388    bool ret = true;
    389 
    390    assert(ilo_is_zeroed(sol, sizeof(*sol)));
    391    assert(ilo_is_zeroed(info->data, info->data_size));
    392 
    393    if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
    394       uint8_t max_decl_count, i;
    395 
    396       max_decl_count = info->streams[0].decl_count;
    397       for (i = 1; i < ARRAY_SIZE(info->streams); i++) {
    398          if (max_decl_count < info->streams[i].decl_count)
    399             max_decl_count = info->streams[i].decl_count;
    400       }
    401 
    402       assert(ilo_state_sol_data_size(dev, max_decl_count) <= info->data_size);
    403       sol->decl = (uint32_t (*)[2]) info->data;
    404 
    405       ret &= sol_set_gen7_3DSTATE_STREAMOUT(sol, dev, info);
    406       ret &= sol_set_gen7_3DSTATE_SO_DECL_LIST(sol, dev, info, max_decl_count);
    407    }
    408 
    409    assert(ret);
    410 
    411    return ret;
    412 }
    413 
    414 bool
    415 ilo_state_sol_init_disabled(struct ilo_state_sol *sol,
    416                             const struct ilo_dev *dev,
    417                             bool render_disable)
    418 {
    419    struct ilo_state_sol_info info;
    420 
    421    memset(&info, 0, sizeof(info));
    422    info.render_disable = render_disable;
    423 
    424    return ilo_state_sol_init(sol, dev, &info);
    425 }
    426 
    427 uint32_t
    428 ilo_state_sol_buffer_size(const struct ilo_dev *dev, uint32_t size,
    429                           uint32_t *alignment)
    430 {
    431    /* DWord aligned without padding */
    432    *alignment = 4;
    433    return size;
    434 }
    435 
    436 bool
    437 ilo_state_sol_buffer_init(struct ilo_state_sol_buffer *sb,
    438                           const struct ilo_dev *dev,
    439                           const struct ilo_state_sol_buffer_info *info)
    440 {
    441    bool ret = true;
    442 
    443    assert(ilo_is_zeroed(sb, sizeof(*sb)));
    444 
    445    if (ilo_dev_gen(dev) >= ILO_GEN(8))
    446       ret &= sol_buffer_set_gen8_3dstate_so_buffer(sb, dev, info);
    447    else
    448       ret &= sol_buffer_set_gen7_3dstate_so_buffer(sb, dev, info);
    449 
    450    sb->vma = info->vma;
    451    sb->write_offset_vma = info->write_offset_vma;
    452 
    453    assert(ret);
    454 
    455    return ret;
    456 }
    457 
    458 bool
    459 ilo_state_sol_buffer_init_disabled(struct ilo_state_sol_buffer *sb,
    460                                    const struct ilo_dev *dev)
    461 {
    462    struct ilo_state_sol_buffer_info info;
    463 
    464    memset(&info, 0, sizeof(info));
    465 
    466    return ilo_state_sol_buffer_init(sb, dev, &info);
    467 }
    468