Home | History | Annotate | Download | only in vbo
      1 
      2 /*
      3  * Mesa 3-D graphics library
      4  *
      5  * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining a
      8  * copy of this software and associated documentation files (the "Software"),
      9  * to deal in the Software without restriction, including without limitation
     10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     11  * and/or sell copies of the Software, and to permit persons to whom the
     12  * Software is furnished to do so, subject to the following conditions:
     13  *
     14  * The above copyright notice and this permission notice shall be included
     15  * in all copies or substantial portions of the Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
     21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     23  * OTHER DEALINGS IN THE SOFTWARE.
     24  *
     25  * Authors:
     26  *    Keith Whitwell <keithw (at) vmware.com>
     27  */
     28 
     29 /* Split indexed primitives with per-vertex copying.
     30  */
     31 
     32 #include <stdio.h>
     33 
     34 #include "main/glheader.h"
     35 #include "main/bufferobj.h"
     36 #include "main/imports.h"
     37 #include "main/glformats.h"
     38 #include "main/macros.h"
     39 #include "main/mtypes.h"
     40 
     41 #include "vbo_split.h"
     42 #include "vbo.h"
     43 
     44 
     45 #define ELT_TABLE_SIZE 16
     46 
     47 /**
     48  * Used for vertex-level splitting of indexed buffers.  Note that
     49  * non-indexed primitives may be converted to indexed in some cases
     50  * (eg loops, fans) in order to use this splitting path.
     51  */
     52 struct copy_context {
     53 
     54    struct gl_context *ctx;
     55    const struct gl_vertex_array **array;
     56    const struct _mesa_prim *prim;
     57    GLuint nr_prims;
     58    const struct _mesa_index_buffer *ib;
     59    vbo_draw_func draw;
     60 
     61    const struct split_limits *limits;
     62 
     63    struct {
     64       GLuint attr;
     65       GLuint size;
     66       const struct gl_vertex_array *array;
     67       const GLubyte *src_ptr;
     68 
     69       struct gl_vertex_array dstarray;
     70 
     71    } varying[VERT_ATTRIB_MAX];
     72    GLuint nr_varying;
     73 
     74    const struct gl_vertex_array *dstarray_ptr[VERT_ATTRIB_MAX];
     75    struct _mesa_index_buffer dstib;
     76 
     77    GLuint *translated_elt_buf;
     78    const GLuint *srcelt;
     79 
     80    /** A baby hash table to avoid re-emitting (some) duplicate
     81     * vertices when splitting indexed primitives.
     82     */
     83    struct {
     84       GLuint in;
     85       GLuint out;
     86    } vert_cache[ELT_TABLE_SIZE];
     87 
     88    GLuint vertex_size;
     89    GLubyte *dstbuf;
     90    GLubyte *dstptr;     /**< dstptr == dstbuf + dstelt_max * vertsize */
     91    GLuint dstbuf_size;  /**< in vertices */
     92    GLuint dstbuf_nr;    /**< count of emitted vertices, also the largest value
     93                          * in dstelt.  Our MaxIndex.
     94                          */
     95 
     96    GLuint *dstelt;
     97    GLuint dstelt_nr;
     98    GLuint dstelt_size;
     99 
    100 #define MAX_PRIM 32
    101    struct _mesa_prim dstprim[MAX_PRIM];
    102    GLuint dstprim_nr;
    103 
    104 };
    105 
    106 
    107 static GLuint attr_size( const struct gl_vertex_array *array )
    108 {
    109    return array->Size * _mesa_sizeof_type(array->Type);
    110 }
    111 
    112 
    113 /**
    114  * Starts returning true slightly before the buffer fills, to ensure
    115  * that there is sufficient room for any remaining vertices to finish
    116  * off the prim:
    117  */
    118 static GLboolean
    119 check_flush( struct copy_context *copy )
    120 {
    121    GLenum mode = copy->dstprim[copy->dstprim_nr].mode;
    122 
    123    if (GL_TRIANGLE_STRIP == mode &&
    124        copy->dstelt_nr & 1) { /* see bug9962 */
    125        return GL_FALSE;
    126    }
    127 
    128    if (copy->dstbuf_nr + 4 > copy->dstbuf_size)
    129       return GL_TRUE;
    130 
    131    if (copy->dstelt_nr + 4 > copy->dstelt_size)
    132       return GL_TRUE;
    133 
    134    return GL_FALSE;
    135 }
    136 
    137 
    138 /**
    139  * Dump the parameters/info for a vbo->draw() call.
    140  */
    141 static void
    142 dump_draw_info(struct gl_context *ctx,
    143                const struct gl_vertex_array **arrays,
    144                const struct _mesa_prim *prims,
    145                GLuint nr_prims,
    146                const struct _mesa_index_buffer *ib,
    147                GLuint min_index,
    148                GLuint max_index)
    149 {
    150    GLuint i, j;
    151 
    152    printf("VBO Draw:\n");
    153    for (i = 0; i < nr_prims; i++) {
    154       printf("Prim %u of %u\n", i, nr_prims);
    155       printf("  Prim mode 0x%x\n", prims[i].mode);
    156       printf("  IB: %p\n", (void*) ib);
    157       for (j = 0; j < VERT_ATTRIB_MAX; j++) {
    158          printf("    array %d at %p:\n", j, (void*) arrays[j]);
    159          printf("      ptr %p, size %d, type 0x%x, stride %d\n",
    160 		arrays[j]->Ptr,
    161 		arrays[j]->Size, arrays[j]->Type, arrays[j]->StrideB);
    162          if (0) {
    163             GLint k = prims[i].start + prims[i].count - 1;
    164             GLfloat *last = (GLfloat *) (arrays[j]->Ptr + arrays[j]->StrideB * k);
    165             printf("        last: %f %f %f\n",
    166 		   last[0], last[1], last[2]);
    167          }
    168       }
    169    }
    170 }
    171 
    172 
    173 static void
    174 flush( struct copy_context *copy )
    175 {
    176    struct gl_context *ctx = copy->ctx;
    177    const struct gl_vertex_array **saved_arrays = ctx->Array._DrawArrays;
    178    GLuint i;
    179 
    180    /* Set some counters:
    181     */
    182    copy->dstib.count = copy->dstelt_nr;
    183 
    184 #if 0
    185    dump_draw_info(copy->ctx,
    186                   copy->dstarray_ptr,
    187                   copy->dstprim,
    188                   copy->dstprim_nr,
    189                   &copy->dstib,
    190                   0,
    191                   copy->dstbuf_nr);
    192 #else
    193    (void) dump_draw_info;
    194 #endif
    195 
    196    ctx->Array._DrawArrays = copy->dstarray_ptr;
    197    ctx->NewDriverState |= ctx->DriverFlags.NewArray;
    198 
    199    copy->draw( ctx,
    200 	       copy->dstprim,
    201 	       copy->dstprim_nr,
    202 	       &copy->dstib,
    203 	       GL_TRUE,
    204 	       0,
    205 	       copy->dstbuf_nr - 1,
    206 	       NULL, 0, NULL );
    207 
    208    ctx->Array._DrawArrays = saved_arrays;
    209    ctx->NewDriverState |= ctx->DriverFlags.NewArray;
    210 
    211    /* Reset all pointers:
    212     */
    213    copy->dstprim_nr = 0;
    214    copy->dstelt_nr = 0;
    215    copy->dstbuf_nr = 0;
    216    copy->dstptr = copy->dstbuf;
    217 
    218    /* Clear the vertex cache:
    219     */
    220    for (i = 0; i < ELT_TABLE_SIZE; i++)
    221       copy->vert_cache[i].in = ~0;
    222 }
    223 
    224 
    225 /**
    226  * Called at begin of each primitive during replay.
    227  */
    228 static void
    229 begin( struct copy_context *copy, GLenum mode, GLboolean begin_flag )
    230 {
    231    struct _mesa_prim *prim = &copy->dstprim[copy->dstprim_nr];
    232 
    233    prim->mode = mode;
    234    prim->begin = begin_flag;
    235    prim->num_instances = 1;
    236 }
    237 
    238 
    239 /**
    240  * Use a hashtable to attempt to identify recently-emitted vertices
    241  * and avoid re-emitting them.
    242  */
    243 static GLuint
    244 elt(struct copy_context *copy, GLuint elt_idx)
    245 {
    246    GLuint elt = copy->srcelt[elt_idx] + copy->prim->basevertex;
    247    GLuint slot = elt & (ELT_TABLE_SIZE-1);
    248 
    249 /*    printf("elt %d\n", elt); */
    250 
    251    /* Look up the incoming element in the vertex cache.  Re-emit if
    252     * necessary.
    253     */
    254    if (copy->vert_cache[slot].in != elt) {
    255       GLubyte *csr = copy->dstptr;
    256       GLuint i;
    257 
    258 /*       printf("  --> emit to dstelt %d\n", copy->dstbuf_nr); */
    259 
    260       for (i = 0; i < copy->nr_varying; i++) {
    261 	 const struct gl_vertex_array *srcarray = copy->varying[i].array;
    262 	 const GLubyte *srcptr = copy->varying[i].src_ptr + elt * srcarray->StrideB;
    263 
    264 	 memcpy(csr, srcptr, copy->varying[i].size);
    265 	 csr += copy->varying[i].size;
    266 
    267 #ifdef NAN_CHECK
    268          if (srcarray->Type == GL_FLOAT) {
    269             GLuint k;
    270             GLfloat *f = (GLfloat *) srcptr;
    271             for (k = 0; k < srcarray->Size; k++) {
    272                assert(!IS_INF_OR_NAN(f[k]));
    273                assert(f[k] <= 1.0e20 && f[k] >= -1.0e20);
    274             }
    275          }
    276 #endif
    277 
    278 	 if (0)
    279 	 {
    280 	    const GLuint *f = (const GLuint *)srcptr;
    281 	    GLuint j;
    282 	    printf("  varying %d: ", i);
    283 	    for(j = 0; j < copy->varying[i].size / 4; j++)
    284 	       printf("%x ", f[j]);
    285 	    printf("\n");
    286 	 }
    287       }
    288 
    289       copy->vert_cache[slot].in = elt;
    290       copy->vert_cache[slot].out = copy->dstbuf_nr++;
    291       copy->dstptr += copy->vertex_size;
    292 
    293       assert(csr == copy->dstptr);
    294       assert(copy->dstptr == (copy->dstbuf +
    295                               copy->dstbuf_nr * copy->vertex_size));
    296    }
    297 /*    else */
    298 /*       printf("  --> reuse vertex\n"); */
    299 
    300 /*    printf("  --> emit %d\n", copy->vert_cache[slot].out); */
    301    copy->dstelt[copy->dstelt_nr++] = copy->vert_cache[slot].out;
    302    return check_flush(copy);
    303 }
    304 
    305 
    306 /**
    307  * Called at end of each primitive during replay.
    308  */
    309 static void
    310 end( struct copy_context *copy, GLboolean end_flag )
    311 {
    312    struct _mesa_prim *prim = &copy->dstprim[copy->dstprim_nr];
    313 
    314 /*    printf("end (%d)\n", end_flag); */
    315 
    316    prim->end = end_flag;
    317    prim->count = copy->dstelt_nr - prim->start;
    318 
    319    if (++copy->dstprim_nr == MAX_PRIM ||
    320        check_flush(copy))
    321       flush(copy);
    322 }
    323 
    324 
    325 static void
    326 replay_elts( struct copy_context *copy )
    327 {
    328    GLuint i, j, k;
    329    GLboolean split;
    330 
    331    for (i = 0; i < copy->nr_prims; i++) {
    332       const struct _mesa_prim *prim = &copy->prim[i];
    333       const GLuint start = prim->start;
    334       GLuint first, incr;
    335 
    336       switch (prim->mode) {
    337 
    338       case GL_LINE_LOOP:
    339 	 /* Convert to linestrip and emit the final vertex explicitly,
    340 	  * but only in the resultant strip that requires it.
    341 	  */
    342 	 j = 0;
    343 	 while (j != prim->count) {
    344 	    begin(copy, GL_LINE_STRIP, prim->begin && j == 0);
    345 
    346 	    for (split = GL_FALSE; j != prim->count && !split; j++)
    347 	       split = elt(copy, start + j);
    348 
    349 	    if (j == prim->count) {
    350 	       /* Done, emit final line.  Split doesn't matter as
    351 		* it is always raised a bit early so we can emit
    352 		* the last verts if necessary!
    353 		*/
    354 	       if (prim->end)
    355 		  (void)elt(copy, start + 0);
    356 
    357 	       end(copy, prim->end);
    358 	    }
    359 	    else {
    360 	       /* Wrap
    361 		*/
    362 	       assert(split);
    363 	       end(copy, 0);
    364 	       j--;
    365 	    }
    366 	 }
    367 	 break;
    368 
    369       case GL_TRIANGLE_FAN:
    370       case GL_POLYGON:
    371 	 j = 2;
    372 	 while (j != prim->count) {
    373 	    begin(copy, prim->mode, prim->begin && j == 0);
    374 
    375 	    split = elt(copy, start+0);
    376 	    assert(!split);
    377 
    378 	    split = elt(copy, start+j-1);
    379 	    assert(!split);
    380 
    381 	    for (; j != prim->count && !split; j++)
    382 	       split = elt(copy, start+j);
    383 
    384 	    end(copy, prim->end && j == prim->count);
    385 
    386 	    if (j != prim->count) {
    387 	       /* Wrapped the primitive, need to repeat some vertices:
    388 		*/
    389 	       j -= 1;
    390 	    }
    391 	 }
    392 	 break;
    393 
    394       default:
    395 	 (void)split_prim_inplace(prim->mode, &first, &incr);
    396 
    397 	 j = 0;
    398 	 while (j != prim->count) {
    399 
    400 	    begin(copy, prim->mode, prim->begin && j == 0);
    401 
    402 	    split = 0;
    403 	    for (k = 0; k < first; k++, j++)
    404 	       split |= elt(copy, start+j);
    405 
    406 	    assert(!split);
    407 
    408 	    for (; j != prim->count && !split; )
    409 	       for (k = 0; k < incr; k++, j++)
    410 		  split |= elt(copy, start+j);
    411 
    412 	    end(copy, prim->end && j == prim->count);
    413 
    414 	    if (j != prim->count) {
    415 	       /* Wrapped the primitive, need to repeat some vertices:
    416 		*/
    417 	       assert(j > first - incr);
    418 	       j -= (first - incr);
    419 	    }
    420 	 }
    421 	 break;
    422       }
    423    }
    424 
    425    if (copy->dstprim_nr)
    426       flush(copy);
    427 }
    428 
    429 
    430 static void
    431 replay_init( struct copy_context *copy )
    432 {
    433    struct gl_context *ctx = copy->ctx;
    434    GLuint i;
    435    GLuint offset;
    436    const GLvoid *srcptr;
    437 
    438    /* Make a list of varying attributes and their vbo's.  Also
    439     * calculate vertex size.
    440     */
    441    copy->vertex_size = 0;
    442    for (i = 0; i < VERT_ATTRIB_MAX; i++) {
    443       struct gl_buffer_object *vbo = copy->array[i]->BufferObj;
    444 
    445       if (copy->array[i]->StrideB == 0) {
    446 	 copy->dstarray_ptr[i] = copy->array[i];
    447       }
    448       else {
    449 	 GLuint j = copy->nr_varying++;
    450 
    451 	 copy->varying[j].attr = i;
    452 	 copy->varying[j].array = copy->array[i];
    453 	 copy->varying[j].size = attr_size(copy->array[i]);
    454 	 copy->vertex_size += attr_size(copy->array[i]);
    455 
    456 	 if (_mesa_is_bufferobj(vbo) &&
    457              !_mesa_bufferobj_mapped(vbo, MAP_INTERNAL))
    458 	    ctx->Driver.MapBufferRange(ctx, 0, vbo->Size, GL_MAP_READ_BIT, vbo,
    459                                        MAP_INTERNAL);
    460 
    461 	 copy->varying[j].src_ptr =
    462                ADD_POINTERS(vbo->Mappings[MAP_INTERNAL].Pointer,
    463                             copy->array[i]->Ptr);
    464 
    465 	 copy->dstarray_ptr[i] = &copy->varying[j].dstarray;
    466       }
    467    }
    468 
    469    /* There must always be an index buffer.  Currently require the
    470     * caller convert non-indexed prims to indexed.  Could alternately
    471     * do it internally.
    472     */
    473    if (_mesa_is_bufferobj(copy->ib->obj) &&
    474        !_mesa_bufferobj_mapped(copy->ib->obj, MAP_INTERNAL))
    475       ctx->Driver.MapBufferRange(ctx, 0, copy->ib->obj->Size, GL_MAP_READ_BIT,
    476 				 copy->ib->obj, MAP_INTERNAL);
    477 
    478    srcptr = (const GLubyte *)
    479             ADD_POINTERS(copy->ib->obj->Mappings[MAP_INTERNAL].Pointer,
    480                          copy->ib->ptr);
    481 
    482    switch (copy->ib->type) {
    483    case GL_UNSIGNED_BYTE:
    484       copy->translated_elt_buf = malloc(sizeof(GLuint) * copy->ib->count);
    485       copy->srcelt = copy->translated_elt_buf;
    486 
    487       for (i = 0; i < copy->ib->count; i++)
    488 	 copy->translated_elt_buf[i] = ((const GLubyte *)srcptr)[i];
    489       break;
    490 
    491    case GL_UNSIGNED_SHORT:
    492       copy->translated_elt_buf = malloc(sizeof(GLuint) * copy->ib->count);
    493       copy->srcelt = copy->translated_elt_buf;
    494 
    495       for (i = 0; i < copy->ib->count; i++)
    496 	 copy->translated_elt_buf[i] = ((const GLushort *)srcptr)[i];
    497       break;
    498 
    499    case GL_UNSIGNED_INT:
    500       copy->translated_elt_buf = NULL;
    501       copy->srcelt = (const GLuint *)srcptr;
    502       break;
    503    }
    504 
    505    /* Figure out the maximum allowed vertex buffer size:
    506     */
    507    if (copy->vertex_size * copy->limits->max_verts <= copy->limits->max_vb_size) {
    508       copy->dstbuf_size = copy->limits->max_verts;
    509    }
    510    else {
    511       copy->dstbuf_size = copy->limits->max_vb_size / copy->vertex_size;
    512    }
    513 
    514    /* Allocate an output vertex buffer:
    515     *
    516     * XXX:  This should be a VBO!
    517     */
    518    copy->dstbuf = malloc(copy->dstbuf_size * copy->vertex_size);
    519    copy->dstptr = copy->dstbuf;
    520 
    521    /* Setup new vertex arrays to point into the output buffer:
    522     */
    523    for (offset = 0, i = 0; i < copy->nr_varying; i++) {
    524       const struct gl_vertex_array *src = copy->varying[i].array;
    525       struct gl_vertex_array *dst = &copy->varying[i].dstarray;
    526 
    527       dst->Size = src->Size;
    528       dst->Type = src->Type;
    529       dst->Format = GL_RGBA;
    530       dst->StrideB = copy->vertex_size;
    531       dst->Ptr = copy->dstbuf + offset;
    532       dst->Normalized = src->Normalized;
    533       dst->Integer = src->Integer;
    534       dst->Doubles = src->Doubles;
    535       dst->BufferObj = ctx->Shared->NullBufferObj;
    536       dst->_ElementSize = src->_ElementSize;
    537 
    538       offset += copy->varying[i].size;
    539    }
    540 
    541    /* Allocate an output element list:
    542     */
    543    copy->dstelt_size = MIN2(65536,
    544 			    copy->ib->count * 2 + 3);
    545    copy->dstelt_size = MIN2(copy->dstelt_size,
    546 			    copy->limits->max_indices);
    547    copy->dstelt = malloc(sizeof(GLuint) * copy->dstelt_size);
    548    copy->dstelt_nr = 0;
    549 
    550    /* Setup the new index buffer to point to the allocated element
    551     * list:
    552     */
    553    copy->dstib.count = 0;	/* duplicates dstelt_nr */
    554    copy->dstib.type = GL_UNSIGNED_INT;
    555    copy->dstib.obj = ctx->Shared->NullBufferObj;
    556    copy->dstib.ptr = copy->dstelt;
    557 }
    558 
    559 
    560 /**
    561  * Free up everything allocated during split/replay.
    562  */
    563 static void
    564 replay_finish( struct copy_context *copy )
    565 {
    566    struct gl_context *ctx = copy->ctx;
    567    GLuint i;
    568 
    569    /* Free our vertex and index buffers:
    570     */
    571    free(copy->translated_elt_buf);
    572    free(copy->dstbuf);
    573    free(copy->dstelt);
    574 
    575    /* Unmap VBO's
    576     */
    577    for (i = 0; i < copy->nr_varying; i++) {
    578       struct gl_buffer_object *vbo = copy->varying[i].array->BufferObj;
    579       if (_mesa_is_bufferobj(vbo) && _mesa_bufferobj_mapped(vbo, MAP_INTERNAL))
    580 	 ctx->Driver.UnmapBuffer(ctx, vbo, MAP_INTERNAL);
    581    }
    582 
    583    /* Unmap index buffer:
    584     */
    585    if (_mesa_is_bufferobj(copy->ib->obj) &&
    586        _mesa_bufferobj_mapped(copy->ib->obj, MAP_INTERNAL)) {
    587       ctx->Driver.UnmapBuffer(ctx, copy->ib->obj, MAP_INTERNAL);
    588    }
    589 }
    590 
    591 
    592 /**
    593  * Split VBO into smaller pieces, draw the pieces.
    594  */
    595 void vbo_split_copy( struct gl_context *ctx,
    596 		     const struct gl_vertex_array *arrays[],
    597 		     const struct _mesa_prim *prim,
    598 		     GLuint nr_prims,
    599 		     const struct _mesa_index_buffer *ib,
    600 		     vbo_draw_func draw,
    601 		     const struct split_limits *limits )
    602 {
    603    struct copy_context copy;
    604    GLuint i, this_nr_prims;
    605 
    606    for (i = 0; i < nr_prims;) {
    607       /* Our SW TNL pipeline doesn't handle basevertex yet, so bind_indices
    608        * will rebase the elements to the basevertex, and we'll only
    609        * emit strings of prims with the same basevertex in one draw call.
    610        */
    611       for (this_nr_prims = 1; i + this_nr_prims < nr_prims;
    612 	   this_nr_prims++) {
    613 	 if (prim[i].basevertex != prim[i + this_nr_prims].basevertex)
    614 	    break;
    615       }
    616 
    617       memset(&copy, 0, sizeof(copy));
    618 
    619       /* Require indexed primitives:
    620        */
    621       assert(ib);
    622 
    623       copy.ctx = ctx;
    624       copy.array = arrays;
    625       copy.prim = &prim[i];
    626       copy.nr_prims = this_nr_prims;
    627       copy.ib = ib;
    628       copy.draw = draw;
    629       copy.limits = limits;
    630 
    631       /* Clear the vertex cache:
    632        */
    633       for (i = 0; i < ELT_TABLE_SIZE; i++)
    634 	 copy.vert_cache[i].in = ~0;
    635 
    636       replay_init(&copy);
    637       replay_elts(&copy);
    638       replay_finish(&copy);
    639    }
    640 }
    641