Home | History | Annotate | Download | only in vbo
      1 
      2 /*
      3  * Mesa 3-D graphics library
      4  * Version:  6.5
      5  *
      6  * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
      7  *
      8  * Permission is hereby granted, free of charge, to any person obtaining a
      9  * copy of this software and associated documentation files (the "Software"),
     10  * to deal in the Software without restriction, including without limitation
     11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     12  * and/or sell copies of the Software, and to permit persons to whom the
     13  * Software is furnished to do so, subject to the following conditions:
     14  *
     15  * The above copyright notice and this permission notice shall be included
     16  * in all copies or substantial portions of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     21  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
     22  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
     23  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     24  *
     25  * Authors:
     26  *    Keith Whitwell <keith (at) tungstengraphics.com>
     27  */
     28 
     29 /* Split indexed primitives with per-vertex copying.
     30  */
     31 
     32 #include "main/glheader.h"
     33 #include "main/bufferobj.h"
     34 #include "main/imports.h"
     35 #include "main/glformats.h"
     36 #include "main/macros.h"
     37 #include "main/mtypes.h"
     38 
     39 #include "vbo_split.h"
     40 #include "vbo.h"
     41 
     42 
/* Number of slots in the vertex cache.  elt() picks a slot by masking
 * the source index with (ELT_TABLE_SIZE - 1), so this value needs to
 * remain a power of two.
 */
#define ELT_TABLE_SIZE 16

/**
 * Used for vertex-level splitting of indexed buffers.  Note that
 * non-indexed primitives may be converted to indexed in some cases
 * (eg loops, fans) in order to use this splitting path.
 *
 * One instance lives on the stack of vbo_split_copy() for each group
 * of primitives being replayed; it is zeroed with memset() before use.
 */
struct copy_context {

   /* Inputs, copied verbatim from the vbo_split_copy() arguments. */
   struct gl_context *ctx;
   const struct gl_client_array **array;
   const struct _mesa_prim *prim;
   GLuint nr_prims;
   const struct _mesa_index_buffer *ib;
   vbo_draw_func draw;

   const struct split_limits *limits;

   /* One entry per source array with a non-zero StrideB (filled in by
    * replay_init()).
    */
   struct {
      GLuint attr;                          /**< index into array[] */
      GLuint size;                          /**< attr_size() of the source array, in bytes */
      const struct gl_client_array *array;  /**< the source array */
      const GLubyte *src_ptr;               /**< base address the source data is read from */

      struct gl_client_array dstarray;      /**< replacement array pointing into dstbuf */

   } varying[VERT_ATTRIB_MAX];
   GLuint nr_varying;                       /**< number of valid entries in varying[] */

   /* Arrays installed as ctx->Array._DrawArrays while flush() draws:
    * either the original array (StrideB == 0) or &varying[j].dstarray.
    */
   const struct gl_client_array *dstarray_ptr[VERT_ATTRIB_MAX];
   struct _mesa_index_buffer dstib;         /**< index buffer describing dstelt */

   GLuint *translated_elt_buf;  /**< malloc'ed GLuint copy of ubyte/ushort
                                 * indices; NULL when the source is GLuint */
   const GLuint *srcelt;        /**< source indices, always read as GLuint */

   /** A baby hash table to avoid re-emitting (some) duplicate
    * vertices when splitting indexed primitives.
    *
    * 'in' is the source index held by the slot (~0 after a reset),
    * 'out' is the index that vertex was given in the output buffer.
    */
   struct {
      GLuint in;
      GLuint out;
   } vert_cache[ELT_TABLE_SIZE];

   GLuint vertex_size;  /**< bytes per output vertex: sum of varying[].size */
   GLubyte *dstbuf;     /**< malloc'ed output vertex buffer */
   GLubyte *dstptr;     /**< dstptr == dstbuf + dstbuf_nr * vertex_size */
   GLuint dstbuf_size;  /**< in vertices */
   GLuint dstbuf_nr;    /**< count of emitted vertices, also the largest value
                         * in dstelt.  Our MaxIndex.
                         */

   GLuint *dstelt;      /**< malloc'ed output element (index) list */
   GLuint dstelt_nr;    /**< number of elements written to dstelt */
   GLuint dstelt_size;  /**< capacity of dstelt, in elements */

   /* Output primitives accumulated since the last flush(). */
#define MAX_PRIM 32
   struct _mesa_prim dstprim[MAX_PRIM];
   GLuint dstprim_nr;

};
    103 
    104 
    105 static GLuint attr_size( const struct gl_client_array *array )
    106 {
    107    return array->Size * _mesa_sizeof_type(array->Type);
    108 }
    109 
    110 
    111 /**
    112  * Starts returning true slightly before the buffer fills, to ensure
    113  * that there is sufficient room for any remaining vertices to finish
    114  * off the prim:
    115  */
    116 static GLboolean
    117 check_flush( struct copy_context *copy )
    118 {
    119    GLenum mode = copy->dstprim[copy->dstprim_nr].mode;
    120 
    121    if (GL_TRIANGLE_STRIP == mode &&
    122        copy->dstelt_nr & 1) { /* see bug9962 */
    123        return GL_FALSE;
    124    }
    125 
    126    if (copy->dstbuf_nr + 4 > copy->dstbuf_size)
    127       return GL_TRUE;
    128 
    129    if (copy->dstelt_nr + 4 > copy->dstelt_size)
    130       return GL_TRUE;
    131 
    132    return GL_FALSE;
    133 }
    134 
    135 
    136 /**
    137  * Dump the parameters/info for a vbo->draw() call.
    138  */
    139 static void
    140 dump_draw_info(struct gl_context *ctx,
    141                const struct gl_client_array **arrays,
    142                const struct _mesa_prim *prims,
    143                GLuint nr_prims,
    144                const struct _mesa_index_buffer *ib,
    145                GLuint min_index,
    146                GLuint max_index)
    147 {
    148    GLuint i, j;
    149 
    150    printf("VBO Draw:\n");
    151    for (i = 0; i < nr_prims; i++) {
    152       printf("Prim %u of %u\n", i, nr_prims);
    153       printf("  Prim mode 0x%x\n", prims[i].mode);
    154       printf("  IB: %p\n", (void*) ib);
    155       for (j = 0; j < VERT_ATTRIB_MAX; j++) {
    156          printf("    array %d at %p:\n", j, (void*) arrays[j]);
    157          printf("      enabled %d, ptr %p, size %d, type 0x%x, stride %d\n",
    158 		arrays[j]->Enabled, arrays[j]->Ptr,
    159 		arrays[j]->Size, arrays[j]->Type, arrays[j]->StrideB);
    160          if (0) {
    161             GLint k = prims[i].start + prims[i].count - 1;
    162             GLfloat *last = (GLfloat *) (arrays[j]->Ptr + arrays[j]->Stride * k);
    163             printf("        last: %f %f %f\n",
    164 		   last[0], last[1], last[2]);
    165          }
    166       }
    167    }
    168 }
    169 
    170 
    171 static void
    172 flush( struct copy_context *copy )
    173 {
    174    struct gl_context *ctx = copy->ctx;
    175    const struct gl_client_array **saved_arrays = ctx->Array._DrawArrays;
    176    GLuint i;
    177 
    178    /* Set some counters:
    179     */
    180    copy->dstib.count = copy->dstelt_nr;
    181 
    182 #if 0
    183    dump_draw_info(copy->ctx,
    184                   copy->dstarray_ptr,
    185                   copy->dstprim,
    186                   copy->dstprim_nr,
    187                   &copy->dstib,
    188                   0,
    189                   copy->dstbuf_nr);
    190 #else
    191    (void) dump_draw_info;
    192 #endif
    193 
    194    ctx->Array._DrawArrays = copy->dstarray_ptr;
    195    ctx->NewDriverState |= ctx->DriverFlags.NewArray;
    196 
    197    copy->draw( ctx,
    198 	       copy->dstprim,
    199 	       copy->dstprim_nr,
    200 	       &copy->dstib,
    201 	       GL_TRUE,
    202 	       0,
    203 	       copy->dstbuf_nr - 1,
    204 	       NULL );
    205 
    206    ctx->Array._DrawArrays = saved_arrays;
    207    ctx->NewDriverState |= ctx->DriverFlags.NewArray;
    208 
    209    /* Reset all pointers:
    210     */
    211    copy->dstprim_nr = 0;
    212    copy->dstelt_nr = 0;
    213    copy->dstbuf_nr = 0;
    214    copy->dstptr = copy->dstbuf;
    215 
    216    /* Clear the vertex cache:
    217     */
    218    for (i = 0; i < ELT_TABLE_SIZE; i++)
    219       copy->vert_cache[i].in = ~0;
    220 }
    221 
    222 
    223 /**
    224  * Called at begin of each primitive during replay.
    225  */
    226 static void
    227 begin( struct copy_context *copy, GLenum mode, GLboolean begin_flag )
    228 {
    229    struct _mesa_prim *prim = &copy->dstprim[copy->dstprim_nr];
    230 
    231    prim->mode = mode;
    232    prim->begin = begin_flag;
    233    prim->num_instances = 1;
    234 }
    235 
    236 
    237 /**
    238  * Use a hashtable to attempt to identify recently-emitted vertices
    239  * and avoid re-emitting them.
    240  */
    241 static GLuint
    242 elt(struct copy_context *copy, GLuint elt_idx)
    243 {
    244    GLuint elt = copy->srcelt[elt_idx];
    245    GLuint slot = elt & (ELT_TABLE_SIZE-1);
    246 
    247 /*    printf("elt %d\n", elt); */
    248 
    249    /* Look up the incoming element in the vertex cache.  Re-emit if
    250     * necessary.
    251     */
    252    if (copy->vert_cache[slot].in != elt) {
    253       GLubyte *csr = copy->dstptr;
    254       GLuint i;
    255 
    256 /*       printf("  --> emit to dstelt %d\n", copy->dstbuf_nr); */
    257 
    258       for (i = 0; i < copy->nr_varying; i++) {
    259 	 const struct gl_client_array *srcarray = copy->varying[i].array;
    260 	 const GLubyte *srcptr = copy->varying[i].src_ptr + elt * srcarray->StrideB;
    261 
    262 	 memcpy(csr, srcptr, copy->varying[i].size);
    263 	 csr += copy->varying[i].size;
    264 
    265 #ifdef NAN_CHECK
    266          if (srcarray->Type == GL_FLOAT) {
    267             GLuint k;
    268             GLfloat *f = (GLfloat *) srcptr;
    269             for (k = 0; k < srcarray->Size; k++) {
    270                assert(!IS_INF_OR_NAN(f[k]));
    271                assert(f[k] <= 1.0e20 && f[k] >= -1.0e20);
    272             }
    273          }
    274 #endif
    275 
    276 	 if (0)
    277 	 {
    278 	    const GLuint *f = (const GLuint *)srcptr;
    279 	    GLuint j;
    280 	    printf("  varying %d: ", i);
    281 	    for(j = 0; j < copy->varying[i].size / 4; j++)
    282 	       printf("%x ", f[j]);
    283 	    printf("\n");
    284 	 }
    285       }
    286 
    287       copy->vert_cache[slot].in = elt;
    288       copy->vert_cache[slot].out = copy->dstbuf_nr++;
    289       copy->dstptr += copy->vertex_size;
    290 
    291       assert(csr == copy->dstptr);
    292       assert(copy->dstptr == (copy->dstbuf +
    293                               copy->dstbuf_nr * copy->vertex_size));
    294    }
    295 /*    else */
    296 /*       printf("  --> reuse vertex\n"); */
    297 
    298 /*    printf("  --> emit %d\n", copy->vert_cache[slot].out); */
    299    copy->dstelt[copy->dstelt_nr++] = copy->vert_cache[slot].out;
    300    return check_flush(copy);
    301 }
    302 
    303 
    304 /**
    305  * Called at end of each primitive during replay.
    306  */
    307 static void
    308 end( struct copy_context *copy, GLboolean end_flag )
    309 {
    310    struct _mesa_prim *prim = &copy->dstprim[copy->dstprim_nr];
    311 
    312 /*    printf("end (%d)\n", end_flag); */
    313 
    314    prim->end = end_flag;
    315    prim->count = copy->dstelt_nr - prim->start;
    316 
    317    if (++copy->dstprim_nr == MAX_PRIM ||
    318        check_flush(copy))
    319       flush(copy);
    320 }
    321 
    322 
    323 static void
    324 replay_elts( struct copy_context *copy )
    325 {
    326    GLuint i, j, k;
    327    GLboolean split;
    328 
    329    for (i = 0; i < copy->nr_prims; i++) {
    330       const struct _mesa_prim *prim = &copy->prim[i];
    331       const GLuint start = prim->start;
    332       GLuint first, incr;
    333 
    334       switch (prim->mode) {
    335 
    336       case GL_LINE_LOOP:
    337 	 /* Convert to linestrip and emit the final vertex explicitly,
    338 	  * but only in the resultant strip that requires it.
    339 	  */
    340 	 j = 0;
    341 	 while (j != prim->count) {
    342 	    begin(copy, GL_LINE_STRIP, prim->begin && j == 0);
    343 
    344 	    for (split = GL_FALSE; j != prim->count && !split; j++)
    345 	       split = elt(copy, start + j);
    346 
    347 	    if (j == prim->count) {
    348 	       /* Done, emit final line.  Split doesn't matter as
    349 		* it is always raised a bit early so we can emit
    350 		* the last verts if necessary!
    351 		*/
    352 	       if (prim->end)
    353 		  (void)elt(copy, start + 0);
    354 
    355 	       end(copy, prim->end);
    356 	    }
    357 	    else {
    358 	       /* Wrap
    359 		*/
    360 	       assert(split);
    361 	       end(copy, 0);
    362 	       j--;
    363 	    }
    364 	 }
    365 	 break;
    366 
    367       case GL_TRIANGLE_FAN:
    368       case GL_POLYGON:
    369 	 j = 2;
    370 	 while (j != prim->count) {
    371 	    begin(copy, prim->mode, prim->begin && j == 0);
    372 
    373 	    split = elt(copy, start+0);
    374 	    assert(!split);
    375 
    376 	    split = elt(copy, start+j-1);
    377 	    assert(!split);
    378 
    379 	    for (; j != prim->count && !split; j++)
    380 	       split = elt(copy, start+j);
    381 
    382 	    end(copy, prim->end && j == prim->count);
    383 
    384 	    if (j != prim->count) {
    385 	       /* Wrapped the primitive, need to repeat some vertices:
    386 		*/
    387 	       j -= 1;
    388 	    }
    389 	 }
    390 	 break;
    391 
    392       default:
    393 	 (void)split_prim_inplace(prim->mode, &first, &incr);
    394 
    395 	 j = 0;
    396 	 while (j != prim->count) {
    397 
    398 	    begin(copy, prim->mode, prim->begin && j == 0);
    399 
    400 	    split = 0;
    401 	    for (k = 0; k < first; k++, j++)
    402 	       split |= elt(copy, start+j);
    403 
    404 	    assert(!split);
    405 
    406 	    for (; j != prim->count && !split; )
    407 	       for (k = 0; k < incr; k++, j++)
    408 		  split |= elt(copy, start+j);
    409 
    410 	    end(copy, prim->end && j == prim->count);
    411 
    412 	    if (j != prim->count) {
    413 	       /* Wrapped the primitive, need to repeat some vertices:
    414 		*/
    415 	       assert(j > first - incr);
    416 	       j -= (first - incr);
    417 	    }
    418 	 }
    419 	 break;
    420       }
    421    }
    422 
    423    if (copy->dstprim_nr)
    424       flush(copy);
    425 }
    426 
    427 
/**
 * Prepare \p copy for replay_elts():
 *  - collect the arrays with a non-zero StrideB into copy->varying[]
 *    and compute the output vertex size,
 *  - map the vertex and index buffer objects that are not mapped yet,
 *  - obtain the source indices as GLuint (translating ubyte/ushort),
 *  - allocate the output vertex buffer and output element list and
 *    set up the arrays / index buffer that describe them.
 *
 * Relies on the caller having zeroed *copy (nr_varying is only
 * incremented here, never reset).
 *
 * NOTE(review): none of the malloc() results are checked.
 */
static void
replay_init( struct copy_context *copy )
{
   struct gl_context *ctx = copy->ctx;
   GLuint i;
   GLuint offset;
   const GLvoid *srcptr;

   /* Make a list of varying attributes and their vbo's.  Also
    * calculate vertex size.
    */
   copy->vertex_size = 0;
   for (i = 0; i < VERT_ATTRIB_MAX; i++) {
      struct gl_buffer_object *vbo = copy->array[i]->BufferObj;

      if (copy->array[i]->StrideB == 0) {
	 /* Zero stride: the array is passed through to the draw call
	  * untouched.
	  */
	 copy->dstarray_ptr[i] = copy->array[i];
      }
      else {
	 GLuint j = copy->nr_varying++;

	 copy->varying[j].attr = i;
	 copy->varying[j].array = copy->array[i];
	 copy->varying[j].size = attr_size(copy->array[i]);
	 copy->vertex_size += attr_size(copy->array[i]);

	 /* Map the whole buffer for reading unless it is mapped already;
	  * replay_finish() does the unmapping.
	  */
	 if (_mesa_is_bufferobj(vbo) && !_mesa_bufferobj_mapped(vbo))
	    ctx->Driver.MapBufferRange(ctx, 0, vbo->Size, GL_MAP_READ_BIT, vbo);

	 copy->varying[j].src_ptr = ADD_POINTERS(vbo->Pointer,
						 copy->array[i]->Ptr);

	 /* The draw call reads this attribute from our copy instead. */
	 copy->dstarray_ptr[i] = &copy->varying[j].dstarray;
      }
   }

   /* There must always be an index buffer.  Currently require the
    * caller convert non-indexed prims to indexed.  Could alternately
    * do it internally.
    */
   if (_mesa_is_bufferobj(copy->ib->obj) &&
       !_mesa_bufferobj_mapped(copy->ib->obj))
      ctx->Driver.MapBufferRange(ctx, 0, copy->ib->obj->Size, GL_MAP_READ_BIT,
				 copy->ib->obj);

   srcptr = (const GLubyte *) ADD_POINTERS(copy->ib->obj->Pointer,
                                           copy->ib->ptr);

   /* elt() reads source indices as GLuint, so narrower index types are
    * widened into a temporary buffer (freed by replay_finish()).
    * Any other ib->type leaves srcelt as it was (NULL after the
    * caller's memset).
    */
   switch (copy->ib->type) {
   case GL_UNSIGNED_BYTE:
      copy->translated_elt_buf = malloc(sizeof(GLuint) * copy->ib->count);
      copy->srcelt = copy->translated_elt_buf;

      for (i = 0; i < copy->ib->count; i++)
	 copy->translated_elt_buf[i] = ((const GLubyte *)srcptr)[i];
      break;

   case GL_UNSIGNED_SHORT:
      copy->translated_elt_buf = malloc(sizeof(GLuint) * copy->ib->count);
      copy->srcelt = copy->translated_elt_buf;

      for (i = 0; i < copy->ib->count; i++)
	 copy->translated_elt_buf[i] = ((const GLushort *)srcptr)[i];
      break;

   case GL_UNSIGNED_INT:
      /* Already the right width: read the source indices in place. */
      copy->translated_elt_buf = NULL;
      copy->srcelt = (const GLuint *)srcptr;
      break;
   }

   /* Figure out the maximum allowed vertex buffer size:
    * max_verts vertices, unless that many would exceed max_vb_size.
    */
   if (copy->vertex_size * copy->limits->max_verts <= copy->limits->max_vb_size) {
      copy->dstbuf_size = copy->limits->max_verts;
   }
   else {
      copy->dstbuf_size = copy->limits->max_vb_size / copy->vertex_size;
   }

   /* Allocate an output vertex buffer:
    *
    * XXX:  This should be a VBO!
    */
   copy->dstbuf = malloc(copy->dstbuf_size * copy->vertex_size);
   copy->dstptr = copy->dstbuf;

   /* Setup new vertex arrays to point into the output buffer:
    * attributes are interleaved, each vertex being vertex_size bytes
    * with the attributes laid out in varying[] order.
    */
   for (offset = 0, i = 0; i < copy->nr_varying; i++) {
      const struct gl_client_array *src = copy->varying[i].array;
      struct gl_client_array *dst = &copy->varying[i].dstarray;

      dst->Size = src->Size;
      dst->Type = src->Type;
      dst->Format = GL_RGBA;
      dst->Stride = copy->vertex_size;
      dst->StrideB = copy->vertex_size;
      dst->Ptr = copy->dstbuf + offset;
      dst->Enabled = GL_TRUE;
      dst->Normalized = src->Normalized;
      dst->Integer = src->Integer;
      dst->BufferObj = ctx->Shared->NullBufferObj;
      dst->_ElementSize = src->_ElementSize;
      dst->_MaxElement = copy->dstbuf_size; /* may be less! */

      offset += copy->varying[i].size;
   }

   /* Allocate an output element list:
    * the smallest of 65536, 2 * source count + 3 and max_indices.
    */
   copy->dstelt_size = MIN2(65536,
			    copy->ib->count * 2 + 3);
   copy->dstelt_size = MIN2(copy->dstelt_size,
			    copy->limits->max_indices);
   copy->dstelt = malloc(sizeof(GLuint) * copy->dstelt_size);
   copy->dstelt_nr = 0;

   /* Setup the new index buffer to point to the allocated element
    * list:
    */
   copy->dstib.count = 0;	/* duplicates dstelt_nr */
   copy->dstib.type = GL_UNSIGNED_INT;
   copy->dstib.obj = ctx->Shared->NullBufferObj;
   copy->dstib.ptr = copy->dstelt;
}
    554 
    555 
    556 /**
    557  * Free up everything allocated during split/replay.
    558  */
    559 static void
    560 replay_finish( struct copy_context *copy )
    561 {
    562    struct gl_context *ctx = copy->ctx;
    563    GLuint i;
    564 
    565    /* Free our vertex and index buffers:
    566     */
    567    free(copy->translated_elt_buf);
    568    free(copy->dstbuf);
    569    free(copy->dstelt);
    570 
    571    /* Unmap VBO's
    572     */
    573    for (i = 0; i < copy->nr_varying; i++) {
    574       struct gl_buffer_object *vbo = copy->varying[i].array->BufferObj;
    575       if (_mesa_is_bufferobj(vbo) && _mesa_bufferobj_mapped(vbo))
    576 	 ctx->Driver.UnmapBuffer(ctx, vbo);
    577    }
    578 
    579    /* Unmap index buffer:
    580     */
    581    if (_mesa_is_bufferobj(copy->ib->obj) &&
    582        _mesa_bufferobj_mapped(copy->ib->obj)) {
    583       ctx->Driver.UnmapBuffer(ctx, copy->ib->obj);
    584    }
    585 }
    586 
    587 
    588 /**
    589  * Split VBO into smaller pieces, draw the pieces.
    590  */
    591 void vbo_split_copy( struct gl_context *ctx,
    592 		     const struct gl_client_array *arrays[],
    593 		     const struct _mesa_prim *prim,
    594 		     GLuint nr_prims,
    595 		     const struct _mesa_index_buffer *ib,
    596 		     vbo_draw_func draw,
    597 		     const struct split_limits *limits )
    598 {
    599    struct copy_context copy;
    600    GLuint i, this_nr_prims;
    601 
    602    for (i = 0; i < nr_prims;) {
    603       /* Our SW TNL pipeline doesn't handle basevertex yet, so bind_indices
    604        * will rebase the elements to the basevertex, and we'll only
    605        * emit strings of prims with the same basevertex in one draw call.
    606        */
    607       for (this_nr_prims = 1; i + this_nr_prims < nr_prims;
    608 	   this_nr_prims++) {
    609 	 if (prim[i].basevertex != prim[i + this_nr_prims].basevertex)
    610 	    break;
    611       }
    612 
    613       memset(&copy, 0, sizeof(copy));
    614 
    615       /* Require indexed primitives:
    616        */
    617       assert(ib);
    618 
    619       copy.ctx = ctx;
    620       copy.array = arrays;
    621       copy.prim = &prim[i];
    622       copy.nr_prims = this_nr_prims;
    623       copy.ib = ib;
    624       copy.draw = draw;
    625       copy.limits = limits;
    626 
    627       /* Clear the vertex cache:
    628        */
    629       for (i = 0; i < ELT_TABLE_SIZE; i++)
    630 	 copy.vert_cache[i].in = ~0;
    631 
    632       replay_init(&copy);
    633       replay_elts(&copy);
    634       replay_finish(&copy);
    635    }
    636 }
    637