Home | History | Annotate | Download | only in libagl
      1 /* libs/opengles/primitives.cpp
      2 **
      3 ** Copyright 2006, The Android Open Source Project
      4 **
      5 ** Licensed under the Apache License, Version 2.0 (the "License");
      6 ** you may not use this file except in compliance with the License.
      7 ** You may obtain a copy of the License at
      8 **
      9 **     http://www.apache.org/licenses/LICENSE-2.0
     10 **
     11 ** Unless required by applicable law or agreed to in writing, software
     12 ** distributed under the License is distributed on an "AS IS" BASIS,
     13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 ** See the License for the specific language governing permissions and
     15 ** limitations under the License.
     16 */
     17 
     18 #include <stdio.h>
     19 #include <stdlib.h>
     20 #include <math.h>
     21 
     22 #include "context.h"
     23 #include "primitives.h"
     24 #include "light.h"
     25 #include "matrix.h"
     26 #include "vertex.h"
     27 #include "fp.h"
     28 #include "TextureObjectManager.h"
     29 
     30 extern "C" void iterators0032(const void* that,
     31         int32_t* it, int32_t c0, int32_t c1, int32_t c2);
     32 
     33 namespace android {
     34 
     35 // ----------------------------------------------------------------------------
     36 
     37 static void primitive_point(ogles_context_t* c, vertex_t* v);
     38 static void primitive_line(ogles_context_t* c, vertex_t* v0, vertex_t* v1);
     39 static void primitive_clip_triangle(ogles_context_t* c,
     40         vertex_t* v0, vertex_t* v1, vertex_t* v2);
     41 
     42 static void primitive_nop_point(ogles_context_t* c, vertex_t* v);
     43 static void primitive_nop_line(ogles_context_t* c, vertex_t* v0, vertex_t* v1);
     44 static void primitive_nop_triangle(ogles_context_t* c,
     45         vertex_t* v0, vertex_t* v1, vertex_t* v2);
     46 
     47 static inline bool cull_triangle(ogles_context_t* c,
     48         vertex_t* v0, vertex_t* v1, vertex_t* v2);
     49 
     50 static void lerp_triangle(ogles_context_t* c,
     51         vertex_t* v0, vertex_t* v1, vertex_t* v2);
     52 
     53 static void lerp_texcoords(ogles_context_t* c,
     54         vertex_t* v0, vertex_t* v1, vertex_t* v2);
     55 
     56 static void lerp_texcoords_w(ogles_context_t* c,
     57         vertex_t* v0, vertex_t* v1, vertex_t* v2);
     58 
     59 static void triangle(ogles_context_t* c,
     60         vertex_t* v0, vertex_t* v1, vertex_t* v2);
     61 
     62 static void clip_triangle(ogles_context_t* c,
     63         vertex_t* v0, vertex_t* v1, vertex_t* v2);
     64 
     65 static unsigned int clip_line(ogles_context_t* c,
     66         vertex_t* s, vertex_t* p);
     67 
     68 // ----------------------------------------------------------------------------
     69 #if 0
     70 #pragma mark -
     71 #endif
     72 
     73 static void lightTriangleDarkSmooth(ogles_context_t* c,
     74         vertex_t* v0, vertex_t* v1, vertex_t* v2)
     75 {
     76     if (!(v0->flags & vertex_t::LIT)) {
     77         v0->flags |= vertex_t::LIT;
     78         const GLvoid* cp = c->arrays.color.element(
     79                 v0->index & vertex_cache_t::INDEX_MASK);
     80         c->arrays.color.fetch(c, v0->color.v, cp);
     81     }
     82     if (!(v1->flags & vertex_t::LIT)) {
     83         v1->flags |= vertex_t::LIT;
     84         const GLvoid* cp = c->arrays.color.element(
     85                 v1->index & vertex_cache_t::INDEX_MASK);
     86         c->arrays.color.fetch(c, v1->color.v, cp);
     87     }
     88     if(!(v2->flags & vertex_t::LIT)) {
     89         v2->flags |= vertex_t::LIT;
     90         const GLvoid* cp = c->arrays.color.element(
     91                 v2->index & vertex_cache_t::INDEX_MASK);
     92         c->arrays.color.fetch(c, v2->color.v, cp);
     93     }
     94 }
     95 
     96 static void lightTriangleDarkFlat(ogles_context_t* c,
     97         vertex_t* /*v0*/, vertex_t* /*v1*/, vertex_t* v2)
     98 {
     99     if (!(v2->flags & vertex_t::LIT)) {
    100         v2->flags |= vertex_t::LIT;
    101         const GLvoid* cp = c->arrays.color.element(
    102                 v2->index & vertex_cache_t::INDEX_MASK);
    103         c->arrays.color.fetch(c, v2->color.v, cp);
    104     }
    105     // configure the rasterizer here, before we clip
    106     c->rasterizer.procs.color4xv(c, v2->color.v);
    107 }
    108 
    109 static void lightTriangleSmooth(ogles_context_t* c,
    110         vertex_t* v0, vertex_t* v1, vertex_t* v2)
    111 {
    112     if (!(v0->flags & vertex_t::LIT))
    113         c->lighting.lightVertex(c, v0);
    114     if (!(v1->flags & vertex_t::LIT))
    115         c->lighting.lightVertex(c, v1);
    116     if(!(v2->flags & vertex_t::LIT))
    117         c->lighting.lightVertex(c, v2);
    118 }
    119 
    120 static void lightTriangleFlat(ogles_context_t* c,
    121         vertex_t* /*v0*/, vertex_t* /*v1*/, vertex_t* v2)
    122 {
    123     if (!(v2->flags & vertex_t::LIT))
    124         c->lighting.lightVertex(c, v2);
    125     // configure the rasterizer here, before we clip
    126     c->rasterizer.procs.color4xv(c, v2->color.v);
    127 }
    128 
    129 // The fog versions...
    130 
    131 static inline
    132 void lightVertexDarkSmoothFog(ogles_context_t* c, vertex_t* v)
    133 {
    134     if (!(v->flags & vertex_t::LIT)) {
    135         v->flags |= vertex_t::LIT;
    136         v->fog = c->fog.fog(c, v->eye.z);
    137         const GLvoid* cp = c->arrays.color.element(
    138                 v->index & vertex_cache_t::INDEX_MASK);
    139         c->arrays.color.fetch(c, v->color.v, cp);
    140     }
    141 }
    142 static inline
    143 void lightVertexDarkFlatFog(ogles_context_t* c, vertex_t* v)
    144 {
    145     if (!(v->flags & vertex_t::LIT)) {
    146         v->flags |= vertex_t::LIT;
    147         v->fog = c->fog.fog(c, v->eye.z);
    148     }
    149 }
    150 static inline
    151 void lightVertexSmoothFog(ogles_context_t* c, vertex_t* v)
    152 {
    153     if (!(v->flags & vertex_t::LIT)) {
    154         v->fog = c->fog.fog(c, v->eye.z);
    155         c->lighting.lightVertex(c, v);
    156     }
    157 }
    158 
    159 static void lightTriangleDarkSmoothFog(ogles_context_t* c,
    160         vertex_t* v0, vertex_t* v1, vertex_t* v2)
    161 {
    162     lightVertexDarkSmoothFog(c, v0);
    163     lightVertexDarkSmoothFog(c, v1);
    164     lightVertexDarkSmoothFog(c, v2);
    165 }
    166 
    167 static void lightTriangleDarkFlatFog(ogles_context_t* c,
    168         vertex_t* v0, vertex_t* v1, vertex_t* v2)
    169 {
    170     lightVertexDarkFlatFog(c, v0);
    171     lightVertexDarkFlatFog(c, v1);
    172     lightVertexDarkSmoothFog(c, v2);
    173     // configure the rasterizer here, before we clip
    174     c->rasterizer.procs.color4xv(c, v2->color.v);
    175 }
    176 
    177 static void lightTriangleSmoothFog(ogles_context_t* c,
    178         vertex_t* v0, vertex_t* v1, vertex_t* v2)
    179 {
    180     lightVertexSmoothFog(c, v0);
    181     lightVertexSmoothFog(c, v1);
    182     lightVertexSmoothFog(c, v2);
    183 }
    184 
    185 static void lightTriangleFlatFog(ogles_context_t* c,
    186         vertex_t* v0, vertex_t* v1, vertex_t* v2)
    187 {
    188     lightVertexDarkFlatFog(c, v0);
    189     lightVertexDarkFlatFog(c, v1);
    190     lightVertexSmoothFog(c, v2);
    191     // configure the rasterizer here, before we clip
    192     c->rasterizer.procs.color4xv(c, v2->color.v);
    193 }
    194 
    195 
    196 
    197 typedef void (*light_primitive_t)(ogles_context_t*,
    198         vertex_t*, vertex_t*, vertex_t*);
    199 
    200 // fog 0x4, light 0x2, smooth 0x1
    201 static const light_primitive_t lightPrimitive[8] = {
    202     lightTriangleDarkFlat,          // no fog | dark  | flat
    203     lightTriangleDarkSmooth,        // no fog | dark  | smooth
    204     lightTriangleFlat,              // no fog | light | flat
    205     lightTriangleSmooth,            // no fog | light | smooth
    206     lightTriangleDarkFlatFog,       // fog    | dark  | flat
    207     lightTriangleDarkSmoothFog,     // fog    | dark  | smooth
    208     lightTriangleFlatFog,           // fog    | light | flat
    209     lightTriangleSmoothFog          // fog    | light | smooth
    210 };
    211 
    212 void ogles_validate_primitives(ogles_context_t* c)
    213 {
    214     const uint32_t enables = c->rasterizer.state.enables;
    215 
    216     // set up the lighting/shading/smoothing/fogging function
    217     int index = enables & GGL_ENABLE_SMOOTH ? 0x1 : 0;
    218     index |= c->lighting.enable ? 0x2 : 0;
    219     index |= enables & GGL_ENABLE_FOG ? 0x4 : 0;
    220     c->lighting.lightTriangle = lightPrimitive[index];
    221 
    222     // set up the primitive renderers
    223     if (ggl_likely(c->arrays.vertex.enable)) {
    224         c->prims.renderPoint    = primitive_point;
    225         c->prims.renderLine     = primitive_line;
    226         c->prims.renderTriangle = primitive_clip_triangle;
    227     } else {
    228         c->prims.renderPoint    = primitive_nop_point;
    229         c->prims.renderLine     = primitive_nop_line;
    230         c->prims.renderTriangle = primitive_nop_triangle;
    231     }
    232 }
    233 
    234 // ----------------------------------------------------------------------------
    235 
    236 void compute_iterators_t::initTriangle(
    237         vertex_t const* v0, vertex_t const* v1, vertex_t const* v2)
    238 {
    239     m_dx01 = v1->window.x - v0->window.x;
    240     m_dy10 = v0->window.y - v1->window.y;
    241     m_dx20 = v0->window.x - v2->window.x;
    242     m_dy02 = v2->window.y - v0->window.y;
    243     m_area = m_dx01*m_dy02 + (-m_dy10)*m_dx20;
    244     (void)m_reserved; // suppress unused warning
    245 }
    246 
    247 void compute_iterators_t::initLine(
    248         vertex_t const* v0, vertex_t const* v1)
    249 {
    250     m_dx01 = m_dy02 = v1->window.x - v0->window.x;
    251     m_dy10 = m_dx20 = v0->window.y - v1->window.y;
    252     m_area = m_dx01*m_dy02 + (-m_dy10)*m_dx20;
    253 }
    254 
    255 void compute_iterators_t::initLerp(vertex_t const* v0, uint32_t enables)
    256 {
    257     m_x0 = v0->window.x;
    258     m_y0 = v0->window.y;
    259     const GGLcoord area = (m_area + TRI_HALF) >> TRI_FRACTION_BITS;
    260     const GGLcoord minArea = 2; // cannot be inverted
    261     // triangles with an area smaller than 1.0 are not smooth-shaded
    262 
    263     int q=0, s=0, d=0;
    264     if (abs(area) >= minArea) {
    265         // Here we do some voodoo magic, to compute a suitable scale
    266         // factor for deltas/area:
    267 
    268         // First compute the 1/area with full 32-bits precision,
    269         // gglRecipQNormalized returns a number [-0.5, 0.5[ and an exponent.
    270         d = gglRecipQNormalized(area, &q);
    271 
    272         // Then compute the minimum left-shift to not overflow the muls
    273         // below.
    274         s = 32 - gglClz(abs(m_dy02)|abs(m_dy10)|abs(m_dx01)|abs(m_dx20));
    275 
    276         // We'll keep 16-bits of precision for deltas/area. So we need
    277         // to shift everything left an extra 15 bits.
    278         s += 15;
    279 
    280         // make sure all final shifts are not > 32, because gglMulx
    281         // can't handle it.
    282         if (s < q) s = q;
    283         if (s > 32) {
    284             d >>= 32-s;
    285             s = 32;
    286         }
    287     }
    288 
    289     m_dx01 = gglMulx(m_dx01, d, s);
    290     m_dy10 = gglMulx(m_dy10, d, s);
    291     m_dx20 = gglMulx(m_dx20, d, s);
    292     m_dy02 = gglMulx(m_dy02, d, s);
    293     m_area_scale = 32 + q - s;
    294     m_scale = 0;
    295 
    296     if (enables & GGL_ENABLE_TMUS) {
    297         const int A = gglClz(abs(m_dy02)|abs(m_dy10)|abs(m_dx01)|abs(m_dx20));
    298         const int B = gglClz(abs(m_x0)|abs(m_y0));
    299         m_scale = max(0, 32 - (A + 16)) +
    300                   max(0, 32 - (B + TRI_FRACTION_BITS)) + 1;
    301     }
    302 }
    303 
    304 int compute_iterators_t::iteratorsScale(GGLfixed* it,
    305         int32_t c0, int32_t c1, int32_t c2) const
    306 {
    307     int32_t dc01 = c1 - c0;
    308     int32_t dc02 = c2 - c0;
    309     const int A = gglClz(abs(c0));
    310     const int B = gglClz(abs(dc01)|abs(dc02));
    311     const int scale = min(A, B - m_scale) - 2;
    312     if (scale >= 0) {
    313         c0   <<= scale;
    314         dc01 <<= scale;
    315         dc02 <<= scale;
    316     } else {
    317         c0   >>= -scale;
    318         dc01 >>= -scale;
    319         dc02 >>= -scale;
    320     }
    321     const int s = m_area_scale;
    322     int32_t dcdx = gglMulAddx(dc01, m_dy02, gglMulx(dc02, m_dy10, s), s);
    323     int32_t dcdy = gglMulAddx(dc02, m_dx01, gglMulx(dc01, m_dx20, s), s);
    324     int32_t c = c0 - (gglMulAddx(dcdx, m_x0,
    325             gglMulx(dcdy, m_y0, TRI_FRACTION_BITS), TRI_FRACTION_BITS));
    326     it[0] = c;
    327     it[1] = dcdx;
    328     it[2] = dcdy;
    329     return scale;
    330 }
    331 
    332 void compute_iterators_t::iterators1616(GGLfixed* it,
    333         GGLfixed c0, GGLfixed c1, GGLfixed c2) const
    334 {
    335     const GGLfixed dc01 = c1 - c0;
    336     const GGLfixed dc02 = c2 - c0;
    337     // 16.16 x 16.16 == 32.32 --> 16.16
    338     const int s = m_area_scale;
    339     int32_t dcdx = gglMulAddx(dc01, m_dy02, gglMulx(dc02, m_dy10, s), s);
    340     int32_t dcdy = gglMulAddx(dc02, m_dx01, gglMulx(dc01, m_dx20, s), s);
    341     int32_t c = c0 - (gglMulAddx(dcdx, m_x0,
    342             gglMulx(dcdy, m_y0, TRI_FRACTION_BITS), TRI_FRACTION_BITS));
    343     it[0] = c;
    344     it[1] = dcdx;
    345     it[2] = dcdy;
    346 }
    347 
    348 void compute_iterators_t::iterators0032(int64_t* it,
    349         int32_t c0, int32_t c1, int32_t c2) const
    350 {
    351     const int s = m_area_scale - 16;
    352     int32_t dc01 = (c1 - c0)>>s;
    353     int32_t dc02 = (c2 - c0)>>s;
    354     // 16.16 x 16.16 == 32.32
    355     int64_t dcdx = gglMulii(dc01, m_dy02) + gglMulii(dc02, m_dy10);
    356     int64_t dcdy = gglMulii(dc02, m_dx01) + gglMulii(dc01, m_dx20);
    357     it[ 0] = (c0<<16) - ((dcdx*m_x0 + dcdy*m_y0)>>4);
    358     it[ 1] = dcdx;
    359     it[ 2] = dcdy;
    360 }
    361 
    362 #if defined(__arm__) && !defined(__thumb__)
    363 inline void compute_iterators_t::iterators0032(int32_t* it,
    364         int32_t c0, int32_t c1, int32_t c2) const
    365 {
    366     ::iterators0032(this, it, c0, c1, c2);
    367 }
    368 #else
    369 void compute_iterators_t::iterators0032(int32_t* it,
    370         int32_t c0, int32_t c1, int32_t c2) const
    371 {
    372     int64_t it64[3];
    373     iterators0032(it64, c0, c1, c2);
    374     it[0] = it64[0];
    375     it[1] = it64[1];
    376     it[2] = it64[2];
    377 }
    378 #endif
    379 
    380 // ----------------------------------------------------------------------------
    381 
    382 static inline int32_t clampZ(GLfixed z) CONST;
    383 int32_t clampZ(GLfixed z) {
    384     z = (z & ~(z>>31));
    385     if (z >= 0x10000)
    386         z = 0xFFFF;
    387     return z;
    388 }
    389 
    390 static __attribute__((noinline))
    391 void fetch_texcoord_impl(ogles_context_t* c,
    392         vertex_t* v0, vertex_t* v1, vertex_t* v2)
    393 {
    394     vertex_t* const vtx[3] = { v0, v1, v2 };
    395     array_t const * const texcoordArray = c->arrays.texture;
    396 
    397     for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
    398         if (!(c->rasterizer.state.texture[i].enable))
    399             continue;
    400 
    401         for (int j=0 ; j<3 ; j++) {
    402             vertex_t* const v = vtx[j];
    403             if (v->flags & vertex_t::TT)
    404                 continue;
    405 
    406             // NOTE: here we could compute automatic texgen
    407             // such as sphere/cube maps, instead of fetching them
    408             // from the textcoord array.
    409 
    410             vec4_t& coords = v->texture[i];
    411             const GLubyte* tp = texcoordArray[i].element(
    412                     v->index & vertex_cache_t::INDEX_MASK);
    413             texcoordArray[i].fetch(c, coords.v, tp);
    414 
    415             // transform texture coordinates...
    416             coords.Q = 0x10000;
    417             const transform_t& tr = c->transforms.texture[i].transform;
    418             if (ggl_unlikely(tr.ops)) {
    419                 c->arrays.tex_transform[i](&tr, &coords, &coords);
    420             }
    421 
    422             // divide by Q
    423             const GGLfixed q = coords.Q;
    424             if (ggl_unlikely(q != 0x10000)) {
    425                 const int32_t qinv = gglRecip28(q);
    426                 coords.S = gglMulx(coords.S, qinv, 28);
    427                 coords.T = gglMulx(coords.T, qinv, 28);
    428             }
    429         }
    430     }
    431     v0->flags |= vertex_t::TT;
    432     v1->flags |= vertex_t::TT;
    433     v2->flags |= vertex_t::TT;
    434 }
    435 
    436 inline void fetch_texcoord(ogles_context_t* c,
    437         vertex_t* v0, vertex_t* v1, vertex_t* v2)
    438 {
    439     const uint32_t enables = c->rasterizer.state.enables;
    440     if (!(enables & GGL_ENABLE_TMUS))
    441         return;
    442 
    443     // Fetch & transform texture coordinates...
    444     if (ggl_likely(v0->flags & v1->flags & v2->flags & vertex_t::TT)) {
    445         // already done for all three vertices, bail...
    446         return;
    447     }
    448     fetch_texcoord_impl(c, v0, v1, v2);
    449 }
    450 
    451 // ----------------------------------------------------------------------------
    452 #if 0
    453 #pragma mark -
    454 #pragma mark Point
    455 #endif
    456 
    457 void primitive_nop_point(ogles_context_t*, vertex_t*) {
    458 }
    459 
    460 void primitive_point(ogles_context_t* c, vertex_t* v)
    461 {
    462     // lighting & clamping...
    463     const uint32_t enables = c->rasterizer.state.enables;
    464 
    465     if (ggl_unlikely(!(v->flags & vertex_t::LIT))) {
    466         if (c->lighting.enable) {
    467             c->lighting.lightVertex(c, v);
    468         } else {
    469             v->flags |= vertex_t::LIT;
    470             const GLvoid* cp = c->arrays.color.element(
    471                     v->index & vertex_cache_t::INDEX_MASK);
    472             c->arrays.color.fetch(c, v->color.v, cp);
    473         }
    474         if (enables & GGL_ENABLE_FOG) {
    475             v->fog = c->fog.fog(c, v->eye.z);
    476         }
    477     }
    478 
    479     // XXX: we don't need to do that each-time
    480     // if color array and lighting not enabled
    481     c->rasterizer.procs.color4xv(c, v->color.v);
    482 
    483     // XXX: look into ES point-sprite extension
    484     if (enables & GGL_ENABLE_TMUS) {
    485         fetch_texcoord(c, v,v,v);
    486         for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
    487             if (!c->rasterizer.state.texture[i].enable)
    488                 continue;
    489             int32_t itt[8];
    490             itt[1] = itt[2] = itt[4] = itt[5] = 0;
    491             itt[6] = itt[7] = 16; // XXX: check that
    492             if (c->rasterizer.state.texture[i].s_wrap == GGL_CLAMP) {
    493                 int width = c->textures.tmu[i].texture->surface.width;
    494                 itt[0] = v->texture[i].S * width;
    495                 itt[6] = 0;
    496             }
    497             if (c->rasterizer.state.texture[i].t_wrap == GGL_CLAMP) {
    498                 int height = c->textures.tmu[i].texture->surface.height;
    499                 itt[3] = v->texture[i].T * height;
    500                 itt[7] = 0;
    501             }
    502             c->rasterizer.procs.texCoordGradScale8xv(c, i, itt);
    503         }
    504     }
    505 
    506     if (enables & GGL_ENABLE_DEPTH_TEST) {
    507         int32_t itz[3];
    508         itz[0] = clampZ(v->window.z) * 0x00010001;
    509         itz[1] = itz[2] = 0;
    510         c->rasterizer.procs.zGrad3xv(c, itz);
    511     }
    512 
    513     if (enables & GGL_ENABLE_FOG) {
    514         GLfixed itf[3];
    515         itf[0] = v->fog;
    516         itf[1] = itf[2] = 0;
    517         c->rasterizer.procs.fogGrad3xv(c, itf);
    518     }
    519 
    520     // Render our point...
    521     c->rasterizer.procs.pointx(c, v->window.v, c->point.size);
    522 }
    523 
    524 // ----------------------------------------------------------------------------
    525 #if 0
    526 #pragma mark -
    527 #pragma mark Line
    528 #endif
    529 
    530 void primitive_nop_line(ogles_context_t*, vertex_t*, vertex_t*) {
    531 }
    532 
    533 void primitive_line(ogles_context_t* c, vertex_t* v0, vertex_t* v1)
    534 {
    535     // get texture coordinates
    536     fetch_texcoord(c, v0, v1, v1);
    537 
    538     // light/shade the vertices first (they're copied below)
    539     c->lighting.lightTriangle(c, v0, v1, v1);
    540 
    541     // clip the line if needed
    542     if (ggl_unlikely((v0->flags | v1->flags) & vertex_t::CLIP_ALL)) {
    543         unsigned int count = clip_line(c, v0, v1);
    544         if (ggl_unlikely(count == 0))
    545             return;
    546     }
    547 
    548     // compute iterators...
    549     const uint32_t enables = c->rasterizer.state.enables;
    550     const uint32_t mask =   GGL_ENABLE_TMUS |
    551                             GGL_ENABLE_SMOOTH |
    552                             GGL_ENABLE_W |
    553                             GGL_ENABLE_FOG |
    554                             GGL_ENABLE_DEPTH_TEST;
    555 
    556     if (ggl_unlikely(enables & mask)) {
    557         c->lerp.initLine(v0, v1);
    558         lerp_triangle(c, v0, v1, v0);
    559     }
    560 
    561     // render our line
    562     c->rasterizer.procs.linex(c, v0->window.v, v1->window.v, c->line.width);
    563 }
    564 
    565 // ----------------------------------------------------------------------------
    566 #if 0
    567 #pragma mark -
    568 #pragma mark Triangle
    569 #endif
    570 
    571 void primitive_nop_triangle(ogles_context_t* /*c*/,
    572         vertex_t* /*v0*/, vertex_t* /*v1*/, vertex_t* /*v2*/) {
    573 }
    574 
    575 void primitive_clip_triangle(ogles_context_t* c,
    576         vertex_t* v0, vertex_t* v1, vertex_t* v2)
    577 {
    578     uint32_t cc = (v0->flags | v1->flags | v2->flags) & vertex_t::CLIP_ALL;
    579     if (ggl_likely(!cc)) {
    580         // code below must be as optimized as possible, this is the
    581         // common code path.
    582 
    583         // This triangle is not clipped, test if it's culled
    584         // unclipped triangle...
    585         c->lerp.initTriangle(v0, v1, v2);
    586         if (cull_triangle(c, v0, v1, v2))
    587             return; // culled!
    588 
    589         // Fetch all texture coordinates if needed
    590         fetch_texcoord(c, v0, v1, v2);
    591 
    592         // light (or shade) our triangle!
    593         c->lighting.lightTriangle(c, v0, v1, v2);
    594 
    595         triangle(c, v0, v1, v2);
    596         return;
    597     }
    598 
    599     // The assumption here is that we're not going to clip very often,
    600     // and even more rarely will we clip a triangle that ends up
    601     // being culled out. So it's okay to light the vertices here, even though
    602     // in a few cases we won't render the triangle (if culled).
    603 
    604     // Fetch texture coordinates...
    605     fetch_texcoord(c, v0, v1, v2);
    606 
    607     // light (or shade) our triangle!
    608     c->lighting.lightTriangle(c, v0, v1, v2);
    609 
    610     clip_triangle(c, v0, v1, v2);
    611 }
    612 
    613 // -----------------------------------------------------------------------
    614 
    615 void triangle(ogles_context_t* c,
    616         vertex_t* v0, vertex_t* v1, vertex_t* v2)
    617 {
    618     // compute iterators...
    619     const uint32_t enables = c->rasterizer.state.enables;
    620     const uint32_t mask =   GGL_ENABLE_TMUS |
    621                             GGL_ENABLE_SMOOTH |
    622                             GGL_ENABLE_W |
    623                             GGL_ENABLE_FOG |
    624                             GGL_ENABLE_DEPTH_TEST;
    625 
    626     if (ggl_likely(enables & mask))
    627         lerp_triangle(c, v0, v1, v2);
    628 
    629     c->rasterizer.procs.trianglex(c, v0->window.v, v1->window.v, v2->window.v);
    630 }
    631 
    632 void lerp_triangle(ogles_context_t* c,
    633         vertex_t* v0, vertex_t* v1, vertex_t* v2)
    634 {
    635     const uint32_t enables = c->rasterizer.state.enables;
    636     c->lerp.initLerp(v0, enables);
    637 
    638     // set up texture iterators
    639     if (enables & GGL_ENABLE_TMUS) {
    640         if (enables & GGL_ENABLE_W) {
    641             lerp_texcoords_w(c, v0, v1, v2);
    642         } else {
    643             lerp_texcoords(c, v0, v1, v2);
    644         }
    645     }
    646 
    647     // set up the color iterators
    648     const compute_iterators_t& lerp = c->lerp;
    649     if (enables & GGL_ENABLE_SMOOTH) {
    650         GLfixed itc[12];
    651         for (int i=0 ; i<4 ; i++) {
    652             const GGLcolor c0 = v0->color.v[i] * 255;
    653             const GGLcolor c1 = v1->color.v[i] * 255;
    654             const GGLcolor c2 = v2->color.v[i] * 255;
    655             lerp.iterators1616(&itc[i*3], c0, c1, c2);
    656         }
    657         c->rasterizer.procs.colorGrad12xv(c, itc);
    658     }
    659 
    660     if (enables & GGL_ENABLE_DEPTH_TEST) {
    661         int32_t itz[3];
    662         const int32_t v0z = clampZ(v0->window.z);
    663         const int32_t v1z = clampZ(v1->window.z);
    664         const int32_t v2z = clampZ(v2->window.z);
    665         if (ggl_unlikely(c->polygonOffset.enable)) {
    666             const int32_t units = (c->polygonOffset.units << 16);
    667             const GLfixed factor = c->polygonOffset.factor;
    668             if (factor) {
    669                 int64_t itz64[3];
    670                 lerp.iterators0032(itz64, v0z, v1z, v2z);
    671                 int64_t maxDepthSlope = max(itz64[1], itz64[2]);
    672                 itz[0] = uint32_t(itz64[0])
    673                         + uint32_t((maxDepthSlope*factor)>>16) + units;
    674                 itz[1] = uint32_t(itz64[1]);
    675                 itz[2] = uint32_t(itz64[2]);
    676             } else {
    677                 lerp.iterators0032(itz, v0z, v1z, v2z);
    678                 itz[0] += units;
    679             }
    680         } else {
    681             lerp.iterators0032(itz, v0z, v1z, v2z);
    682         }
    683         c->rasterizer.procs.zGrad3xv(c, itz);
    684     }
    685 
    686     if (ggl_unlikely(enables & GGL_ENABLE_FOG)) {
    687         GLfixed itf[3];
    688         lerp.iterators1616(itf, v0->fog, v1->fog, v2->fog);
    689         c->rasterizer.procs.fogGrad3xv(c, itf);
    690     }
    691 }
    692 
    693 
    694 static inline
    695 int compute_lod(ogles_context_t* c, int i,
    696         int32_t s0, int32_t t0, int32_t s1, int32_t t1, int32_t s2, int32_t t2)
    697 {
    698     // Compute mipmap level / primitive
    699     // rho = sqrt( texelArea / area )
    700     // lod = log2( rho )
    701     // lod = log2( texelArea / area ) / 2
    702     // lod = (log2( texelArea ) - log2( area )) / 2
    703     const compute_iterators_t& lerp = c->lerp;
    704     const GGLcoord area = abs(lerp.area());
    705     const int w = c->textures.tmu[i].texture->surface.width;
    706     const int h = c->textures.tmu[i].texture->surface.height;
    707     const int shift = 16 + (16 - TRI_FRACTION_BITS);
    708     int32_t texelArea = abs( gglMulx(s1-s0, t2-t0, shift) -
    709             gglMulx(s2-s0, t1-t0, shift) )*w*h;
    710     int log2TArea = (32-TRI_FRACTION_BITS  -1) - gglClz(texelArea);
    711     int log2Area  = (32-TRI_FRACTION_BITS*2-1) - gglClz(area);
    712     int lod = (log2TArea - log2Area + 1) >> 1;
    713     return lod;
    714 }
    715 
    716 void lerp_texcoords(ogles_context_t* c,
    717         vertex_t* v0, vertex_t* v1, vertex_t* v2)
    718 {
    719     const compute_iterators_t& lerp = c->lerp;
    720     int32_t itt[8] __attribute__((aligned(16)));
    721     for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
    722         const texture_t& tmu = c->rasterizer.state.texture[i];
    723         if (!tmu.enable)
    724             continue;
    725 
    726         // compute the jacobians using block floating-point
    727         int32_t s0 = v0->texture[i].S;
    728         int32_t t0 = v0->texture[i].T;
    729         int32_t s1 = v1->texture[i].S;
    730         int32_t t1 = v1->texture[i].T;
    731         int32_t s2 = v2->texture[i].S;
    732         int32_t t2 = v2->texture[i].T;
    733 
    734         const GLenum min_filter = c->textures.tmu[i].texture->min_filter;
    735         if (ggl_unlikely(min_filter >= GL_NEAREST_MIPMAP_NEAREST)) {
    736             int lod = compute_lod(c, i, s0, t0, s1, t1, s2, t2);
    737             c->rasterizer.procs.bindTextureLod(c, i,
    738                     &c->textures.tmu[i].texture->mip(lod));
    739         }
    740 
    741         // premultiply (s,t) when clampling
    742         if (tmu.s_wrap == GGL_CLAMP) {
    743             const int width = tmu.surface.width;
    744             s0 *= width;
    745             s1 *= width;
    746             s2 *= width;
    747         }
    748         if (tmu.t_wrap == GGL_CLAMP) {
    749             const int height = tmu.surface.height;
    750             t0 *= height;
    751             t1 *= height;
    752             t2 *= height;
    753         }
    754         itt[6] = -lerp.iteratorsScale(itt+0, s0, s1, s2);
    755         itt[7] = -lerp.iteratorsScale(itt+3, t0, t1, t2);
    756         c->rasterizer.procs.texCoordGradScale8xv(c, i, itt);
    757     }
    758 }
    759 
    760 void lerp_texcoords_w(ogles_context_t* c,
    761         vertex_t* v0, vertex_t* v1, vertex_t* v2)
    762 {
    763     const compute_iterators_t& lerp = c->lerp;
    764     int32_t itt[8] __attribute__((aligned(16)));
    765     int32_t itw[3];
    766 
    767     // compute W's scale to 2.30
    768     int32_t w0 = v0->window.w;
    769     int32_t w1 = v1->window.w;
    770     int32_t w2 = v2->window.w;
    771     int wscale = 32 - gglClz(w0|w1|w2);
    772 
    773     // compute the jacobian using block floating-point
    774     int sc = lerp.iteratorsScale(itw, w0, w1, w2);
    775     sc +=  wscale - 16;
    776     c->rasterizer.procs.wGrad3xv(c, itw);
    777 
    778     for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
    779         const texture_t& tmu = c->rasterizer.state.texture[i];
    780         if (!tmu.enable)
    781             continue;
    782 
    783         // compute the jacobians using block floating-point
    784         int32_t s0 = v0->texture[i].S;
    785         int32_t t0 = v0->texture[i].T;
    786         int32_t s1 = v1->texture[i].S;
    787         int32_t t1 = v1->texture[i].T;
    788         int32_t s2 = v2->texture[i].S;
    789         int32_t t2 = v2->texture[i].T;
    790 
    791         const GLenum min_filter = c->textures.tmu[i].texture->min_filter;
    792         if (ggl_unlikely(min_filter >= GL_NEAREST_MIPMAP_NEAREST)) {
    793             int lod = compute_lod(c, i, s0, t0, s1, t1, s2, t2);
    794             c->rasterizer.procs.bindTextureLod(c, i,
    795                     &c->textures.tmu[i].texture->mip(lod));
    796         }
    797 
    798         // premultiply (s,t) when clampling
    799         if (tmu.s_wrap == GGL_CLAMP) {
    800             const int width = tmu.surface.width;
    801             s0 *= width;
    802             s1 *= width;
    803             s2 *= width;
    804         }
    805         if (tmu.t_wrap == GGL_CLAMP) {
    806             const int height = tmu.surface.height;
    807             t0 *= height;
    808             t1 *= height;
    809             t2 *= height;
    810         }
    811 
    812         s0 = gglMulx(s0, w0, wscale);
    813         t0 = gglMulx(t0, w0, wscale);
    814         s1 = gglMulx(s1, w1, wscale);
    815         t1 = gglMulx(t1, w1, wscale);
    816         s2 = gglMulx(s2, w2, wscale);
    817         t2 = gglMulx(t2, w2, wscale);
    818 
    819         itt[6] = sc - lerp.iteratorsScale(itt+0, s0, s1, s2);
    820         itt[7] = sc - lerp.iteratorsScale(itt+3, t0, t1, t2);
    821         c->rasterizer.procs.texCoordGradScale8xv(c, i, itt);
    822     }
    823 }
    824 
    825 
    826 static inline
    827 bool cull_triangle(ogles_context_t* c, vertex_t* /*v0*/, vertex_t* /*v1*/, vertex_t* /*v2*/)
    828 {
    829     if (ggl_likely(c->cull.enable)) {
    830         const GLenum winding = (c->lerp.area() > 0) ? GL_CW : GL_CCW;
    831         const GLenum face = (winding == c->cull.frontFace) ? GL_FRONT : GL_BACK;
    832         if (face == c->cull.cullFace)
    833             return true; // culled!
    834     }
    835     return false;
    836 }
    837 
    838 static inline
    839 GLfixed frustumPlaneDist(int plane, const vec4_t& s)
    840 {
    841     const GLfixed d = s.v[ plane >> 1 ];
    842     return  ((plane & 1) ? (s.w - d) : (s.w + d));
    843 }
    844 
    845 static inline
    846 int32_t clipDivide(GLfixed a, GLfixed b) {
    847     // returns a 4.28 fixed-point
    848     return gglMulDivi(1LU<<28, a, b);
    849 }
    850 
    851 void clip_triangle(ogles_context_t* c,
    852         vertex_t* v0, vertex_t* v1, vertex_t* v2)
    853 {
    854     uint32_t all_cc = (v0->flags | v1->flags | v2->flags) & vertex_t::CLIP_ALL;
    855 
    856     vertex_t *p0, *p1, *p2;
    857     const int MAX_CLIPPING_PLANES = 6 + OGLES_MAX_CLIP_PLANES;
    858     const int MAX_VERTICES = 3;
    859 
    860     // Temporary buffer to hold the new vertices. Each plane can add up to
    861     // two new vertices (because the polygon is convex).
    862     // We need one extra element, to handle an overflow case when
    863     // the polygon degenerates into something non convex.
    864     vertex_t buffer[MAX_CLIPPING_PLANES * 2 + 1];   // ~3KB
    865     vertex_t* buf = buffer;
    866 
    867     // original list of vertices (polygon to clip, in fact this
    868     // function works with an arbitrary polygon).
    869     vertex_t* in[3] = { v0, v1, v2 };
    870 
    871     // output lists (we need 2, which we use back and forth)
    872     // (maximum outpout list's size is MAX_CLIPPING_PLANES + MAX_VERTICES)
    873     // 2 more elements for overflow when non convex polygons.
    874     vertex_t* out[2][MAX_CLIPPING_PLANES + MAX_VERTICES + 2];
    875     unsigned int outi = 0;
    876 
    877     // current input list
    878     vertex_t** ivl = in;
    879 
    880     // 3 input vertices, 0 in the output list, first plane
    881     unsigned int ic = 3;
    882 
    883     // User clip-planes first, the clipping is always done in eye-coordinate
    884     // this is basically the same algorithm than for the view-volume
    885     // clipping, except for the computation of the distance (vertex, plane)
    886     // and the fact that we need to compute the eye-coordinates of each
    887     // new vertex we create.
    888 
    889     if (ggl_unlikely(all_cc & vertex_t::USER_CLIP_ALL))
    890     {
    891         unsigned int plane = 0;
    892         uint32_t cc = (all_cc & vertex_t::USER_CLIP_ALL) >> 8;
    893         do {
    894             if (cc & 1) {
    895                 // pointers to our output list (head and current)
    896                 vertex_t** const ovl = &out[outi][0];
    897                 vertex_t** output = ovl;
    898                 unsigned int oc = 0;
    899                 unsigned int sentinel = 0;
    900                 // previous vertex, compute distance to the plane
    901                 vertex_t* s = ivl[ic-1];
    902                 const vec4_t& equation = c->clipPlanes.plane[plane].equation;
    903                 GLfixed sd = dot4(equation.v, s->eye.v);
    904                 // clip each vertex against this plane...
    905                 for (unsigned int i=0 ; i<ic ; i++) {
    906                     vertex_t* p = ivl[i];
    907                     const GLfixed pd = dot4(equation.v, p->eye.v);
    908                     if (sd >= 0) {
    909                         if (pd >= 0) {
    910                             // both inside
    911                             *output++ = p;
    912                             oc++;
    913                         } else {
    914                             // s inside, p outside (exiting)
    915                             const GLfixed t = clipDivide(sd, sd-pd);
    916                             c->arrays.clipEye(c, buf, t, p, s);
    917                             *output++ = buf++;
    918                             oc++;
    919                             if (++sentinel >= 3)
    920                                 return; // non-convex polygon!
    921                         }
    922                     } else {
    923                         if (pd >= 0) {
    924                             // s outside (entering)
    925                             if (pd) {
    926                                 const GLfixed t = clipDivide(pd, pd-sd);
    927                                 c->arrays.clipEye(c, buf, t, s, p);
    928                                 *output++ = buf++;
    929                                 oc++;
    930                                 if (++sentinel >= 3)
    931                                     return; // non-convex polygon!
    932                             }
    933                             *output++ = p;
    934                             oc++;
    935                         } else {
    936                            // both outside
    937                         }
    938                     }
    939                     s = p;
    940                     sd = pd;
    941                 }
    942                 // output list become the new input list
    943                 if (oc<3)
    944                     return; // less than 3 vertices left? we're done!
    945                 ivl = ovl;
    946                 ic = oc;
    947                 outi = 1-outi;
    948             }
    949             cc >>= 1;
    950             plane++;
    951         } while (cc);
    952     }
    953 
    954     // frustum clip-planes
    955     if (all_cc & vertex_t::FRUSTUM_CLIP_ALL)
    956     {
    957         unsigned int plane = 0;
    958         uint32_t cc = all_cc & vertex_t::FRUSTUM_CLIP_ALL;
    959         do {
    960             if (cc & 1) {
    961                 // pointers to our output list (head and current)
    962                 vertex_t** const ovl = &out[outi][0];
    963                 vertex_t** output = ovl;
    964                 unsigned int oc = 0;
    965                 unsigned int sentinel = 0;
    966                 // previous vertex, compute distance to the plane
    967                 vertex_t* s = ivl[ic-1];
    968                 GLfixed sd = frustumPlaneDist(plane, s->clip);
    969                 // clip each vertex against this plane...
    970                 for (unsigned int i=0 ; i<ic ; i++) {
    971                     vertex_t* p = ivl[i];
    972                     const GLfixed pd = frustumPlaneDist(plane, p->clip);
    973                     if (sd >= 0) {
    974                         if (pd >= 0) {
    975                             // both inside
    976                             *output++ = p;
    977                             oc++;
    978                         } else {
    979                             // s inside, p outside (exiting)
    980                             const GLfixed t = clipDivide(sd, sd-pd);
    981                             c->arrays.clipVertex(c, buf, t, p, s);
    982                             *output++ = buf++;
    983                             oc++;
    984                             if (++sentinel >= 3)
    985                                 return; // non-convex polygon!
    986                         }
    987                     } else {
    988                         if (pd >= 0) {
    989                             // s outside (entering)
    990                             if (pd) {
    991                                 const GLfixed t = clipDivide(pd, pd-sd);
    992                                 c->arrays.clipVertex(c, buf, t, s, p);
    993                                 *output++ = buf++;
    994                                 oc++;
    995                                 if (++sentinel >= 3)
    996                                     return; // non-convex polygon!
    997                             }
    998                             *output++ = p;
    999                             oc++;
   1000                         } else {
   1001                            // both outside
   1002                         }
   1003                     }
   1004                     s = p;
   1005                     sd = pd;
   1006                 }
   1007                 // output list become the new input list
   1008                 if (oc<3)
   1009                     return; // less than 3 vertices left? we're done!
   1010                 ivl = ovl;
   1011                 ic = oc;
   1012                 outi = 1-outi;
   1013             }
   1014             cc >>= 1;
   1015             plane++;
   1016         } while (cc);
   1017     }
   1018 
   1019     // finally we can render our triangles...
   1020     p0 = ivl[0];
   1021     p1 = ivl[1];
   1022     for (unsigned int i=2 ; i<ic ; i++) {
   1023         p2 = ivl[i];
   1024         c->lerp.initTriangle(p0, p1, p2);
   1025         if (cull_triangle(c, p0, p1, p2)) {
   1026             p1 = p2;
   1027             continue; // culled!
   1028         }
   1029         triangle(c, p0, p1, p2);
   1030         p1 = p2;
   1031     }
   1032 }
   1033 
   1034 unsigned int clip_line(ogles_context_t* c, vertex_t* s, vertex_t* p)
   1035 {
   1036     const uint32_t all_cc = (s->flags | p->flags) & vertex_t::CLIP_ALL;
   1037 
   1038     if (ggl_unlikely(all_cc & vertex_t::USER_CLIP_ALL))
   1039     {
   1040         unsigned int plane = 0;
   1041         uint32_t cc = (all_cc & vertex_t::USER_CLIP_ALL) >> 8;
   1042         do {
   1043             if (cc & 1) {
   1044                 const vec4_t& equation = c->clipPlanes.plane[plane].equation;
   1045                 const GLfixed sd = dot4(equation.v, s->eye.v);
   1046                 const GLfixed pd = dot4(equation.v, p->eye.v);
   1047                 if (sd >= 0) {
   1048                     if (pd >= 0) {
   1049                         // both inside
   1050                     } else {
   1051                         // s inside, p outside (exiting)
   1052                         const GLfixed t = clipDivide(sd, sd-pd);
   1053                         c->arrays.clipEye(c, p, t, p, s);
   1054                     }
   1055                 } else {
   1056                     if (pd >= 0) {
   1057                         // s outside (entering)
   1058                         if (pd) {
   1059                             const GLfixed t = clipDivide(pd, pd-sd);
   1060                             c->arrays.clipEye(c, s, t, s, p);
   1061                         }
   1062                     } else {
   1063                        // both outside
   1064                        return 0;
   1065                     }
   1066                 }
   1067             }
   1068             cc >>= 1;
   1069             plane++;
   1070         } while (cc);
   1071     }
   1072 
   1073     // frustum clip-planes
   1074     if (all_cc & vertex_t::FRUSTUM_CLIP_ALL)
   1075     {
   1076         unsigned int plane = 0;
   1077         uint32_t cc = all_cc & vertex_t::FRUSTUM_CLIP_ALL;
   1078         do {
   1079             if (cc & 1) {
   1080                 const GLfixed sd = frustumPlaneDist(plane, s->clip);
   1081                 const GLfixed pd = frustumPlaneDist(plane, p->clip);
   1082                 if (sd >= 0) {
   1083                     if (pd >= 0) {
   1084                         // both inside
   1085                     } else {
   1086                         // s inside, p outside (exiting)
   1087                         const GLfixed t = clipDivide(sd, sd-pd);
   1088                         c->arrays.clipVertex(c, p, t, p, s);
   1089                     }
   1090                 } else {
   1091                     if (pd >= 0) {
   1092                         // s outside (entering)
   1093                         if (pd) {
   1094                             const GLfixed t = clipDivide(pd, pd-sd);
   1095                             c->arrays.clipVertex(c, s, t, s, p);
   1096                         }
   1097                     } else {
   1098                        // both outside
   1099                        return 0;
   1100                     }
   1101                 }
   1102             }
   1103             cc >>= 1;
   1104             plane++;
   1105         } while (cc);
   1106     }
   1107 
   1108     return 2;
   1109 }
   1110 
   1111 
   1112 }; // namespace android
   1113