/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Keith Whitwell <keithw@vmware.com>
 */

#include "main/glheader.h"
#include "main/context.h"
#include "main/imports.h"
#include "main/mtypes.h"

#include "t_context.h"
#include "t_pipeline.h"
#include "t_vp_build.h"
#include "t_vertex.h"

void _tnl_install_pipeline( struct gl_context *ctx,
                            const struct tnl_pipeline_stage **stages )
{
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   GLuint i;

   tnl->pipeline.new_state = ~0;

   /* Create a writeable copy of each stage.
    */
   for (i = 0 ; i < MAX_PIPELINE_STAGES && stages[i] ; i++) {
      struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
      memcpy(s, stages[i], sizeof(*s));
      if (s->create)
         s->create(ctx, s);
   }

   tnl->pipeline.nr_stages = i;
}
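
/* Usage sketch (illustrative, guarded by #if 0 so it is not compiled):
 * a driver typically installs a stage list once at context-creation
 * time, e.g. the default list declared at the bottom of this file:
 */
#if 0
   _tnl_install_pipeline( ctx, _tnl_default_pipeline );
#endif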

void _tnl_destroy_pipeline( struct gl_context *ctx )
{
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   GLuint i;

   for (i = 0 ; i < tnl->pipeline.nr_stages ; i++) {
      struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
      if (s->destroy)
         s->destroy(s);
   }

   tnl->pipeline.nr_stages = 0;
}



static GLuint check_input_changes( struct gl_context *ctx )
{
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   GLuint i;

   for (i = 0; i <= _TNL_LAST_MAT; i++) {
      if (tnl->vb.AttribPtr[i]->size != tnl->pipeline.last_attrib_size[i] ||
          tnl->vb.AttribPtr[i]->stride != tnl->pipeline.last_attrib_stride[i]) {
         tnl->pipeline.last_attrib_size[i] = tnl->vb.AttribPtr[i]->size;
         tnl->pipeline.last_attrib_stride[i] = tnl->vb.AttribPtr[i]->stride;
         tnl->pipeline.input_changes |= 1<<i;
      }
   }

   return tnl->pipeline.input_changes;
}


static GLuint check_output_changes( struct gl_context *ctx )
{
#if 0
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   GLuint i;

   for (i = 0; i < VARYING_SLOT_MAX; i++) {
      if (tnl->vb.ResultPtr[i]->size != tnl->last_result_size[i] ||
          tnl->vb.ResultPtr[i]->stride != tnl->last_result_stride[i]) {
         tnl->last_result_size[i] = tnl->vb.ResultPtr[i]->size;
         tnl->last_result_stride[i] = tnl->vb.ResultPtr[i]->stride;
         tnl->pipeline.output_changes |= 1<<i;
      }
   }

   if (tnl->pipeline.output_changes)
      tnl->Driver.NotifyOutputChanges( ctx, tnl->pipeline.output_changes );

   return tnl->pipeline.output_changes;
#else
   return ~0;
#endif
}

/**
 * START/END_FAST_MATH macros:
 *
 * START_FAST_MATH: Set x86 FPU to faster, 32-bit precision mode (and save
 *                  original mode to a temporary).
 * END_FAST_MATH: Restore x86 FPU to original mode.
 */
#if defined(__GNUC__) && defined(__i386__)
/*
 * Set the x86 FPU control word to guarantee only 32 bits of precision
 * are stored in registers.  Allowing the FPU to store more introduces
 * differences between situations where numbers are pulled out of memory
 * vs. situations where the compiler is able to optimize register usage.
 *
 * In the worst case, we force the compiler to use a memory access to
 * truncate the float, by specifying the 'volatile' keyword.
 */
/* Hardware default: All exceptions masked, extended double precision,
 * round to nearest (IEEE compliant):
 */
#define DEFAULT_X86_FPU    0x037f
/* All exceptions masked, single precision, round to nearest:
 */
#define FAST_X86_FPU    0x003f
/* The fldcw instruction will cause any pending FP exceptions to be
 * raised prior to entering the block, and we clear any pending
 * exceptions before exiting the block.  Hence, asm code has free
 * rein over the FPU while in the fast math block.
 */
#if defined(NO_FAST_MATH)
#define START_FAST_MATH(x)                                              \
do {                                                                    \
   static GLuint mask = DEFAULT_X86_FPU;                                \
   __asm__ ( "fnstcw %0" : "=m" (*&(x)) );                              \
   __asm__ ( "fldcw %0" : : "m" (mask) );                               \
} while (0)
#else
#define START_FAST_MATH(x)                                              \
do {                                                                    \
   static GLuint mask = FAST_X86_FPU;                                   \
   __asm__ ( "fnstcw %0" : "=m" (*&(x)) );                              \
   __asm__ ( "fldcw %0" : : "m" (mask) );                               \
} while (0)
#endif
/* Restore original FPU mode, and clear any exceptions that may have
 * occurred in the FAST_MATH block.
 */
#define END_FAST_MATH(x)                                                \
do {                                                                    \
   __asm__ ( "fnclex ; fldcw %0" : : "m" (*&(x)) );                     \
} while (0)

#elif defined(_MSC_VER) && defined(_M_IX86)
#define DEFAULT_X86_FPU    0x037f /* See GCC comments above */
#define FAST_X86_FPU    0x003f /* See GCC comments above */
#if defined(NO_FAST_MATH)
#define START_FAST_MATH(x) do {\
   static GLuint mask = DEFAULT_X86_FPU;\
   __asm fnstcw word ptr [x]\
   __asm fldcw word ptr [mask]\
} while(0)
#else
#define START_FAST_MATH(x) do {\
   static GLuint mask = FAST_X86_FPU;\
   __asm fnstcw word ptr [x]\
   __asm fldcw word ptr [mask]\
} while(0)
#endif
#define END_FAST_MATH(x) do {\
   __asm fnclex\
   __asm fldcw word ptr [x]\
} while(0)

#else
#define START_FAST_MATH(x)  x = 0
#define END_FAST_MATH(x)  (void)(x)
#endif
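
/* Usage sketch: the macros must bracket a float-heavy block as a pair,
 * with the saved control word held in a local unsigned short (see
 * _tnl_run_pipeline() below for the real use):
 */
#if 0
{
   unsigned short saved_cw;
   START_FAST_MATH(saved_cw);   /* save current mode, drop to single precision */
   /* ... FPU-intensive vertex math ... */
   END_FAST_MATH(saved_cw);     /* clear exceptions, restore saved mode */
}
#endif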


void _tnl_run_pipeline( struct gl_context *ctx )
{
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   unsigned short __tmp;
   GLuint i;

   if (!tnl->vb.Count)
      return;

   /* Check for changed input sizes or change in stride to/from zero
    * (i.e. const or non-const).
    */
   if (check_input_changes( ctx ) || tnl->pipeline.new_state) {
      if (ctx->VertexProgram._MaintainTnlProgram)
         _tnl_UpdateFixedFunctionProgram( ctx );

      for (i = 0; i < tnl->pipeline.nr_stages ; i++) {
         struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
         if (s->validate)
            s->validate( ctx, s );
      }

      tnl->pipeline.new_state = 0;
      tnl->pipeline.input_changes = 0;

      /* The pipeline can only change its output in response to either a
       * statechange or an input size/stride change.  No other changes
       * are allowed.
       */
      if (check_output_changes( ctx ))
         _tnl_notify_pipeline_output_change( ctx );
   }

#ifndef _OPENMP
   /* Don't adjust the FPU precision mode when multiple threads may be used.
    * That would require the additional threads to switch the FPU mode as
    * well, which is quite a mess since it would have to be done in every
    * parallelized section; otherwise the master thread and the other
    * threads would run in different modes and produce inconsistent results.
    * Note that x64 implementations don't define/use START_FAST_MATH, so
    * this hack is only used in i386 mode.
    */
   START_FAST_MATH(__tmp);
#endif

   for (i = 0; i < tnl->pipeline.nr_stages ; i++) {
      struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
      if (!s->run( ctx, s ))
         break;
   }

#ifndef _OPENMP
   END_FAST_MATH(__tmp);
#endif
}
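
/* Hook-up sketch: drivers normally reach this function indirectly
 * through the tnl Driver.RunPipeline hook, pointing it either here or
 * at a wrapper during context setup:
 */
#if 0
   tnl->Driver.RunPipeline = _tnl_run_pipeline;
#endif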


/* The default pipeline.  This is useful for software rasterizers, and
 * simple hardware rasterizers.  For customization, I don't recommend
 * tampering with the internals of these stages in the way that
 * drivers did in Mesa 3.4.  These stages are basically black boxes,
 * and should be left intact.
 *
 * To customize the pipeline, consider:
 *
 * - removing redundant stages (making sure that the software rasterizer
 *   can cope with this on fallback paths).  An example is fog
 *   coordinate generation, which is not required in the FX driver.
 *
 * - replacing general-purpose machine-independent stages with
 *   general-purpose machine-specific stages.  There is no example of
 *   this to date, though it must be borne in mind that all subsequent
 *   stages that reference the output of the new stage must cope with
 *   any machine-specific data introduced.  This may not be easy
 *   unless there are no such stages (i.e. the new stage is the last in
 *   the pipe).
 *
 * - inserting optimized (but specialized) stages ahead of the
 *   general-purpose fallback implementation.  For example, the old
 *   fastpath mechanism, which only works when the VB->Elts input is
 *   available, can be duplicated by placing the fastpath stage at the
 *   head of this pipeline.  Such specialized stages are currently
 *   constrained to have no outputs (i.e. they must either finish the
 *   pipeline by returning GL_FALSE from run(), or do nothing).
 *
 * Some work can be done to lift some of the restrictions in the final
 * case, if it becomes necessary to do so.  Illustrative sketches of a
 * customized stage list and of a custom stage follow the lists below.
 */
const struct tnl_pipeline_stage *_tnl_default_pipeline[] = {
   &_tnl_vertex_transform_stage,
   &_tnl_normal_transform_stage,
   &_tnl_lighting_stage,
   &_tnl_texgen_stage,
   &_tnl_texture_transform_stage,
   &_tnl_point_attenuation_stage,
   &_tnl_vertex_program_stage,
   &_tnl_fog_coordinate_stage,
   &_tnl_render_stage,
   NULL
};
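
/* Customization sketch, per the notes above: a hypothetical hardware
 * driver's stage list that simply drops the fog-coordinate stage it
 * does not need.  "example_hw_pipeline" is illustrative only and not
 * part of Mesa; it is kept inside #if 0 so nothing here is compiled.
 */
#if 0
const struct tnl_pipeline_stage *example_hw_pipeline[] = {
   &_tnl_vertex_transform_stage,
   &_tnl_normal_transform_stage,
   &_tnl_lighting_stage,
   &_tnl_texgen_stage,
   &_tnl_texture_transform_stage,
   &_tnl_point_attenuation_stage,
   &_tnl_vertex_program_stage,
   &_tnl_render_stage,          /* fog coordinate stage omitted */
   NULL
};
#endif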

const struct tnl_pipeline_stage *_tnl_vp_pipeline[] = {
   &_tnl_vertex_program_stage,
   &_tnl_render_stage,
   NULL
};
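
/* Stage-definition sketch: the hook signatures follow the calls made
 * above -- create()/validate()/run() take (ctx, stage), destroy() takes
 * just the stage, and run() returns GL_FALSE to end the pipeline early.
 * Everything named "my_*" is a hypothetical placeholder; the designated
 * initializers assume the field names of struct tnl_pipeline_stage in
 * t_context.h.  Kept inside #if 0 so nothing here is compiled.
 */
#if 0
static GLboolean my_stage_run( struct gl_context *ctx,
                               struct tnl_pipeline_stage *stage )
{
   /* read/transform tnl->vb data here */
   return GL_TRUE;   /* GL_TRUE: let the remaining stages run */
}

static const struct tnl_pipeline_stage my_stage = {
   .name = "my stage",
   .run = my_stage_run   /* create/destroy/validate stay NULL; they are
                          * NULL-checked before use above */
};
#endif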