Home | History | Annotate | Download | only in math
      1 /*
      2  * Mesa 3-D graphics library
      3  *
      4  * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice shall be included
     14  * in all copies or substantial portions of the Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
     20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     22  * OTHER DEALINGS IN THE SOFTWARE.
     23  *
     24  * Authors:
     25  *    Gareth Hughes
     26  */
     27 
     28 #ifndef __M_DEBUG_UTIL_H__
     29 #define __M_DEBUG_UTIL_H__
     30 
     31 
     32 #ifdef DEBUG_MATH  /* This code only used for debugging */
     33 
     34 
     35 #include "c99_math.h"
     36 
     37 
     38 /* Comment this out to deactivate the cycle counter.
     39  * NOTE: it works only on CPUs which know the 'rdtsc' command (586 or higher)
     40  * (hope, you don't try to debug Mesa on a 386 ;)
     41  */
     42 #if defined(__GNUC__) && \
     43     ((defined(__i386__) && defined(USE_X86_ASM)) || \
     44      (defined(__sparc__) && defined(USE_SPARC_ASM)))
     45 #define  RUN_DEBUG_BENCHMARK
     46 #endif
     47 
     48 #define TEST_COUNT		128	/* size of the tested vector array   */
     49 
     50 #define REQUIRED_PRECISION	10	/* allow 4 bits to miss              */
     51 #define MAX_PRECISION		24	/* max. precision possible           */
     52 
     53 
     54 #ifdef  RUN_DEBUG_BENCHMARK
     55 /* Overhead of profiling counter in cycles.  Automatically adjusted to
     56  * your machine at run time - counter initialization should give very
     57  * consistent results.
     58  */
     59 extern long counter_overhead;
     60 
     61 /* This is the value of the environment variable MESA_PROFILE, and is
     62  * used to determine if we should benchmark the functions as well as
     63  * verify their correctness.
     64  */
     65 extern char *mesa_profile;
     66 
     67 /* Modify the number of tests if you like.
     68  * We take the minimum of all results, because every error should be
     69  * positive (time used by other processes, task switches etc).
     70  * It is assumed that all calculations are done in the cache.
     71  */
     72 
     73 #if defined(__i386__)
     74 
     75 #if 1 /* PPro, PII, PIII version */
     76 
     77 /* Profiling on the P6 architecture requires a little more work, due to
     78  * the internal out-of-order execution.  We must perform a serializing
     79  * 'cpuid' instruction before and after the 'rdtsc' instructions to make
     80  * sure no other uops are executed when we sample the timestamp counter.
     81  */
/* Calibrate counter_overhead: take the minimum cost of eight back-to-back
 * timestamp reads.  'cpuid' serializes the out-of-order P6 core around
 * each 'rdtsc'; %ebx is saved/restored around 'cpuid' because it clobbers
 * that register (presumably to protect the PIC register -- confirm).
 * NOTE(review): only the low 32 bits of the TSC (%eax) are captured,
 * which is fine for the short deltas measured here.
 */
#define  INIT_COUNTER()							\
   do {									\
      int cycle_i;							\
      counter_overhead = LONG_MAX;					\
      for ( cycle_i = 0 ; cycle_i < 8 ; cycle_i++ ) {			\
	 long cycle_tmp1 = 0, cycle_tmp2 = 0;				\
	 __asm__ __volatile__ ( "push %%ebx       \n"			\
				"xor %%eax, %%eax \n"			\
				"cpuid            \n"			\
				"rdtsc            \n"			\
				"mov %%eax, %0    \n"			\
				"xor %%eax, %%eax \n"			\
				"cpuid            \n"			\
				"pop %%ebx        \n"			\
				"push %%ebx       \n"			\
				"xor %%eax, %%eax \n"			\
				"cpuid            \n"			\
				"rdtsc            \n"			\
				"mov %%eax, %1    \n"			\
				"xor %%eax, %%eax \n"			\
				"cpuid            \n"			\
				"pop %%ebx        \n"			\
				: "=m" (cycle_tmp1), "=m" (cycle_tmp2)	\
				: : "eax", "ecx", "edx" );		\
	 if ( counter_overhead > (cycle_tmp2 - cycle_tmp1) ) {		\
	    counter_overhead = cycle_tmp2 - cycle_tmp1;			\
	 }								\
      }									\
   } while (0)

/* Open a timing region: the code between BEGIN_RACE(x) and END_RACE(x)
 * is run 10 times and x receives the minimum elapsed cycle count.
 * Expects an 'int cycle_i' declared in the enclosing scope.  This macro
 * deliberately opens a for-loop and a scope that END_RACE closes, so the
 * two must always be used as a pair.
 */
#define  BEGIN_RACE(x)							\
   x = LONG_MAX;							\
   for ( cycle_i = 0 ; cycle_i < 10 ; cycle_i++ ) {			\
      long cycle_tmp1 = 0, cycle_tmp2 = 0;				\
      __asm__ __volatile__ ( "push %%ebx       \n"			\
			     "xor %%eax, %%eax \n"			\
			     "cpuid            \n"			\
			     "rdtsc            \n"			\
			     "mov %%eax, %0    \n"			\
			     "xor %%eax, %%eax \n"			\
			     "cpuid            \n"			\
			     "pop %%ebx        \n"			\
			     : "=m" (cycle_tmp1)			\
			     : : "eax", "ecx", "edx" );

/* Close the timing region opened by BEGIN_RACE: sample the counter again,
 * keep the minimum delta over all iterations, then subtract the overhead
 * of the counter-read sequence measured by INIT_COUNTER.
 */
#define END_RACE(x)							\
      __asm__ __volatile__ ( "push %%ebx       \n"			\
			     "xor %%eax, %%eax \n"			\
			     "cpuid            \n"			\
			     "rdtsc            \n"			\
			     "mov %%eax, %0    \n"			\
			     "xor %%eax, %%eax \n"			\
			     "cpuid            \n"			\
			     "pop %%ebx        \n"			\
			     : "=m" (cycle_tmp2)			\
			     : : "eax", "ecx", "edx" );			\
      if ( x > (cycle_tmp2 - cycle_tmp1) ) {				\
	 x = cycle_tmp2 - cycle_tmp1;					\
      }									\
   }									\
   x -= counter_overhead;
    143 
    144 #else /* PPlain, PMMX version */
    145 
    146 /* To ensure accurate results, we stall the pipelines with the
    147  * non-pairable 'cdq' instruction.  This ensures all the code being
    148  * profiled is complete when the 'rdtsc' instruction executes.
    149  */
    150 #define  INIT_COUNTER(x)						\
    151    do {									\
    152       int cycle_i;							\
    153       x = LONG_MAX;							\
    154       for ( cycle_i = 0 ; cycle_i < 32 ; cycle_i++ ) {			\
    155 	 long cycle_tmp1, cycle_tmp2, dummy;				\
    156 	 __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp1) );		\
    157 	 __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp2) );		\
    158 	 __asm__ ( "cdq" );						\
    159 	 __asm__ ( "cdq" );						\
    160 	 __asm__ ( "rdtsc" : "=a" (cycle_tmp1), "=d" (dummy) );		\
    161 	 __asm__ ( "cdq" );						\
    162 	 __asm__ ( "cdq" );						\
    163 	 __asm__ ( "rdtsc" : "=a" (cycle_tmp2), "=d" (dummy) );		\
    164 	 if ( x > (cycle_tmp2 - cycle_tmp1) )				\
    165 	    x = cycle_tmp2 - cycle_tmp1;				\
    166       }									\
    167    } while (0)
    168 
    169 #define  BEGIN_RACE(x)							\
    170    x = LONG_MAX;							\
    171    for ( cycle_i = 0 ; cycle_i < 16 ; cycle_i++ ) {			\
    172       long cycle_tmp1, cycle_tmp2, dummy;				\
    173       __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp1) );			\
    174       __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp2) );			\
    175       __asm__ ( "cdq" );						\
    176       __asm__ ( "cdq" );						\
    177       __asm__ ( "rdtsc" : "=a" (cycle_tmp1), "=d" (dummy) );
    178 
    179 
    180 #define END_RACE(x)							\
    181       __asm__ ( "cdq" );						\
    182       __asm__ ( "cdq" );						\
    183       __asm__ ( "rdtsc" : "=a" (cycle_tmp2), "=d" (dummy) );		\
    184       if ( x > (cycle_tmp2 - cycle_tmp1) )				\
    185 	 x = cycle_tmp2 - cycle_tmp1;					\
    186    }									\
    187    x -= counter_overhead;
    188 
    189 #endif
    190 
    191 #elif defined(__x86_64__)
    192 
/* Read the full 64-bit time-stamp counter; 'rdtsc' returns the low half
 * in %eax and the high half in %edx, which are recombined into val.
 */
#define rdtscll(val) do { \
     unsigned int a,d; \
     __asm__ volatile("rdtsc" : "=a" (a), "=d" (d)); \
     (val) = ((unsigned long)a) | (((unsigned long)d)<<32); \
} while(0)

/* Copied from i386 PIII version */
/* Calibrate counter_overhead: minimum cost of 16 back-to-back timestamp
 * reads.  NOTE(review): unlike the P6 variant there is no serializing
 * 'cpuid' here, so out-of-order execution may skew individual samples;
 * taking the minimum mitigates this.
 */
#define  INIT_COUNTER()							\
   do {									\
      int cycle_i;							\
      counter_overhead = LONG_MAX;					\
      for ( cycle_i = 0 ; cycle_i < 16 ; cycle_i++ ) {			\
	 unsigned long cycle_tmp1, cycle_tmp2;        			\
	 rdtscll(cycle_tmp1);						\
	 rdtscll(cycle_tmp2);						\
	 if ( counter_overhead > (cycle_tmp2 - cycle_tmp1) ) {		\
	    counter_overhead = cycle_tmp2 - cycle_tmp1;			\
	 }								\
      }									\
   } while (0)


/* Open a timing region: the code between BEGIN_RACE(x) and END_RACE(x)
 * is run 10 times and x receives the minimum elapsed cycle count.
 * Expects an 'int cycle_i' in the enclosing scope; opens a for-loop and
 * scope that END_RACE closes.
 */
#define  BEGIN_RACE(x)							\
   x = LONG_MAX;							\
   for ( cycle_i = 0 ; cycle_i < 10 ; cycle_i++ ) {			\
      unsigned long cycle_tmp1, cycle_tmp2;				\
      rdtscll(cycle_tmp1);

/* Close the timing region: sample the counter again, keep the minimum
 * delta over all iterations, then subtract the calibrated overhead.
 */
#define END_RACE(x)							\
      rdtscll(cycle_tmp2);						\
      if ( x > (cycle_tmp2 - cycle_tmp1) ) {				\
	 x = cycle_tmp2 - cycle_tmp1;					\
      }									\
   }									\
   x -= counter_overhead;
    228 
    229 #elif defined(__sparc__)
    230 
/* SPARC: no run-time calibration; uses a fixed overhead estimate of 5
 * cycles for the %tick read sequence.
 */
#define  INIT_COUNTER()	\
	 do { counter_overhead = 5; } while(0)

/* Open a timing region: the code between BEGIN_RACE(x) and END_RACE(x)
 * is run 10 times and x receives the minimum elapsed tick count.
 * The 'rd %tick' reads are hand-assembled as .word opcodes into fixed
 * registers %l0/%l1 (presumably because some assemblers reject the
 * mnemonic -- confirm).  Expects an 'int cycle_i' in the enclosing
 * scope; opens a for-loop and scope that END_RACE closes.
 */
#define  BEGIN_RACE(x)                                                        \
x = LONG_MAX;                                                                 \
for (cycle_i = 0; cycle_i <10; cycle_i++) {                                   \
   register long cycle_tmp1 __asm__("l0");				      \
   register long cycle_tmp2 __asm__("l1");				      \
   /* rd %tick, %l0 */							      \
   __asm__ __volatile__ (".word 0xa1410000" : "=r" (cycle_tmp1));  /*  save timestamp   */

/* Close the timing region: read %tick again, keep the minimum delta over
 * all iterations, then subtract the fixed overhead.
 */
#define END_RACE(x)                                                           \
   /* rd %tick, %l1 */							      \
   __asm__ __volatile__ (".word 0xa3410000" : "=r" (cycle_tmp2));	      \
   if (x > (cycle_tmp2-cycle_tmp1)) x = cycle_tmp2 - cycle_tmp1;              \
}                                                                             \
x -= counter_overhead;
    248 
    249 #else
    250 #error Your processor is not supported for RUN_XFORM_BENCHMARK
    251 #endif
    252 
    253 #else
    254 
    255 #define BEGIN_RACE(x)
    256 #define END_RACE(x)
    257 
    258 #endif
    259 
    260 
    261 /* =============================================================
    262  * Helper functions
    263  */
    264 
    265 static GLfloat rnd( void )
    266 {
    267    GLfloat f = (GLfloat)rand() / (GLfloat)RAND_MAX;
    268    GLfloat gran = (GLfloat)(1 << 13);
    269 
    270    f = (GLfloat)(GLint)(f * gran) / gran;
    271 
    272    return f * 2.0 - 1.0;
    273 }
    274 
    275 static int significand_match( GLfloat a, GLfloat b )
    276 {
    277    GLfloat d = a - b;
    278    int a_ex, b_ex, d_ex;
    279 
    280    if ( d == 0.0F ) {
    281       return MAX_PRECISION;   /* Exact match */
    282    }
    283 
    284    if ( a == 0.0F || b == 0.0F ) {
    285       /* It would probably be better to check if the
    286        * non-zero number is denormalized and return
    287        * the index of the highest set bit here.
    288        */
    289       return 0;
    290    }
    291 
    292    frexpf( a, &a_ex );
    293    frexpf( b, &b_ex );
    294    frexpf( d, &d_ex );
    295 
    296    if ( a_ex < b_ex ) {
    297       return a_ex - d_ex;
    298    } else {
    299       return b_ex - d_ex;
    300    }
    301 }
    302 
/* Tags for values used by the debug tests: NIL, ONE and NEG name the
 * exact constants 0, 1 and -1; VAR presumably marks an arbitrary
 * (variable) entry -- confirm against the code that consumes these.
 */
enum { NIL = 0, ONE = 1, NEG = -1, VAR = 2 };
    304 
    305 /* Ensure our arrays are correctly aligned.
    306  */
    307 #if defined(__GNUC__)
    308 #  define ALIGN16(type, array)	type array __attribute__ ((aligned (16)))
    309 #elif defined(_MSC_VER)
    310 #  define ALIGN16(type, array)	type array __declspec(align(16)) /* GH: Does this work? */
    311 #elif defined(__xlC__)
    312 #  define ALIGN16(type, array)       type __align (16) array
    313 #else
    314 #  warning "ALIGN16 will not 16-byte align!\n"
    315 #  define ALIGN16
    316 #endif
    317 
    318 
    319 #endif /* DEBUG_MATH */
    320 
    321 #endif /* __M_DEBUG_UTIL_H__ */
    322