Home | History | Annotate | Download | only in x86
      1 
      2 /*
      3  * Mesa 3-D graphics library
      4  * Version:  5.1
      5  *
      6  * Copyright (C) 1999-2003  Brian Paul   All Rights Reserved.
      7  *
      8  * Permission is hereby granted, free of charge, to any person obtaining a
      9  * copy of this software and associated documentation files (the "Software"),
     10  * to deal in the Software without restriction, including without limitation
     11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     12  * and/or sell copies of the Software, and to permit persons to whom the
     13  * Software is furnished to do so, subject to the following conditions:
     14  *
     15  * The above copyright notice and this permission notice shall be included
     16  * in all copies or substantial portions of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     21  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
     22  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
     23  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     24  */
     25 
     26 /*
     27  * 3Dnow assembly code by Holger Waechtler
     28  */
     29 
     30 #ifdef USE_3DNOW_ASM
     31 
     32 #include "assyntax.h"
     33 #include "matypes.h"
     34 #include "norm_args.h"
     35 
     36         SEG_TEXT
     37 
     /*
      * Addressing helpers shared by the routines in this file.
      *
      *   M(i)    i-th 4-byte element of the matrix whose base address is
      *           held in ECX (every routine loads mat->inv into ECX).
      *   STRIDE  stride field of the input vector struct addressed by ESI;
      *           it is added to the source pointer after each normal.
      *
      * The stride offset comes from the generated V4F_STRIDE constant, in
      * line with the V4F_COUNT / V4F_START offsets used below, instead of
      * a hard-coded 12.
      */
     #define M(i)    REGOFF(i * 4, ECX)
     #define STRIDE  REGOFF(V4F_STRIDE, ESI)
     40 
     41 
     /*
      * _mesa_3dnow_transform_normalize_normals
      *
      * Transforms in->count normals by the upper-left 3x3 of mat->inv and
      * rescales every result.  The work is done in two passes:
      *
      *   pass 1   r0 = x0*m0 + x1*m1 + x2*m2
      *            r1 = x0*m4 + x1*m5 + x2*m6
      *            r2 = x0*m8 + x1*m9 + x2*m10
      *            stored to dest->start, 16 bytes per normal;
      *   pass 2   the stored r is multiplied in place, either by the
      *            caller-supplied factor *lengths++ (lengths != 0) or by
      *            1/sqrt(r.r) computed with PFRSQRT and one refinement.
      *
      * The matrix is pre-multiplied by 'scale' only when lengths != 0: a
      * computed normalization cancels any uniform scale, while the
      * precalculated factors cannot account for it.
      * NOTE(review): earlier revisions scaled on the opposite branch;
      * verify against the C reference implementation (m_norm_tmp.h).
      * NOTE(review): the computed path has no guard for a zero-length r.
      *
      * Stack arguments are read through the ARG_* macros of norm_args.h.
      * FRAME_OFFSET is kept equal to the number of bytes pushed since
      * entry (presumably what those macros use to locate the arguments).
      *
      * Register roles:
      *   EAX  destination cursor        EDX  source cursor
      *   ECX  mat->inv                  ESI  in (for STRIDE)
      *   EDI  lengths (may be 0)        EBP  normals left in this pass
      *   MM3..MM7  matrix rows, MM0..MM2 scratch
      * EDI, ESI and EBP are saved on entry and restored before RET.
      */
     ALIGNTEXT16
     GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals)
     HIDDEN(_mesa_3dnow_transform_normalize_normals)
     GLNAME(_mesa_3dnow_transform_normalize_normals):

     #define FRAME_OFFSET 12

         PUSH_L     ( EDI )
         PUSH_L     ( ESI )
         PUSH_L     ( EBP )

         MOV_L      ( ARG_LENGTHS, EDI )
         MOV_L      ( ARG_IN, ESI )
         MOV_L      ( ARG_DEST, EAX )
         MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP )  /* EBP = in->count         */
         MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )  /* dest->count = in->count */
         MOV_L      ( REGOFF(V4F_START, ESI), EDX )  /* EDX = in->start         */
         MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* EAX = dest->start       */
         MOV_L      ( ARG_MAT, ECX )
         MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* ECX = mat->inv          */

         CMP_L      ( CONST(0), EBP )        /* nothing to do for count == 0    */
         JE         ( LLBL (G3TN_end) )

         FEMMS

         /* Pass 2 restarts from the same count and pointers, so park them. */
         PUSH_L     ( EBP )
         PUSH_L     ( EAX )
         PUSH_L     ( EDX )

     #undef  FRAME_OFFSET
     #define FRAME_OFFSET 24

         MOVQ       ( M(0), MM3 )            /* m1           | m0               */
         MOVQ       ( M(4), MM4 )            /* m5           | m4               */

         MOVD       ( M(2), MM5 )            /*              | m2               */
         PUNPCKLDQ  ( M(6), MM5 )            /* m6           | m2               */

         MOVQ       ( M(8), MM6 )            /* m9           | m8               */
         MOVD       ( M(10), MM7 )           /*              | m10              */

         CMP_L      ( CONST(0), EDI )        /* no lengths -> lengths are       */
         JE         ( LLBL (G3TN_transform) ) /* computed, scaling is pointless */

         MOVD       ( ARG_SCALE, MM0 )       /*              | scale            */
         PUNPCKLDQ  ( MM0, MM0 )             /* scale        | scale            */

         PFMUL      ( MM0, MM3 )             /* scale*m1     | scale*m0         */
         PFMUL      ( MM0, MM4 )             /* scale*m5     | scale*m4         */
         PFMUL      ( MM0, MM5 )             /* scale*m6     | scale*m2         */
         PFMUL      ( MM0, MM6 )             /* scale*m9     | scale*m8         */
         PFMUL      ( MM0, MM7 )             /*              | scale*m10        */

     /* ---- pass 1: transform every normal ------------------------------- */
     ALIGNTEXT32
     LLBL (G3TN_transform):
         MOVQ       ( REGIND (EDX), MM0 )    /* x1           | x0               */
         MOVD       ( REGOFF (8, EDX), MM2 ) /*              | x2               */

         MOVQ       ( MM0, MM1 )             /* x1           | x0               */
         PUNPCKLDQ  ( MM2, MM2 )             /* x2           | x2               */

         PFMUL      ( MM3, MM0 )             /* x1*m1        | x0*m0            */
         ADD_L      ( CONST(16), EAX )       /* EAX -> slot after current r     */

         PREFETCHW  ( REGIND(EAX) )          /* next output slot                */

         PFMUL      ( MM4, MM1 )             /* x1*m5        | x0*m4            */
         PFACC      ( MM1, MM0 )             /* x0*m4+x1*m5  | x0*m0+x1*m1      */

         PFMUL      ( MM5, MM2 )             /* x2*m6        | x2*m2            */
         PFADD      ( MM2, MM0 )             /* r1           | r0               */

         MOVQ       ( REGIND (EDX), MM1 )    /* x1           | x0  (reload)     */
         MOVQ       ( MM0, REGOFF(-16, EAX) ) /* store r0, r1                   */

         PFMUL      ( MM6, MM1 )             /* x1*m9        | x0*m8            */
         MOVD       ( REGOFF (8, EDX), MM2 ) /*              | x2               */

         PFMUL      ( MM7, MM2 )             /*              | x2*m10           */
         PFACC      ( MM1, MM1 )             /* (unused)     | x0*m8+x1*m9      */

         PFADD      ( MM2, MM1 )             /* (unused)     | r2               */
         ADD_L      ( STRIDE, EDX )          /* advance to the next normal      */

         PREFETCH   ( REGIND(EDX) )

         MOVD       ( MM1, REGOFF(-8, EAX) ) /* store r2                        */
         SUB_L      ( CONST(1), EBP )
         JNZ        ( LLBL (G3TN_transform) )

         /* ---- end of pass 1: recover start pointers and count ---------- */
         POP_L      ( EDX )
         POP_L      ( EAX )
         POP_L      ( EBP )

     #undef  FRAME_OFFSET
     #define FRAME_OFFSET 12

         CMP_L      ( CONST(0), EDI )        /* lengths supplied?               */
         JE         ( LLBL (G3TN_norm) )     /* no: compute them                */

     /* ---- pass 2a: scale by precalculated factors, in place ------------ */
     ALIGNTEXT32
     LLBL (G3TN_norm_w_lengths):

         PREFETCHW  ( REGOFF(12,EAX) )

         MOVQ       ( REGIND(EAX), MM0 )     /* r1           | r0               */
         MOVD       ( REGOFF(8, EAX), MM1 )  /*              | r2               */

         MOVD       ( REGIND (EDI), MM3 )    /*              | len              */
         PFMUL      ( MM3, MM1 )             /*              | r2*len           */

         PUNPCKLDQ  ( MM3, MM3 )             /* len          | len              */
         PFMUL      ( MM3, MM0 )             /* r1*len       | r0*len           */

         ADD_L      ( CONST(4), EDI )        /* next length                     */

         PREFETCH   ( REGIND(EDI) )

         MOVQ       ( MM0, REGIND(EAX) )     /* store r0, r1                    */
         MOVD       ( MM1, REGOFF(8, EAX) )  /* store r2                        */

         ADD_L      ( CONST(16), EAX )       /* next r                          */
         SUB_L      ( CONST(1), EBP )

         JNZ        ( LLBL (G3TN_norm_w_lengths) )
         JMP        ( LLBL (G3TN_exit_3dnow) )

     /* ---- pass 2b: normalize by the computed length, in place ---------- */
     ALIGNTEXT32
     LLBL (G3TN_norm):

         PREFETCHW  ( REGIND(EAX) )

         MOVQ       ( REGIND (EAX), MM0 )    /* r1           | r0               */
         MOVD       ( REGOFF(8, EAX), MM1 )  /*              | r2               */

         MOVQ       ( MM0, MM3 )
         MOVQ       ( MM1, MM4 )

         PFMUL      ( MM0, MM3 )             /* r1*r1        | r0*r0            */
         ADD_L      ( CONST(16), EAX )       /* EAX -> slot after current r     */

         PFMUL      ( MM1, MM4 )             /*              | r2*r2            */
         PFADD      ( MM4, MM3 )             /* r1*r1        | r0*r0+r2*r2      */

         PFACC      ( MM3, MM3 )             /* r.r          | r.r              */
         PFRSQRT    ( MM3, MM5 )             /* estimate of 1/sqrt(r.r)         */

         MOVQ       ( MM5, MM4 )             /* keep the raw estimate           */
         PUNPCKLDQ  ( MM3, MM3 )

         PFMUL      ( MM5, MM5 )             /* estimate squared                */

         PFRSQIT1   ( MM3, MM5 )             /* refinement, first step          */
         PFRCPIT2   ( MM4, MM5 )             /* refinement, second step         */

         PFMUL      ( MM5, MM0 )             /* r1/|r|       | r0/|r|           */

         MOVQ       ( MM0, REGOFF(-16, EAX) ) /* store r0, r1                   */
         PFMUL      ( MM5, MM1 )             /*              | r2/|r|           */

         MOVD       ( MM1, REGOFF(-8, EAX) ) /* store r2                        */
         SUB_L      ( CONST(1), EBP )
         JNZ        ( LLBL (G3TN_norm) )

     LLBL (G3TN_exit_3dnow):
         FEMMS

     LLBL (G3TN_end):
         POP_L      ( EBP )
         POP_L      ( ESI )
         POP_L      ( EDI )
         RET
    217 
    218 
    219 
    /*
     * _mesa_3dnow_transform_normalize_normals_no_rot
     *
     * Variant for a matrix without rotation: only the diagonal elements
     * m0, m5 and m10 of mat->inv are used.
     *
     *   t = ( x0*m0, x1*m5, x2*m10 )
     *   lengths != 0 :  r = t * scale * *lengths++
     *   lengths == 0 :  r = t / sqrt(t.t)   (PFRSQRT + one refinement)
     *
     * Results go to dest->start, 16 bytes per normal; the source pointer
     * advances by STRIDE.  dest->count is set to in->count.
     *
     * 'scale' is folded into the diagonal only on the lengths != 0 path;
     * on the computed path a uniform scale would cancel out anyway.
     * NOTE(review): earlier revisions scaled on the opposite branch;
     * verify against the C reference implementation (m_norm_tmp.h).
     * NOTE(review): the computed path has no guard for a zero-length t.
     *
     * The length factor is loaded at the top of each iteration, so the
     * loop reads exactly 'count' entries and never touches lengths[count].
     *
     * Register roles:
     *   EAX  destination cursor        EDX  source cursor
     *   ECX  mat->inv                  ESI  in (for STRIDE)
     *   EDI  lengths cursor (may be 0) EBP  normals left
     *   MM0  m5 | m0    MM2  m10 | m10    MM3..MM7  scratch
     * EDI, ESI and EBP are saved on entry and restored before RET.
     */
    ALIGNTEXT16
    GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot)
    HIDDEN(_mesa_3dnow_transform_normalize_normals_no_rot)
    GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot):

    #undef FRAME_OFFSET
    #define FRAME_OFFSET 12

        PUSH_L     ( EDI )
        PUSH_L     ( ESI )
        PUSH_L     ( EBP )

        MOV_L      ( ARG_LENGTHS, EDI )
        MOV_L      ( ARG_IN, ESI )
        MOV_L      ( ARG_DEST, EAX )
        MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP )  /* EBP = in->count         */
        MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )  /* dest->count = in->count */
        MOV_L      ( ARG_MAT, ECX )
        MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* EAX = dest->start       */
        MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* ECX = mat->inv          */
        MOV_L      ( REGOFF(V4F_START, ESI), EDX )  /* EDX = in->start         */

        CMP_L      ( CONST(0), EBP )        /* nothing to do for count == 0    */
        JE         ( LLBL (G3TNNR_end) )

        FEMMS

        MOVD       ( M(0), MM0 )            /*              | m0               */
        PUNPCKLDQ  ( M(5), MM0 )            /* m5           | m0               */

        MOVD       ( M(10), MM2 )           /*              | m10              */
        PUNPCKLDQ  ( MM2, MM2 )             /* m10          | m10              */

        CMP_L      ( CONST(0), EDI )        /* no lengths -> compute them,     */
        JE         ( LLBL (G3TNNR_norm) )   /* scaling would cancel out        */

        MOVD       ( ARG_SCALE, MM7 )       /*              | scale            */
        PUNPCKLDQ  ( MM7, MM7 )             /* scale        | scale            */

        PFMUL      ( MM7, MM0 )             /* scale*m5     | scale*m0         */
        PFMUL      ( MM7, MM2 )             /* scale*m10    | scale*m10        */

    /* ---- precalculated lengths ----------------------------------------- */
    ALIGNTEXT32
    LLBL (G3TNNR_norm_w_lengths):

        PREFETCHW  ( REGIND(EAX) )

        MOVQ       ( REGIND(EDX), MM6 )     /* x1           | x0               */
        MOVD       ( REGOFF(8, EDX), MM7 )  /*              | x2               */
        MOVD       ( REGIND(EDI), MM3 )     /*              | len              */

        PFMUL      ( MM0, MM6 )             /* x1*m5        | x0*m0            */
        ADD_L      ( STRIDE, EDX )          /* advance to the next normal      */

        PREFETCH   ( REGIND(EDX) )

        PFMUL      ( MM2, MM7 )             /*              | x2*m10           */
        ADD_L      ( CONST(16), EAX )       /* EAX -> slot after current r     */

        PFMUL      ( MM3, MM7 )             /*              | r2               */
        PUNPCKLDQ  ( MM3, MM3 )             /* len          | len              */

        ADD_L      ( CONST(4), EDI )        /* next length                     */
        PFMUL      ( MM3, MM6 )             /* r1           | r0               */

        MOVQ       ( MM6, REGOFF(-16, EAX) ) /* store r0, r1                   */
        MOVD       ( MM7, REGOFF(-8, EAX) ) /* store r2                        */

        SUB_L      ( CONST(1), EBP )
        JNZ        ( LLBL (G3TNNR_norm_w_lengths) )
        JMP        ( LLBL (G3TNNR_exit_3dnow) )

    /* ---- lengths computed on the fly ----------------------------------- */
    ALIGNTEXT32
    LLBL (G3TNNR_norm):

        PREFETCHW  ( REGIND(EAX) )

        MOVQ       ( REGIND(EDX), MM6 )     /* x1           | x0               */
        MOVD       ( REGOFF(8, EDX), MM7 )  /*              | x2               */

        PFMUL      ( MM0, MM6 )             /* t1           | t0               */
        ADD_L      ( CONST(16), EAX )       /* EAX -> slot after current r     */

        PFMUL      ( MM2, MM7 )             /*              | t2               */
        MOVQ       ( MM6, MM3 )

        MOVQ       ( MM7, MM4 )
        PFMUL      ( MM6, MM3 )             /* t1*t1        | t0*t0            */

        PFMUL      ( MM7, MM4 )             /*              | t2*t2            */
        PFACC      ( MM3, MM3 )             /* (unused)     | t0*t0+t1*t1      */

        PFADD      ( MM4, MM3 )             /* (unused)     | t.t              */
        ADD_L      ( STRIDE, EDX )          /* advance to the next normal      */

        PREFETCH   ( REGIND(EDX) )

        PFRSQRT    ( MM3, MM5 )             /* estimate of 1/sqrt(t.t)         */
        MOVQ       ( MM5, MM4 )             /* keep the raw estimate           */

        PUNPCKLDQ  ( MM3, MM3 )
        PFMUL      ( MM5, MM5 )             /* estimate squared                */

        PFRSQIT1   ( MM3, MM5 )             /* refinement, first step          */
        PFRCPIT2   ( MM4, MM5 )             /* refinement, second step         */

        PFMUL      ( MM5, MM6 )             /* r1           | r0               */

        MOVQ       ( MM6, REGOFF(-16, EAX) ) /* store r0, r1                   */
        PFMUL      ( MM5, MM7 )             /*              | r2               */

        MOVD       ( MM7, REGOFF(-8, EAX) ) /* store r2                        */
        SUB_L      ( CONST(1), EBP )
        JNZ        ( LLBL (G3TNNR_norm) )

    LLBL (G3TNNR_exit_3dnow):
        FEMMS

    LLBL (G3TNNR_end):
        POP_L      ( EBP )
        POP_L      ( ESI )
        POP_L      ( EDI )
        RET
    354 
    355 
    356 
    357 
    358 
    359 
    /*
     * _mesa_3dnow_transform_rescale_normals_no_rot
     *
     * For each of the in->count input normals computes
     *
     *   r = ( x0 * scale*m0,  x1 * scale*m5,  x2 * scale*m10 )
     *
     * using only the diagonal of mat->inv, and stores r to dest->start
     * (16 bytes per normal).  The source pointer advances by STRIDE.
     * dest->count is set to in->count.
     *
     * Register roles:
     *   EAX  destination cursor        EDX  source cursor
     *   ECX  mat->inv                  ESI  in (for STRIDE)
     *   EBP  normals left
     *   MM0  scale*m5 | scale*m0       MM2  scale*m10 (low half)
     * EDI, ESI and EBP are saved on entry and restored before RET.
     */
    ALIGNTEXT16
    GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot)
    HIDDEN(_mesa_3dnow_transform_rescale_normals_no_rot)
    GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot):

    #undef FRAME_OFFSET
    #define FRAME_OFFSET 12

        PUSH_L     ( EDI )
        PUSH_L     ( ESI )
        PUSH_L     ( EBP )

        MOV_L      ( ARG_IN, ESI )
        MOV_L      ( ARG_DEST, EAX )
        MOV_L      ( ARG_MAT, ECX )
        MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP )  /* EBP = in->count         */
        MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )  /* dest->count = in->count */
        MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* EAX = dest->start       */
        MOV_L      ( REGOFF(V4F_START, ESI), EDX )  /* EDX = in->start         */
        MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* ECX = mat->inv          */

        CMP_L      ( CONST(0), EBP )        /* nothing to do for count == 0    */
        JE         ( LLBL (G3TRNR_end) )

        FEMMS

        /* Fold the scale factor into the diagonal once, up front. */
        MOVD       ( ARG_SCALE, MM6 )       /*              | scale            */
        PUNPCKLDQ  ( MM6, MM6 )             /* scale        | scale            */

        MOVD       ( M(0), MM0 )            /*              | m0               */
        PUNPCKLDQ  ( M(5), MM0 )            /* m5           | m0               */
        MOVD       ( M(10), MM2 )           /*              | m10              */

        PFMUL      ( MM6, MM0 )             /* scale*m5     | scale*m0         */
        PFMUL      ( MM6, MM2 )             /*              | scale*m10        */

    ALIGNTEXT32
    LLBL (G3TRNR_rescale):

        PREFETCHW  ( REGIND(EAX) )

        MOVQ       ( REGIND(EDX), MM4 )     /* x1           | x0               */
        MOVD       ( REGOFF(8, EDX), MM5 )  /*              | x2               */

        ADD_L      ( STRIDE, EDX )          /* advance to the next normal      */
        PFMUL      ( MM0, MM4 )             /* r1           | r0               */

        PREFETCH   ( REGIND(EDX) )

        ADD_L      ( CONST(16), EAX )       /* EAX -> slot after current r     */
        PFMUL      ( MM2, MM5 )             /*              | r2               */

        MOVQ       ( MM4, REGOFF(-16, EAX) ) /* store r0, r1                   */
        MOVD       ( MM5, REGOFF(-8, EAX) ) /* store r2                        */

        SUB_L      ( CONST(1), EBP )
        JNZ        ( LLBL (G3TRNR_rescale) ) /* loop while normals remain      */

        FEMMS

    LLBL (G3TRNR_end):
        POP_L      ( EBP )
        POP_L      ( ESI )
        POP_L      ( EDI )
        RET
    427 
    428 
    429 
    430 
    431 
    /*
     * _mesa_3dnow_transform_rescale_normals
     *
     * For each of the in->count input normals computes, with every
     * matrix element pre-multiplied by 'scale',
     *
     *   r0 = x0*m0 + x1*m1 + x2*m2
     *   r1 = x0*m4 + x1*m5 + x2*m6
     *   r2 = x0*m8 + x1*m9 + x2*m10
     *
     * where m is mat->inv, and stores r to dest->start (16 bytes per
     * normal).  The source pointer advances by STRIDE.  dest->count is
     * set to in->count.
     *
     * Register roles:
     *   EAX  destination cursor        EDX  source cursor
     *   ECX  mat->inv                  ESI  in (for STRIDE)
     *   EDI  normals left
     *   MM3..MM7  scaled matrix rows, MM0..MM2 scratch
     * EDI and ESI are saved on entry and restored before RET.
     */
    ALIGNTEXT16
    GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals)
    HIDDEN(_mesa_3dnow_transform_rescale_normals)
    GLNAME(_mesa_3dnow_transform_rescale_normals):

    #undef  FRAME_OFFSET
    #define FRAME_OFFSET 8

        PUSH_L     ( EDI )
        PUSH_L     ( ESI )

        MOV_L      ( ARG_IN, ESI )
        MOV_L      ( ARG_DEST, EAX )
        MOV_L      ( ARG_MAT, ECX )
        MOV_L      ( REGOFF(V4F_COUNT, ESI), EDI )  /* EDI = in->count         */
        MOV_L      ( EDI, REGOFF(V4F_COUNT, EAX) )  /* dest->count = in->count */
        MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* EAX = dest->start       */
        MOV_L      ( REGOFF(V4F_START, ESI), EDX )  /* EDX = in->start         */
        MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* ECX = mat->inv          */

        CMP_L      ( CONST(0), EDI )        /* nothing to do for count == 0    */
        JE         ( LLBL (G3TR_end) )

        FEMMS

        /* Load the 3x3 and fold the scale factor into it once. */
        MOVD       ( ARG_SCALE, MM0 )       /*              | scale            */
        PUNPCKLDQ  ( MM0, MM0 )             /* scale        | scale            */

        MOVQ       ( M(0), MM3 )            /* m1           | m0               */
        MOVQ       ( M(4), MM4 )            /* m5           | m4               */
        MOVD       ( M(2), MM5 )            /*              | m2               */
        PUNPCKLDQ  ( M(6), MM5 )            /* m6           | m2               */
        MOVQ       ( M(8), MM6 )            /* m9           | m8               */
        MOVD       ( M(10), MM7 )           /*              | m10              */

        PFMUL      ( MM0, MM3 )             /* scale*m1     | scale*m0         */
        PFMUL      ( MM0, MM4 )             /* scale*m5     | scale*m4         */
        PFMUL      ( MM0, MM5 )             /* scale*m6     | scale*m2         */
        PFMUL      ( MM0, MM6 )             /* scale*m9     | scale*m8         */
        PFMUL      ( MM0, MM7 )             /*              | scale*m10        */

    ALIGNTEXT32
    LLBL (G3TR_rescale):

        PREFETCHW  ( REGIND(EAX) )

        MOVQ       ( REGIND(EDX), MM0 )     /* x1           | x0               */
        MOVD       ( REGOFF(8, EDX), MM2 )  /*              | x2               */

        MOVQ       ( MM0, MM1 )             /* x1           | x0               */
        PUNPCKLDQ  ( MM2, MM2 )             /* x2           | x2               */

        ADD_L      ( CONST(16), EAX )       /* EAX -> slot after current r     */
        PFMUL      ( MM3, MM0 )             /* x1*m1        | x0*m0            */

        PFMUL      ( MM4, MM1 )             /* x1*m5        | x0*m4            */
        PFACC      ( MM1, MM0 )             /* x0*m4+x1*m5  | x0*m0+x1*m1      */

        MOVQ       ( REGIND(EDX), MM1 )     /* x1           | x0  (reload)     */

        PFMUL      ( MM5, MM2 )             /* x2*m6        | x2*m2            */
        PFADD      ( MM2, MM0 )             /* r1           | r0               */

        MOVD       ( REGOFF(8, EDX), MM2 )  /*              | x2  (reload)     */
        ADD_L      ( STRIDE, EDX )          /* advance to the next normal      */

        PREFETCH   ( REGIND(EDX) )

        MOVQ       ( MM0, REGOFF(-16, EAX) ) /* store r0, r1                   */

        PFMUL      ( MM6, MM1 )             /* x1*m9        | x0*m8            */
        PFMUL      ( MM7, MM2 )             /*              | x2*m10           */

        PFACC      ( MM1, MM1 )             /* (unused)     | x0*m8+x1*m9      */
        PFADD      ( MM2, MM1 )             /* (unused)     | r2               */

        MOVD       ( MM1, REGOFF(-8, EAX) ) /* store r2                        */

        SUB_L      ( CONST(1), EDI )
        JNZ        ( LLBL (G3TR_rescale) )

        FEMMS

    LLBL (G3TR_end):
        POP_L      ( ESI )
        POP_L      ( EDI )
        RET
    523 
    524 
    525 
    526 
    527 
    528 
    529 
    /*
     * _mesa_3dnow_transform_normals_no_rot
     *
     * For each of the in->count input normals computes
     *
     *   r = ( x0*m0,  x1*m5,  x2*m10 )
     *
     * using only the diagonal of mat->inv, and stores r to dest->start
     * (16 bytes per normal).  No scaling or normalization is applied.
     * The source pointer advances by STRIDE.  dest->count is set to
     * in->count.
     *
     * Register roles:
     *   EAX  destination cursor        EDX  source cursor
     *   ECX  mat->inv                  ESI  in (for STRIDE)
     *   EDI  normals left
     *   MM0  m5 | m0                   MM2  m10 | m10
     * EDI and ESI are saved on entry and restored before RET.
     */
    ALIGNTEXT16
    GLOBL GLNAME(_mesa_3dnow_transform_normals_no_rot)
    HIDDEN(_mesa_3dnow_transform_normals_no_rot)
    GLNAME(_mesa_3dnow_transform_normals_no_rot):

    #undef  FRAME_OFFSET
    #define FRAME_OFFSET 8

        PUSH_L     ( EDI )
        PUSH_L     ( ESI )

        MOV_L      ( ARG_IN, ESI )
        MOV_L      ( ARG_DEST, EAX )
        MOV_L      ( ARG_MAT, ECX )
        MOV_L      ( REGOFF(V4F_COUNT, ESI), EDI )  /* EDI = in->count         */
        MOV_L      ( EDI, REGOFF(V4F_COUNT, EAX) )  /* dest->count = in->count */
        MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* EAX = dest->start       */
        MOV_L      ( REGOFF(V4F_START, ESI), EDX )  /* EDX = in->start         */
        MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* ECX = mat->inv          */

        CMP_L      ( CONST(0), EDI )        /* nothing to do for count == 0    */
        JE         ( LLBL (G3TNR_end) )

        FEMMS

        MOVD       ( M(0), MM0 )            /*              | m0               */
        PUNPCKLDQ  ( M(5), MM0 )            /* m5           | m0               */

        MOVD       ( M(10), MM2 )           /*              | m10              */
        PUNPCKLDQ  ( MM2, MM2 )             /* m10          | m10              */

    ALIGNTEXT32
    LLBL (G3TNR_transform):

        PREFETCHW  ( REGIND(EAX) )

        MOVQ       ( REGIND(EDX), MM4 )     /* x1           | x0               */
        MOVD       ( REGOFF(8, EDX), MM5 )  /*              | x2               */

        ADD_L      ( STRIDE, EDX )          /* advance to the next normal      */
        PFMUL      ( MM0, MM4 )             /* r1           | r0               */

        PREFETCH   ( REGIND(EDX) )

        ADD_L      ( CONST(16), EAX )       /* EAX -> slot after current r     */
        PFMUL      ( MM2, MM5 )             /*              | r2               */

        MOVQ       ( MM4, REGOFF(-16, EAX) ) /* store r0, r1                   */
        MOVD       ( MM5, REGOFF(-8, EAX) ) /* store r2                        */

        SUB_L      ( CONST(1), EDI )
        JNZ        ( LLBL (G3TNR_transform) )

        FEMMS

    LLBL (G3TNR_end):
        POP_L      ( ESI )
        POP_L      ( EDI )
        RET
    589 
    590 
    591 
    592 
    593 
    594 
    595 
    596 
    /*
     * _mesa_3dnow_transform_normals
     *
     * For each of the in->count input normals computes
     *
     *   r0 = x0*m0 + x1*m1 + x2*m2
     *   r1 = x0*m4 + x1*m5 + x2*m6
     *   r2 = x0*m8 + x1*m9 + x2*m10
     *
     * where m is mat->inv, and stores r to dest->start (16 bytes per
     * normal).  No scaling or normalization is applied.  The source
     * pointer advances by STRIDE.  dest->count is set to in->count.
     *
     * Register roles:
     *   EAX  destination cursor        EDX  source cursor
     *   ECX  mat->inv                  ESI  in (for STRIDE)
     *   EDI  normals left
     *   MM3..MM7  matrix rows, MM0..MM2 scratch
     * EDI and ESI are saved on entry and restored before RET.
     */
    ALIGNTEXT16
    GLOBL GLNAME(_mesa_3dnow_transform_normals)
    HIDDEN(_mesa_3dnow_transform_normals)
    GLNAME(_mesa_3dnow_transform_normals):

    #undef  FRAME_OFFSET
    #define FRAME_OFFSET 8

        PUSH_L     ( EDI )
        PUSH_L     ( ESI )

        MOV_L      ( ARG_IN, ESI )
        MOV_L      ( ARG_DEST, EAX )
        MOV_L      ( ARG_MAT, ECX )
        MOV_L      ( REGOFF(V4F_COUNT, ESI), EDI )  /* EDI = in->count         */
        MOV_L      ( EDI, REGOFF(V4F_COUNT, EAX) )  /* dest->count = in->count */
        MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* EAX = dest->start       */
        MOV_L      ( REGOFF(V4F_START, ESI), EDX )  /* EDX = in->start         */
        MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* ECX = mat->inv          */

        CMP_L      ( CONST(0), EDI )        /* nothing to do for count == 0    */
        JE         ( LLBL (G3T_end) )

        FEMMS

        MOVQ       ( M(0), MM3 )            /* m1           | m0               */
        MOVQ       ( M(4), MM4 )            /* m5           | m4               */

        MOVD       ( M(2), MM5 )            /*              | m2               */
        PUNPCKLDQ  ( M(6), MM5 )            /* m6           | m2               */

        MOVQ       ( M(8), MM6 )            /* m9           | m8               */
        MOVD       ( M(10), MM7 )           /*              | m10              */

    ALIGNTEXT32
    LLBL (G3T_transform):

        PREFETCHW  ( REGIND(EAX) )

        MOVQ       ( REGIND(EDX), MM0 )     /* x1           | x0               */
        MOVD       ( REGOFF(8, EDX), MM2 )  /*              | x2               */

        MOVQ       ( MM0, MM1 )             /* x1           | x0               */
        PUNPCKLDQ  ( MM2, MM2 )             /* x2           | x2               */

        ADD_L      ( CONST(16), EAX )       /* EAX -> slot after current r     */
        PFMUL      ( MM3, MM0 )             /* x1*m1        | x0*m0            */

        PFMUL      ( MM4, MM1 )             /* x1*m5        | x0*m4            */
        PFACC      ( MM1, MM0 )             /* x0*m4+x1*m5  | x0*m0+x1*m1      */

        PFMUL      ( MM5, MM2 )             /* x2*m6        | x2*m2            */
        PFADD      ( MM2, MM0 )             /* r1           | r0               */

        MOVQ       ( REGIND(EDX), MM1 )     /* x1           | x0  (reload)     */
        MOVQ       ( MM0, REGOFF(-16, EAX) ) /* store r0, r1                   */

        MOVD       ( REGOFF(8, EDX), MM2 )  /*              | x2  (reload)     */
        PFMUL      ( MM6, MM1 )             /* x1*m9        | x0*m8            */

        ADD_L      ( STRIDE, EDX )          /* advance to the next normal      */
        PFMUL      ( MM7, MM2 )             /*              | x2*m10           */

        PREFETCH   ( REGIND(EDX) )

        PFACC      ( MM1, MM1 )             /* (unused)     | x0*m8+x1*m9      */
        PFADD      ( MM2, MM1 )             /* (unused)     | r2               */

        MOVD       ( MM1, REGOFF(-8, EAX) ) /* store r2                        */

        SUB_L      ( CONST(1), EDI )
        JNZ        ( LLBL (G3T_transform) )

        FEMMS

    LLBL (G3T_end):
        POP_L      ( ESI )
        POP_L      ( EDI )
        RET
    676 
    677 
    678 
    679 
    680 
    681 
    682 ALIGNTEXT16
    683 GLOBL GLNAME(_mesa_3dnow_normalize_normals)
    684 HIDDEN(_mesa_3dnow_normalize_normals)
    685 GLNAME(_mesa_3dnow_normalize_normals):
        /*
         * _mesa_3dnow_normalize_normals
         *
         * Multiplies every 3-component normal of the input vector by a
         * per-normal factor and stores the result in the destination vector:
         *   - ARG_LENGTHS != 0: the factor is read from the lengths array
         *     (loop G3N_norm1);
         *   - ARG_LENGTHS == 0: the factor is 1/sqrt(x0*x0+x1*x1+x2*x2),
         *     computed with PFRSQRT and refined with PFRSQIT1/PFRCPIT2
         *     (loop G3N_norm2).
         *
         * Stack arguments are read through ARG_IN, ARG_DEST and ARG_LENGTHS.
         * Those macros are not defined in this part of the file; presumably
         * they come from norm_args.h and are relative to FRAME_OFFSET --
         * confirm there.
         *
         * Register roles after the setup code:
         *   ESI = in (only used for STRIDE afterwards)
         *   ECX = input cursor,  advanced by STRIDE per normal
         *   EAX = output cursor, advanced by a fixed 16 bytes per normal
         *   EDX = lengths cursor, advanced by 4 bytes per normal (norm1 only)
         *   EBP = number of normals still to process
         *
         * Side effects: dest->count is set to in->count.  Only three floats
         * are stored per output element; the fourth dword of each 16-byte
         * slot is not written.  EDI, ESI and EBP are saved and restored; EDI
         * is not otherwise referenced in this routine.
         */
    686 
    687 #undef  FRAME_OFFSET
    688 #define FRAME_OFFSET 12
        /* 12 = the three 4-byte pushes below. */
    689 
    690     PUSH_L     ( EDI )
    691     PUSH_L     ( ESI )
    692     PUSH_L     ( EBP )
    693 
    694     MOV_L      ( ARG_IN, ESI )          /* ESI = in                           */
    695     MOV_L      ( ARG_DEST, EAX )        /* EAX = dest                         */
    696     MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP ) /*  dest->count = in->count   */
    697     MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )
    698     MOV_L      ( REGOFF(V4F_START, EAX), EAX ) /*  EAX = dest->start  */
    699     MOV_L      ( REGOFF(V4F_START, ESI), ECX ) /*  ECX = in->start    */
    700     MOV_L      ( ARG_LENGTHS, EDX )     /* EDX = lengths (may be 0)           */
    701 
    702     CMP_L      ( CONST(0), EBP ) /* count == 0 ? then nothing to do */
    703     JE         ( LLBL (G3N_end) )       /* skips both FEMMS, no MMX used yet  */
    704 
    705     FEMMS                               /* enter MMX/3DNow! section           */
    706 
    707     CMP_L      ( CONST(0), EDX )        /* lengths == 0 ?                     */
    708     JE         ( LLBL (G3N_norm2) )     /* calculate lengths                  */
    709 
    710 ALIGNTEXT32
    711 LLBL (G3N_norm1):                       /* use precalculated lengths          */
        /*
         * NOTE(review): the value loaded from the lengths array is used
         * directly as a multiplier, so the array presumably holds reciprocal
         * lengths -- confirm against the caller.
         * NOTE(review): EAX is only written in this loop, yet it is
         * prefetched with PREFETCH, whereas G3N_norm2 uses PREFETCHW for the
         * destination.
         */
    712 
    713     PREFETCH   ( REGIND(EAX) )
    714 
    715     MOVQ       ( REGIND(ECX), MM0 )     /* x1              | x0               */
    716     MOVD       ( REGOFF(8, ECX), MM1 )  /* 0               | x2               */
    717 
    718     MOVD       ( REGIND(EDX), MM3 )     /* 0               | length (x)       */
    719     PFMUL      ( MM3, MM1 )             /*                 | x2 (normalized)  */
    720 
    721     PUNPCKLDQ  ( MM3, MM3 )             /* length (x)      | length (x)       */
    722     ADD_L      ( STRIDE, ECX )          /* next normal            */
    723 
    724     PREFETCH   ( REGIND(ECX) )
    725 
    726     PFMUL      ( MM3, MM0 )             /* x1 (normalized) | x0 (normalized)  */
    727     MOVQ       ( MM0, REGIND(EAX) )     /* write new x0, x1                   */
    728 
    729     MOVD       ( MM1, REGOFF(8, EAX) )  /* write new x2                       */
    730     ADD_L      ( CONST(16), EAX )       /* next r                             */
    731 
    732     ADD_L      ( CONST(4), EDX )        /* next length                        */
    733     SUB_L      ( CONST(1), EBP )        /* decrement normal counter           */
    734 
    735     JNZ        ( LLBL (G3N_norm1) )     /* loop while normals remain          */
    736 
    737     JMP        ( LLBL (G3N_end1) )      /* done: leave MMX state and return   */
    738 
    739 ALIGNTEXT32
    740 LLBL (G3N_norm2):                       /* need to calculate lengths          */
        /*
         * EAX is advanced early in this loop, so the stores at the bottom
         * address the current element with offsets -16 and -8.
         */
    741 
    742     PREFETCHW  ( REGIND(EAX) )
    743 
    744     PREFETCH   ( REGIND(ECX) )
    745 
    746     MOVQ       ( REGIND(ECX), MM0 )     /* x1              | x0               */
    747     MOVD       ( REGOFF(8, ECX), MM1 )  /* 0               | x2               */
    748 
    749     MOVQ       ( MM0, MM3 )             /* x1              | x0               */
    750     ADD_L      ( STRIDE, ECX )          /* next normal    */
    751 
    752     PFMUL      ( MM0, MM3 )             /* x1*x1           | x0*x0            */
    753     MOVQ       ( MM1, MM4 )             /* 0               | x2               */
    754 
    755     ADD_L      ( CONST(16), EAX )       /* next r                             */
    756     PFMUL      ( MM1, MM4 )             /* 0               | x2*x2            */
    757 
    758     PFADD      ( MM4, MM3 )             /* x1*x1           | x0*x0+x2*x2      */
    759     PFACC      ( MM3, MM3 )             /* x0*x0+...+x2*x2 | x0*x0+x1*x1+x2*x2*/
    760 
    761     PFRSQRT    ( MM3, MM5 )             /* 1/sqrt (x0*x0+x1*x1+x2*x2)         */
    762     MOVQ       ( MM5, MM4 )             /* keep initial estimate for PFRCPIT2 */
    763 
    764     PUNPCKLDQ  ( MM3, MM3 )             /* sum | sum (low dword in both)      */
    765     PFMUL      ( MM5, MM5 )             /* estimate * estimate                */
    766 
    767     PFRSQIT1   ( MM3, MM5 )             /* first refinement step              */
        /* ZF from this SUB_L is consumed by the JNZ at the loop bottom; the  */
        /* MMX/3DNow! instructions in between do not modify EFLAGS.           */
    768     SUB_L      ( CONST(1), EBP )        /* decrement normal counter           */
    769 
    770     PFRCPIT2   ( MM4, MM5 )             /* second step: refined 1/sqrt(sum)   */
    771 
    772     PFMUL      ( MM5, MM0 )             /* x1 (normalized) | x0 (normalized)  */
    773     MOVQ       ( MM0, REGOFF(-16, EAX) ) /* write new x0, x1                  */
    774 
    775     PFMUL      ( MM5, MM1 )             /*                 | x2 (normalized)  */
    776     MOVD       ( MM1, REGOFF(-8, EAX) ) /* write new x2                       */
    777 
    778     JNZ        ( LLBL (G3N_norm2) )     /* loop while normals remain          */
    779 
    780 LLBL (G3N_end1):
    781     FEMMS                               /* leave MMX/3DNow! section           */
    782 
    783 LLBL (G3N_end):
        /* Restore the saved registers in reverse push order. */
    784     POP_L      ( EBP )
    785     POP_L      ( ESI )
    786     POP_L      ( EDI )
    787     RET
    788 
    789 
    790 
    791 
    792 
    793 
    794 ALIGNTEXT16
    795 GLOBL GLNAME(_mesa_3dnow_rescale_normals)
    796 HIDDEN(_mesa_3dnow_rescale_normals)
    797 GLNAME(_mesa_3dnow_rescale_normals):
        /*
         * _mesa_3dnow_rescale_normals
         *
         * Multiplies the three components of every input normal by the single
         * 32-bit float read from ARG_SCALE and stores the result in the
         * destination vector.
         *
         * Stack arguments are read through ARG_IN, ARG_DEST and ARG_SCALE.
         * Those macros are not defined in this part of the file; presumably
         * they come from norm_args.h and are relative to FRAME_OFFSET --
         * confirm there.
         *
         * Register roles after the setup code:
         *   ESI = in (only used for STRIDE afterwards)
         *   ECX = input cursor,  advanced by STRIDE per normal
         *   EAX = output cursor, advanced by a fixed 16 bytes per normal
         *   EDX = number of normals still to process
         *   MM0 = scale | scale
         *
         * Side effects: dest->count is set to in->count.  Only three floats
         * are stored per output element; the fourth dword of each 16-byte
         * slot is not written.  EDI and ESI are saved and restored; EDI is
         * not otherwise referenced in this routine.
         */
    798 
    799 #undef  FRAME_OFFSET
    800 #define FRAME_OFFSET 8
        /* 8 = the two 4-byte pushes below. */
    801     PUSH_L     ( EDI )
    802     PUSH_L     ( ESI )
    803 
    804     MOV_L      ( ARG_IN, ESI )          /* ESI = in                           */
    805     MOV_L      ( ARG_DEST, EAX )        /* EAX = dest                         */
    806     MOV_L      ( REGOFF(V4F_COUNT, ESI), EDX ) /*  dest->count = in->count   */
    807     MOV_L      ( EDX, REGOFF(V4F_COUNT, EAX) )
    808     MOV_L      ( REGOFF(V4F_START, EAX), EAX ) /*  EAX = dest->start  */
    809     MOV_L      ( REGOFF(V4F_START, ESI), ECX ) /*  ECX = in->start    */
    810 
    811     CMP_L      ( CONST(0), EDX )        /* count == 0 ? then nothing to do    */
    812     JE         ( LLBL (G3R_end) )       /* skips both FEMMS, no MMX used yet  */
    813 
    814     FEMMS                               /* enter MMX/3DNow! section           */
    815 
    816     MOVD       ( ARG_SCALE, MM0 )       /* 0             | scale              */
    817     PUNPCKLDQ  ( MM0, MM0 )             /* scale         | scale              */
    818 
    819 ALIGNTEXT32
    820 LLBL (G3R_rescale):
        /*
         * EAX is advanced before the stores, so they address the current
         * element with offsets -16 and -8.
         */
    821 
    822     PREFETCHW  ( REGIND(EAX) )
    823 
    824     MOVQ       ( REGIND(ECX), MM1 )     /* x1            | x0                 */
    825     MOVD       ( REGOFF(8, ECX), MM2 )  /* 0             | x2                 */
    826 
    827     PFMUL      ( MM0, MM1 )             /* x1*scale      | x0*scale           */
    828     ADD_L      ( STRIDE, ECX )          /* next normal                  */
    829 
    830     PREFETCH   ( REGIND(ECX) )
    831 
    832     PFMUL      ( MM0, MM2 )             /*               | x2*scale           */
    833     ADD_L      ( CONST(16), EAX )       /* next r                             */
    834 
    835     MOVQ       ( MM1, REGOFF(-16, EAX) ) /* write r0, r1                      */
    836     MOVD       ( MM2, REGOFF(-8, EAX) ) /* write r2                           */
    837 
    838     SUB_L      ( CONST(1), EDX )        /* decrement normal counter           */
    839     JNZ        ( LLBL (G3R_rescale) )   /* loop while normals remain          */
    840 
    841     FEMMS                               /* leave MMX/3DNow! section           */
    842 
    843 LLBL (G3R_end):
        /* Restore the saved registers in reverse push order. */
    844     POP_L      ( ESI )
    845     POP_L      ( EDI )
    846     RET
    847 
    848 #endif /* USE_3DNOW_ASM (opened near the top of the file) */
    849 
    850 #if defined (__ELF__) && defined (__linux__)
        /* Empty .note.GNU-stack section: marks this object as not needing   */
        /* an executable stack when linked with the GNU toolchain.           */
    851 	.section .note.GNU-stack,"",%progbits
    852 #endif
    853