Home | History | Annotate | Download | only in x86
      1 
      2 /*
      3  * Mesa 3-D graphics library
      4  * Version:  3.5
      5  *
      6  * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
      7  *
      8  * Permission is hereby granted, free of charge, to any person obtaining a
      9  * copy of this software and associated documentation files (the "Software"),
     10  * to deal in the Software without restriction, including without limitation
     11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     12  * and/or sell copies of the Software, and to permit persons to whom the
     13  * Software is furnished to do so, subject to the following conditions:
     14  *
     15  * The above copyright notice and this permission notice shall be included
     16  * in all copies or substantial portions of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     21  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
     22  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
     23  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     24  */
     25 
     26 #ifdef USE_3DNOW_ASM
     27 #include "assyntax.h"
     28 #include "matypes.h"
     29 #include "xform_args.h"
     30 
     31     SEG_TEXT
     32 
     33 #define FRAME_OFFSET	4	/* NOTE(review): presumably consumed by the ARG_* macros from xform_args.h; 4 matches the single 4-byte PUSH_L (ESI) that every routine below performs before reading its arguments -- confirm */
     34 
     35 
     36 ALIGNTEXT16
     37 GLOBL GLNAME( _mesa_3dnow_transform_points3_general )
     38 HIDDEN(_mesa_3dnow_transform_points3_general)
     39 GLNAME( _mesa_3dnow_transform_points3_general ):
     40 /*
         * Transform 3-component points by a general 16-float matrix.
         *
         * For every source vertex (x0, x1, x2) the loop computes, for i = 0..3,
         *     r[i] = x0*m[i] + x1*m[4+i] + x2*m[8+i] + m[12+i]
         * and stores r0..r3 to the destination, which advances by a fixed
         * 16 bytes per vertex.  The destination is tagged as size 4 and its
         * count is copied from the source.
         *
         * Loop registers: EAX = source pointer, EDX = output pointer,
         * ECX = matrix, ESI = vertices remaining, EDI = source stride (bytes).
         * Register comments read "high dword | low dword".
         */
     41     PUSH_L    ( ESI )			/* preserve ESI for the caller          */
     42 
     43     MOV_L     ( ARG_DEST, ECX )		/* ECX = dest vector                    */
     44     MOV_L     ( ARG_MATRIX, ESI )	/* ESI = matrix (moved to ECX below)    */
     45     MOV_L     ( ARG_SOURCE, EAX )	/* EAX = source vector                  */
     46     MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )	/* dest size field = 4       */
     47     OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )	/* set size-4 flag bits in dest */
     48     MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )	/* EDX = source count            */
     49     MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count     */
     50 
     51     PUSH_L    ( EDI )			/* preserve EDI (all args already read) */
     52 
     53     MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = first output element    */
     54     MOV_L     ( ESI, ECX )		/* ECX = matrix                         */
     55     MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = loop counter            */
     56     MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride           */
     57     MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = first source element    */
     58 
     59     TEST_L    ( ESI, ESI )
     60     JZ        ( LLBL( G3TPGR_2 ) )	/* count == 0 -> nothing to transform   */
     61 
     62     PREFETCHW ( REGIND(EDX) )		/* first output line, write intent      */
     63 
     64 ALIGNTEXT16
     65 LLBL( G3TPGR_1 ):
     66 
     67     PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
     68 
     69     MOVQ      ( REGIND(EAX), MM0 )	/* x1              | x0              */
     70     MOVD      ( REGOFF(8, EAX), MM2 )	/*                 | x2              */
     71 
     72     ADD_L     ( EDI, EAX )		/* next vertex                       */
     73     PREFETCH  ( REGIND(EAX) )
     74 
     75     MOVQ      ( MM0, MM1 )		/* x1              | x0              */
     76     PUNPCKLDQ ( MM2, MM2 )		/* x2              | x2              */
     77 
     78     PUNPCKLDQ ( MM0, MM0 )		/* x0              | x0              */
     79     MOVQ      ( MM2, MM5 )		/* x2              | x2              */
     80 
     81     PUNPCKHDQ ( MM1, MM1 )		/* x1              | x1              */
     82     PFMUL     ( REGOFF(32, ECX), MM2 )	/* x2*m9           | x2*m8           */
     83 
     84     MOVQ      ( MM0, MM3 )		/* x0              | x0              */
     85     PFMUL     ( REGOFF(40, ECX), MM5 )	/* x2*m11          | x2*m10          */
     86 
     87     MOVQ      ( MM1, MM4 )		/* x1              | x1              */
     88     PFMUL     ( REGIND(ECX), MM0 )	/* x0*m1           | x0*m0           */
     89 
     90     PFADD     ( REGOFF(48, ECX), MM2 )	/* x2*m9+m13       | x2*m8+m12       */
     91     PFMUL     ( REGOFF(16, ECX), MM1 )	/* x1*m5           | x1*m4           */
     92 
     93     PFADD     ( REGOFF(56, ECX), MM5 )	/* x2*m11+m15      | x2*m10+m14      */
     94     PFADD     ( MM0, MM1 )		/* x0*m1+x1*m5     | x0*m0+x1*m4     */
     95 
     96     PFMUL     ( REGOFF(8, ECX), MM3 )	/* x0*m3           | x0*m2           */
     97     PFADD     ( MM1, MM2 )		/* r1              | r0              */
     98 
     99     PFMUL     ( REGOFF(24, ECX), MM4 )	/* x1*m7           | x1*m6           */
    100     ADD_L     ( CONST(16), EDX )	/* next output vertex                */
    101 
    102     PFADD     ( MM3, MM4 )		/* x0*m3+x1*m7     | x0*m2+x1*m6     */
    103     MOVQ      ( MM2, REGOFF(-16, EDX) )	/* write r0, r1                      */
    104 
    105     PFADD     ( MM4, MM5 )		/* r3              | r2              */
    106     MOVQ      ( MM5, REGOFF(-8, EDX) )	/* write r2, r3                      */
    107 
    108     DEC_L     ( ESI )			/* decrement vertex counter          */
    109     JNZ       ( LLBL( G3TPGR_1 ) )	/* cnt > 0 ? -> process next vertex  */
    110 
    111 LLBL( G3TPGR_2 ):
    112 
    113     FEMMS				/* leave MMX/3DNow! state before returning */
    114     POP_L     ( EDI )
    115     POP_L     ( ESI )
    116     RET
    117 
    118 
    119 
    120 
    121 ALIGNTEXT16
    122 GLOBL GLNAME( _mesa_3dnow_transform_points3_perspective )
    123 HIDDEN(_mesa_3dnow_transform_points3_perspective)
    124 GLNAME( _mesa_3dnow_transform_points3_perspective ):
    125 /*
         * Transform 3-component points using only the matrix entries read below.
         *
         * In the comments mRC names the float at index 4*R+C of the matrix
         * (m11 is loaded from byte offset 20, m32 from byte offset 56).
         * Per vertex (x0, x1, x2):
         *     r0 = x0*m00 + x2*m20
         *     r1 = x1*m11 + x2*m21
         *     r2 = x2*m22 + m32
         *     r3 = 0 - x2
         * Four floats are stored per vertex at a fixed 16-byte output step; the
         * destination is tagged as size 4 and its count copied from the source.
         *
         * Loop registers: EAX = source pointer, EDX = output pointer,
         * ESI = vertices remaining, EDI = source stride (bytes);
         * MM0-MM3 hold the matrix entries for the whole loop.
         * Register comments read "high dword | low dword".
         */
    126     PUSH_L    ( ESI )			/* preserve ESI for the caller          */
    127 
    128     MOV_L     ( ARG_DEST, ECX )		/* ECX = dest vector                    */
    129     MOV_L     ( ARG_MATRIX, ESI )	/* ESI = matrix (moved to ECX below)    */
    130     MOV_L     ( ARG_SOURCE, EAX )	/* EAX = source vector                  */
    131     MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )	/* dest size field = 4       */
    132     OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )	/* set size-4 flag bits in dest */
    133     MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )	/* EDX = source count            */
    134     MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count     */
    135 
    136     PUSH_L    ( EDI )			/* preserve EDI (all args already read) */
    137 
    138     MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = first output element    */
    139     MOV_L     ( ESI, ECX )		/* ECX = matrix                         */
    140     MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = loop counter            */
    141     MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride           */
    142     MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = first source element    */
    143 
    144     TEST_L    ( ESI, ESI )
    145     JZ        ( LLBL( G3TPPR_2 ) )	/* count == 0 -> nothing to transform   */
    146 
    147     PREFETCH  ( REGIND(EAX) )		/* first source vertex                  */
    148     PREFETCHW ( REGIND(EDX) )		/* first output line, write intent      */
    149 
    150     MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
    151     PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */
    152 
    153     MOVQ      ( REGOFF(32, ECX), MM1 )	/* m21             | m20             */
    154     MOVD      ( REGOFF(40, ECX), MM2 )	/*                 | m22             */
    155 
    156     MOVD      ( REGOFF(56, ECX), MM3 )	/*                 | m32             */
    157 
    158 ALIGNTEXT16
    159 LLBL( G3TPPR_1 ):
    160 
    161     PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
    162 
    163     MOVD      ( REGOFF(8, EAX), MM5 )	/*                 | x2              */
    164     MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
    165 
    166     ADD_L     ( EDI, EAX )		/* next vertex                       */
    167     PREFETCH  ( REGIND(EAX) )
    168 
    169     PXOR      ( MM7, MM7 )		/* 0               | 0               */
    170     MOVQ      ( MM5, MM6 )		/*                 | x2              */
    171 
    172     PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */
    173     PFSUB     ( MM5, MM7 )		/*                 | -x2             */
    174 
    175     PFMUL     ( MM2, MM6 )		/*                 | x2*m22          */
    176     PUNPCKLDQ ( MM5, MM5 )		/* x2              | x2              */
    177 
    178     ADD_L     ( CONST(16), EDX )	/* next r                            */
    179     PFMUL     ( MM1, MM5 )		/* x2*m21          | x2*m20          */
    180 
    181     PFADD     ( MM3, MM6 )		/*                 | x2*m22+m32      */
    182     PFADD     ( MM4, MM5 )		/* x1*m11+x2*m21   | x0*m00+x2*m20   */
    183 
    184     MOVQ      ( MM5, REGOFF(-16, EDX) )	/* write r0, r1                      */
    185     MOVD      ( MM6, REGOFF(-8, EDX) )	/* write r2                          */
    186 
    187     MOVD      ( MM7, REGOFF(-4, EDX) )	/* write r3                          */
    188 
    189     DEC_L     ( ESI )			/* decrement vertex counter          */
    190     JNZ       ( LLBL( G3TPPR_1 ) )	/* cnt > 0 ? -> process next vertex  */
    191 
    192 LLBL( G3TPPR_2 ):
    193 
    194     FEMMS				/* leave MMX/3DNow! state before returning */
    195     POP_L     ( EDI )
    196     POP_L     ( ESI )
    197     RET
    198 
    199 
    200 
    201 
    202 ALIGNTEXT16
    203 GLOBL GLNAME( _mesa_3dnow_transform_points3_3d )
    204 HIDDEN(_mesa_3dnow_transform_points3_3d)
    205 GLNAME( _mesa_3dnow_transform_points3_3d ):
    206 /*
         * Transform 3-component points by the first three columns of the matrix.
         *
         * Per vertex (x0, x1, x2), with m[k] the k-th float of the matrix:
         *     r0 = x0*m0 + x1*m4 + x2*m8  + m12
         *     r1 = x0*m1 + x1*m5 + x2*m9  + m13
         *     r2 = x0*m2 + x1*m6 + x2*m10 + m14
         * Three floats are stored per vertex; the output pointer still advances
         * 16 bytes, so the fourth float of each output slot is left untouched.
         * The destination is tagged as size 3 and its count copied from the
         * source.
         *
         * Loop registers: EAX = source pointer, EDX = output pointer,
         * ECX = matrix, ESI = vertices remaining, EDI = source stride (bytes),
         * MM7 = m6 | m2 for the whole loop.
         * Register comments read "high dword | low dword".
         */
    207     PUSH_L    ( ESI )			/* preserve ESI for the caller          */
    208 
    209     MOV_L     ( ARG_DEST, ECX )		/* ECX = dest vector                    */
    210     MOV_L     ( ARG_MATRIX, ESI )	/* ESI = matrix (moved to ECX below)    */
    211     MOV_L     ( ARG_SOURCE, EAX )	/* EAX = source vector                  */
    212     MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )	/* dest size field = 3       */
    213     OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )	/* set size-3 flag bits in dest */
    214     MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )	/* EDX = source count            */
    215     MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count     */
    216 
    217     PUSH_L    ( EDI )			/* preserve EDI (all args already read) */
    218 
    219     MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = first output element    */
    220     MOV_L     ( ESI, ECX )		/* ECX = matrix                         */
    221     MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = loop counter            */
    222     MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride           */
    223     MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = first source element    */
    224 
    225     TEST_L    ( ESI, ESI )
    226     JZ        ( LLBL( G3TP3R_2 ) )	/* count == 0 -> nothing to transform   */
    227 
    228     PREFETCH  ( REGIND(EAX) )		/* first source vertex                  */
    229     PREFETCHW ( REGIND(EDX) )		/* output is written: use write intent, as the loop and the sibling routines do */
    230 
    231     MOVD      ( REGOFF(8, ECX), MM7 )	/*                 | m2              */
    232     PUNPCKLDQ ( REGOFF(24, ECX), MM7 )	/* m6              | m2              */
    233 
    234 
    235 ALIGNTEXT16
    236 LLBL( G3TP3R_1 ):
    237 
    238     PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
    239 
    240     MOVQ      ( REGIND(EAX), MM0 )	/* x1              | x0              */
    241     MOVD      ( REGOFF(8, EAX), MM1 )	/*                 | x2              */
    242 
    243     ADD_L     ( EDI, EAX )		/* next vertex                       */
    244     PREFETCH  ( REGIND(EAX) )
    245 
    246     MOVQ      ( MM0, MM2 )		/* x1              | x0              */
    247     ADD_L     ( CONST(16), EDX )	/* next r                            */
    248 
    249     PUNPCKLDQ ( MM2, MM2 )		/* x0              | x0              */
    250     MOVQ      ( MM0, MM3 )		/* x1              | x0              */
    251 
    252     PFMUL     ( REGIND(ECX), MM2 )	/* x0*m1           | x0*m0           */
    253     PUNPCKHDQ ( MM3, MM3 )		/* x1              | x1              */
    254 
    255     MOVQ      ( MM1, MM4 )		/*                 | x2              */
    256     PFMUL     ( REGOFF(16, ECX), MM3 )	/* x1*m5           | x1*m4           */
    257 
    258     PUNPCKLDQ ( MM4, MM4 )		/* x2              | x2              */
    259     PFADD     ( MM2, MM3 )		/* x0*m1+x1*m5     | x0*m0+x1*m4     */
    260 
    261     PFMUL     ( REGOFF(32, ECX), MM4 )	/* x2*m9           | x2*m8           */
    262     PFADD     ( REGOFF(48, ECX), MM3 )	/* x0*m1+...+m13   | x0*m0+x1*m4+m12 */
    263 
    264     PFMUL     ( MM7, MM0 )		/* x1*m6           | x0*m2           */
    265     PFADD     ( MM4, MM3 )		/* r1              | r0              */
    266 
    267     PFMUL     ( REGOFF(40, ECX), MM1 )	/*                 | x2*m10          */
    268     PUNPCKLDQ ( REGOFF(56, ECX), MM1 )	/* m14             | x2*m10          */
    269 
    270     PFACC     ( MM0, MM1 )		/* x0*m2+x1*m6     | x2*m10+m14      */
    271 
    272     MOVQ      ( MM3, REGOFF(-16, EDX) )	/* write r0, r1                      */
    273     PFACC     ( MM1, MM1 )		/*                 | r2              */
    274 
    275     MOVD      ( MM1, REGOFF(-8, EDX) )	/* write r2                          */
    276 
    277     DEC_L     ( ESI )			/* decrement vertex counter          */
    278     JNZ       ( LLBL( G3TP3R_1 ) )	/* cnt > 0 ? -> process next vertex  */
    279 
    280 LLBL( G3TP3R_2 ):
    281 
    282     FEMMS				/* leave MMX/3DNow! state before returning */
    283     POP_L     ( EDI )
    284     POP_L     ( ESI )
    285     RET
    286 
    287 
    288 
    289 
    290 ALIGNTEXT16
    291 GLOBL GLNAME( _mesa_3dnow_transform_points3_3d_no_rot )
    292 HIDDEN(_mesa_3dnow_transform_points3_3d_no_rot)
    293 GLNAME( _mesa_3dnow_transform_points3_3d_no_rot ):
    294 /*
         * Transform 3-component points with a per-axis multiply and add only.
         *
         * In the comments mRC names the float at index 4*R+C of the matrix
         * (m11 is loaded from byte offset 20, m32 from byte offset 56).
         * Per vertex (x0, x1, x2):
         *     r0 = x0*m00 + m30
         *     r1 = x1*m11 + m31
         *     r2 = x2*m22 + m32
         * Three floats are stored per vertex; the output pointer still advances
         * 16 bytes, so the fourth float of each output slot is left untouched.
         * The destination is tagged as size 3 and its count copied from the
         * source.
         *
         * Loop registers: EAX = source pointer, EDX = output pointer,
         * ESI = vertices remaining, EDI = source stride (bytes);
         * MM0-MM3 hold the matrix entries for the whole loop.
         * Register comments read "high dword | low dword".
         */
    295     PUSH_L    ( ESI )			/* preserve ESI for the caller          */
    296 
    297     MOV_L     ( ARG_DEST, ECX )		/* ECX = dest vector                    */
    298     MOV_L     ( ARG_MATRIX, ESI )	/* ESI = matrix (moved to ECX below)    */
    299     MOV_L     ( ARG_SOURCE, EAX )	/* EAX = source vector                  */
    300     MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )	/* dest size field = 3       */
    301     OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )	/* set size-3 flag bits in dest */
    302     MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )	/* EDX = source count            */
    303     MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count     */
    304 
    305     PUSH_L    ( EDI )			/* preserve EDI (all args already read) */
    306 
    307     MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = first output element    */
    308     MOV_L     ( ESI, ECX )		/* ECX = matrix                         */
    309     MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = loop counter            */
    310     MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride           */
    311     MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = first source element    */
    312 
    313     TEST_L    ( ESI, ESI )
    314     JZ        ( LLBL( G3TP3NRR_2 ) )	/* count == 0 -> nothing to transform   */
    315 
    316     PREFETCH  ( REGIND(EAX) )		/* first source vertex                  */
    317     PREFETCHW ( REGIND(EDX) )		/* first output line, write intent      */
    318 
    319     MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
    320     PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */
    321 
    322     MOVD      ( REGOFF(40, ECX), MM2 )	/*                 | m22             */
    323     PUNPCKLDQ ( MM2, MM2 )		/* m22             | m22             */
    324 
    325     MOVQ      ( REGOFF(48, ECX), MM1 )	/* m31             | m30             */
    326     MOVD      ( REGOFF(56, ECX), MM3 )	/*                 | m32             */
    327 
    328     PUNPCKLDQ ( MM3, MM3 )		/* m32             | m32             */
    329 
    330 
    331 ALIGNTEXT16
    332 LLBL( G3TP3NRR_1 ):
    333 
    334     PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
    335 
    336     MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
    337     MOVD      ( REGOFF(8, EAX), MM5 )	/*                 | x2              */
    338 
    339     ADD_L     ( EDI, EAX )		/* next vertex                       */
    340     PREFETCH  ( REGIND(EAX) )		/* source is only read: plain prefetch, not write intent */
    341 
    342     PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */
    343 
    344     PFADD     ( MM1, MM4 )		/* x1*m11+m31      | x0*m00+m30      */
    345     PFMUL     ( MM2, MM5 )		/*                 | x2*m22          */
    346 
    347     PFADD     ( MM3, MM5 )		/*                 | x2*m22+m32      */
    348     MOVQ      ( MM4, REGIND(EDX) )	/* write r0, r1                      */
    349 
    350     ADD_L     ( CONST(16), EDX )	/* next r                            */
    351     DEC_L     ( ESI )			/* decrement vertex counter          */
    352 
    353     MOVD      ( MM5, REGOFF(-8, EDX) )	/* write r2                          */
    354     JNZ       ( LLBL( G3TP3NRR_1 ) )	/* cnt > 0 ? -> process next vertex  */
    355 
    356 LLBL( G3TP3NRR_2 ):
    357 
    358     FEMMS				/* leave MMX/3DNow! state before returning */
    359     POP_L     ( EDI )
    360     POP_L     ( ESI )
    361     RET
    362 
    363 
    364 
    365 
    366 ALIGNTEXT16
    367 GLOBL GLNAME( _mesa_3dnow_transform_points3_2d )
    368 HIDDEN(_mesa_3dnow_transform_points3_2d)
    369 GLNAME( _mesa_3dnow_transform_points3_2d ):
    370 /*
         * Transform the first two components of 3-component points and pass
         * the third one through unchanged.
         *
         * In the comments mRC names the float at index 4*R+C of the matrix
         * (m10 is loaded from byte offset 16, m31 from byte offset 52).
         * Per vertex (x0, x1, x2):
         *     r0 = x0*m00 + x1*m10 + m30
         *     r1 = x0*m01 + x1*m11 + m31
         *     r2 = x2
         * Three floats are stored per vertex; the output pointer still advances
         * 16 bytes, so the fourth float of each output slot is left untouched.
         * The destination is tagged as size 3 and its count copied from the
         * source.
         *
         * Loop registers: EAX = source pointer, EDX = output pointer,
         * ESI = vertices remaining, EDI = source stride (bytes);
         * MM0-MM2 hold the matrix entries for the whole loop.
         * Register comments read "high dword | low dword".
         */
    371     PUSH_L    ( ESI )			/* preserve ESI for the caller          */
    372 
    373     MOV_L     ( ARG_DEST, ECX )		/* ECX = dest vector                    */
    374     MOV_L     ( ARG_MATRIX, ESI )	/* ESI = matrix (moved to ECX below)    */
    375     MOV_L     ( ARG_SOURCE, EAX )	/* EAX = source vector                  */
    376     MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )	/* dest size field = 3       */
    377     OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )	/* set size-3 flag bits in dest */
    378     MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )	/* EDX = source count            */
    379     MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count     */
    380 
    381     PUSH_L    ( EDI )			/* preserve EDI (all args already read) */
    382 
    383     MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = first output element    */
    384     MOV_L     ( ESI, ECX )		/* ECX = matrix                         */
    385     MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = loop counter            */
    386     MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride           */
    387     MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = first source element    */
    388 
    389     TEST_L    ( ESI, ESI )
    390     JZ        ( LLBL( G3TP2R_3) )	/* count == 0 -> nothing to transform   */
    391 
    392     PREFETCH  ( REGIND(EAX) )		/* first source vertex                  */
    393     PREFETCHW ( REGIND(EDX) )		/* first output line, write intent      */
    394 
    395     MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
    396     PUNPCKLDQ ( REGOFF(16, ECX), MM0 )	/* m10             | m00             */
    397 
    398     MOVD      ( REGOFF(4, ECX), MM1 )	/*                 | m01             */
    399     PUNPCKLDQ ( REGOFF(20, ECX), MM1 )	/* m11             | m01             */
    400 
    401     MOVQ      ( REGOFF(48, ECX), MM2 )	/* m31             | m30             */
    402 
    403 ALIGNTEXT16
    404 LLBL( G3TP2R_2 ):
    405 
    406     PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
    407 
    408     MOVQ      ( REGIND(EAX), MM3 )	/* x1              | x0              */
    409     MOVD      ( REGOFF(8, EAX), MM5 )	/*                 | x2              */
    410 
    411     ADD_L     ( EDI, EAX )		/* next vertex                       */
    412     PREFETCH  ( REGIND(EAX) )
    413 
    414     MOVQ      ( MM3, MM4 )		/* x1              | x0              */
    415     PFMUL     ( MM0, MM3 )		/* x1*m10          | x0*m00          */
    416 
    417     ADD_L     ( CONST(16), EDX )	/* next r                            */
    418     PFMUL     ( MM1, MM4 )		/* x1*m11          | x0*m01          */
    419 
    420     PFACC     ( MM4, MM3 )		/* x0*m01+x1*m11   | x0*m00+x1*m10   */
    421     MOVD      ( MM5, REGOFF(-8, EDX) )	/* write r2 (=x2)                    */
    422 
    423     PFADD     ( MM2, MM3 )		/* x0*...*m11+m31  | x0*...*m10+m30  */
    424     MOVQ      ( MM3, REGOFF(-16, EDX) )	/* write r0, r1                      */
    425 
    426     DEC_L     ( ESI )			/* decrement vertex counter          */
    427     JNZ       ( LLBL( G3TP2R_2 ) )	/* cnt > 0 ? -> process next vertex  */
    428 
    429 LLBL( G3TP2R_3 ):
    430 
    431     FEMMS				/* leave MMX/3DNow! state before returning */
    432     POP_L     ( EDI )
    433     POP_L     ( ESI )
    434     RET
    435 
    436 
    437 
    438 
    439 ALIGNTEXT16
    440 GLOBL GLNAME( _mesa_3dnow_transform_points3_2d_no_rot )
    441 HIDDEN(_mesa_3dnow_transform_points3_2d_no_rot)
    442 GLNAME( _mesa_3dnow_transform_points3_2d_no_rot ):
    443 /*
         * Multiply-and-add the first two components of 3-component points and
         * pass the third one through unchanged.
         *
         * In the comments mRC names the float at index 4*R+C of the matrix
         * (m11 is loaded from byte offset 20, m30/m31 from byte offset 48).
         * Per vertex (x0, x1, x2):
         *     r0 = x0*m00 + m30
         *     r1 = x1*m11 + m31
         *     r2 = x2
         * Three floats are stored per vertex; the output pointer still advances
         * 16 bytes, so the fourth float of each output slot is left untouched.
         * The destination is tagged as size 3 and its count copied from the
         * source.
         *
         * Loop registers: EAX = source pointer, EDX = output pointer,
         * ESI = vertices remaining, EDI = source stride (bytes);
         * MM0 and MM1 hold the matrix entries for the whole loop.
         * Register comments read "high dword | low dword".
         */
    444     PUSH_L    ( ESI )			/* preserve ESI for the caller          */
    445 
    446     MOV_L     ( ARG_DEST, ECX )		/* ECX = dest vector                    */
    447     MOV_L     ( ARG_MATRIX, ESI )	/* ESI = matrix (moved to ECX below)    */
    448     MOV_L     ( ARG_SOURCE, EAX )	/* EAX = source vector                  */
    449     MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )	/* dest size field = 3       */
    450     OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )	/* set size-3 flag bits in dest */
    451     MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )	/* EDX = source count            */
    452     MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count     */
    453 
    454     PUSH_L    ( EDI )			/* preserve EDI (all args already read) */
    455 
    456     MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = first output element    */
    457     MOV_L     ( ESI, ECX )		/* ECX = matrix                         */
    458     MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = loop counter            */
    459     MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride           */
    460     MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = first source element    */
    461 
    462     TEST_L    ( ESI, ESI )
    463     JZ        ( LLBL( G3TP2NRR_2 ) )	/* count == 0 -> nothing to transform   */
    464 
    465     PREFETCH  ( REGIND(EAX) )		/* first source vertex                  */
    466     PREFETCHW ( REGIND(EDX) )		/* first output line, write intent      */
    467 
    468     MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
    469     PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */
    470 
    471     MOVQ      ( REGOFF(48, ECX), MM1 )	/* m31             | m30             */
    472 
    473 
    474 ALIGNTEXT16
    475 LLBL( G3TP2NRR_1 ):
    476 
    477     PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
    478 
    479     MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
    480     MOVD      ( REGOFF(8, EAX), MM5 )	/*                 | x2              */
    481 
    482     ADD_L     ( EDI, EAX )		/* next vertex                       */
    483     PREFETCH  ( REGIND(EAX) )
    484 
    485     PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */
    486     ADD_L     ( CONST(16), EDX )	/* next r                            */
    487 
    488     PFADD     ( MM1, MM4 )		/* x1*m11+m31      | x0*m00+m30      */
    489 
    490     MOVQ      ( MM4, REGOFF(-16, EDX) )	/* write r0, r1                      */
    491     MOVD      ( MM5, REGOFF(-8, EDX) )	/* write r2 (=x2)                    */
    492 
    493     DEC_L     ( ESI )			/* decrement vertex counter          */
    494     JNZ       ( LLBL( G3TP2NRR_1 ) )	/* cnt > 0 ? -> process next vertex  */
    495 
    496 LLBL( G3TP2NRR_2 ):
    497 
    498     FEMMS				/* leave MMX/3DNow! state before returning */
    499     POP_L     ( EDI )
    500     POP_L     ( ESI )
    501     RET
    502 
    503 
    504 
    505 
    506 ALIGNTEXT16
    507 GLOBL GLNAME( _mesa_3dnow_transform_points3_identity )
    508 HIDDEN(_mesa_3dnow_transform_points3_identity)
    509 GLNAME( _mesa_3dnow_transform_points3_identity ):
    510 /*
         * Copy 3-component points from source to destination unchanged.
         *
         * Per vertex, the three source floats (x0, x1, x2) are stored as
         * (r0, r1, r2).  The source advances by its stride, the output by a
         * fixed 16 bytes, so the fourth float of each output slot is left
         * untouched.  The destination is tagged as size 3 and its count copied
         * from the source.  The matrix pointer is loaded into ECX but never
         * dereferenced in this routine.
         *
         * Loop registers: EAX = source pointer, EDX = output pointer,
         * ESI = vertices remaining, EDI = source stride (bytes).
         * Register comments read "high dword | low dword".
         */
    511     PUSH_L    ( ESI )			/* preserve ESI for the caller          */
    512 
    513     MOV_L     ( ARG_DEST, ECX )		/* ECX = dest vector                    */
    514     MOV_L     ( ARG_MATRIX, ESI )	/* ESI = matrix (unused by the copy)    */
    515     MOV_L     ( ARG_SOURCE, EAX )	/* EAX = source vector                  */
    516     MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )	/* dest size field = 3       */
    517     OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )	/* set size-3 flag bits in dest */
    518     MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )	/* EDX = source count            */
    519     MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count     */
    520 
    521     PUSH_L    ( EDI )			/* preserve EDI (all args already read) */
    522 
    523     MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = first output element    */
    524     MOV_L     ( ESI, ECX )		/* ECX = matrix (not read afterwards)   */
    525     MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = loop counter            */
    526     MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride           */
    527     MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = first source element    */
    528 
    529     TEST_L    ( ESI, ESI )
    530     JZ        ( LLBL( G3TPIR_2 ) )	/* count == 0 -> nothing to copy        */
    531 
    532     PREFETCHW ( REGIND(EDX) )		/* first output line, write intent      */
    533 
    534 ALIGNTEXT16
    535 LLBL( G3TPIR_1 ):
    536 
    537     PREFETCHW ( REGOFF(32, EDX) )	/* prefetch output 32 bytes ahead    */
    538 
    539     MOVQ      ( REGIND(EAX), MM0 )	/* x1              | x0              */
    540     MOVD      ( REGOFF(8, EAX), MM1 )	/*                 | x2              */
    541 
    542     ADD_L     ( EDI, EAX )		/* next vertex                       */
    543     ADD_L     ( CONST(16), EDX )	/* next r                            */
    544 
    545     DEC_L     ( ESI )			/* decrement vertex counter          */
    546     MOVQ      ( MM0, REGOFF(-16, EDX) )	/* r1              | r0              */
    547 
    548     MOVD      ( MM1, REGOFF(-8, EDX) )	/*                 | r2              */
    549     JNZ       ( LLBL( G3TPIR_1 ) )	/* cnt > 0 ? -> process next vertex  */
    550 
    551 LLBL( G3TPIR_2 ):
    552 
    553     FEMMS				/* leave MMX/3DNow! state before returning */
    554     POP_L     ( EDI )
    555     POP_L     ( ESI )
    556     RET
    557 #endif
    558 
    559 #if defined (__ELF__) && defined (__linux__)
    560 	.section .note.GNU-stack,"",%progbits
    561 #endif
    562