Home | History | Annotate | Download | only in x86
      1 
      2 /*
      3  * Mesa 3-D graphics library
      4  *
      5  * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining a
      8  * copy of this software and associated documentation files (the "Software"),
      9  * to deal in the Software without restriction, including without limitation
     10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     11  * and/or sell copies of the Software, and to permit persons to whom the
     12  * Software is furnished to do so, subject to the following conditions:
     13  *
     14  * The above copyright notice and this permission notice shall be included
     15  * in all copies or substantial portions of the Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
     21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     23  * OTHER DEALINGS IN THE SOFTWARE.
     24  */
     25 
     26 #ifdef USE_3DNOW_ASM
     27 #include "assyntax.h"
     28 #include "matypes.h"
     29 #include "xform_args.h"
     30 
     31     SEG_TEXT
     32 
        /*
         * FRAME_OFFSET is not referenced by name anywhere else in this file.
         * NOTE(review): presumably the ARG_DEST / ARG_MATRIX / ARG_SOURCE
         * operands from xform_args.h use it as the number of bytes pushed
         * before the arguments are read; every routine below pushes exactly
         * one register (ESI) before its ARG_* reads -- confirm against
         * xform_args.h.
         */
     33 #define FRAME_OFFSET	4
     34 
     35 
     36 ALIGNTEXT16
        /*
         * _mesa_3dnow_transform_points4_general
         *
         * Multiplies every 4-float source vertex (x0..x3) by all sixteen
         * matrix floats and stores a 4-float result (r0..r3) per vertex:
         *
         *   r0 = x0*m0 + x1*m4 + x2*m8  + x3*m12
         *   r1 = x0*m1 + x1*m5 + x2*m9  + x3*m13
         *   r2 = x0*m2 + x1*m6 + x2*m10 + x3*m14
         *   r3 = x0*m3 + x1*m7 + x2*m11 + x3*m15
         *
         * where mN is the float at byte offset 4*N from the matrix pointer.
         *
         * Inputs come from the ARG_DEST, ARG_MATRIX and ARG_SOURCE operands
         * (NOTE(review): presumably stack arguments defined in xform_args.h
         * -- confirm).  Before the loop the destination's V4F_SIZE field is
         * set to 4, VEC_SIZE_4 is OR-ed into its V4F_FLAGS byte and its
         * V4F_COUNT is copied from the source.
         *
         * Operand order of the macros is source first, destination last.
         *
         * Loop registers:
         *   EAX = current source vertex, advanced by the source stride (EDI)
         *   EDX = current destination vertex, advanced by 16 bytes
         *   ECX = matrix pointer
         *   ESI = vertices left to process
         *
         * ESI and EDI are saved and restored; EAX, ECX, EDX, the flags and
         * MM0-MM7 are overwritten.  FEMMS runs before returning, including
         * when the vertex count is zero.
         */
     37 GLOBL GLNAME( _mesa_3dnow_transform_points4_general )
     38 HIDDEN(_mesa_3dnow_transform_points4_general)
     39 GLNAME( _mesa_3dnow_transform_points4_general ):
     40 
     41     PUSH_L    ( ESI )	/* saved here, restored before RET      */
     42 
     43     MOV_L     ( ARG_DEST, ECX )	/* ECX = destination vector             */
     44     MOV_L     ( ARG_MATRIX, ESI )	/* ESI = matrix (moved to ECX below)    */
     45     MOV_L     ( ARG_SOURCE, EAX )	/* EAX = source vector                  */
     46     MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )	/* dest size field = 4 */
     47     OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )	/* set VEC_SIZE_4 in dest flags */
     48     MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
     49     MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count */
     50 
        /* NOTE(review): EDI is pushed only after all ARG_* reads, presumably
         * because those operands are ESP-relative -- confirm in xform_args.h. */
     51     PUSH_L    ( EDI )
     52 
     53     MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest data     */
     54     MOV_L     ( ESI, ECX )	/* ECX = matrix                         */
     55     MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count  */
     56     MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride */
     57     MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source data   */
     58 
     59     TEST_L    ( ESI, ESI )
     60     JZ        ( LLBL( G3TPGR_2 ) )	/* count == 0 -> nothing to transform */
     61 
     62     PREFETCHW ( REGIND(EDX) )	/* first destination vertex             */
     63 
     64 ALIGNTEXT16
     65 LLBL( G3TPGR_1 ):
     66 
     67     PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
     68 
     69     MOVQ      ( REGIND(EAX), MM0 )	/* x1            | x0                */
     70     MOVQ      ( REGOFF(8, EAX), MM4 )	/* x3            | x2                */
     71 
     72     ADD_L     ( EDI, EAX )		/* next vertex                       */
     73     PREFETCH  ( REGIND(EAX) )	/* next source vertex                */
     74 
     75     MOVQ      ( MM0, MM2 )		/* x1              | x0              */
     76     MOVQ      ( MM4, MM6 )		/* x3              | x2              */
     77 
     78     PUNPCKLDQ ( MM0, MM0 )		/* x0              | x0              */
     79     PUNPCKHDQ ( MM2, MM2 )		/* x1              | x1              */
     80 
     81     MOVQ      ( MM0, MM1 )		/* x0              | x0              */
     82     ADD_L     ( CONST(16), EDX )	/* next r                            */
     83 
     84     PFMUL     ( REGIND(ECX), MM0 )	/* x0*m1           | x0*m0           */
     85     MOVQ      ( MM2, MM3 )		/* x1              | x1              */
     86 
     87     PFMUL     ( REGOFF(8, ECX), MM1 )	/* x0*m3           | x0*m2           */
     88     PUNPCKLDQ ( MM4, MM4 )		/* x2              | x2              */
     89 
     90     PFMUL     ( REGOFF(16, ECX), MM2 )	/* x1*m5           | x1*m4           */
     91     MOVQ      ( MM4, MM5 )		/* x2              | x2              */
     92 
     93     PFMUL     ( REGOFF(24, ECX), MM3 )	/* x1*m7           | x1*m6           */
     94     PUNPCKHDQ ( MM6, MM6 )		/* x3              | x3              */
     95 
     96     PFMUL     ( REGOFF(32, ECX), MM4 )	/* x2*m9           | x2*m8           */
     97     MOVQ      ( MM6, MM7 )		/* x3              | x3              */
     98 
     99     PFMUL     ( REGOFF(40, ECX), MM5 )	/* x2*m11          | x2*m10          */
    100     PFADD     ( MM0, MM2 )		/* x0*m1+x1*m5     | x0*m0+x1*m4     */
    101 
    102     PFMUL     ( REGOFF(48, ECX), MM6 )	/* x3*m13          | x3*m12          */
    103     PFADD     ( MM1, MM3 )		/* x0*m3+x1*m7     | x0*m2+x1*m6     */
    104 
    105     PFMUL     ( REGOFF(56, ECX), MM7 )	/* x3*m15          | x3*m14          */
    106     PFADD     ( MM4, MM6 )		/* x2*m9+x3*m13    | x2*m8+x3*m12    */
    107 
    108     PFADD     ( MM5, MM7 )		/* x2*m11+x3*m15   | x2*m10+x3*m14   */
    109     PFADD     ( MM2, MM6 )		/* r1              | r0              */
    110 
    111     PFADD     ( MM3, MM7 )		/* r3              | r2              */
    112     MOVQ      ( MM6, REGOFF(-16, EDX) )	/* write r0, r1 (EDX already advanced) */
    113 
    114     MOVQ      ( MM7, REGOFF(-8, EDX) )	/* write r2, r3                      */
    115 
    116     DEC_L     ( ESI )			/* decrement vertex counter          */
    117     JNZ       ( LLBL( G3TPGR_1 ) )	/* cnt > 0 ? -> process next vertex  */
    118 
    119 LLBL( G3TPGR_2 ):
    120 
    121     FEMMS	/* leave MMX/3DNow! state before returning */
    122     POP_L     ( EDI )
    123     POP_L     ( ESI )
    124     RET
    125 
    126 
    127 
    128 
    129 ALIGNTEXT16
        /*
         * _mesa_3dnow_transform_points4_perspective
         *
         * Transforms every 4-float source vertex (x0..x3) using only six
         * matrix floats and stores a 4-float result (r0..r3) per vertex:
         *
         *   r0 = x0*m00 + x2*m20
         *   r1 = x1*m11 + x2*m21
         *   r2 = x2*m22 + x3*m32
         *   r3 = -x2                 (computed as 0 - x2)
         *
         * In the comments mCR names the float at byte offset 4*(4*C + R)
         * from the matrix pointer (m00 = 0, m11 = 20, m20 = 32, m21 = 36,
         * m22 = 40, m32 = 56).  No other matrix element is read.
         *
         * Inputs come from the ARG_DEST, ARG_MATRIX and ARG_SOURCE operands
         * (NOTE(review): presumably stack arguments defined in xform_args.h
         * -- confirm).  Before the loop the destination's V4F_SIZE field is
         * set to 4, VEC_SIZE_4 is OR-ed into its V4F_FLAGS byte and its
         * V4F_COUNT is copied from the source.
         *
         * Loop registers:
         *   EAX = current source vertex, advanced by the source stride (EDI)
         *   EDX = current destination vertex, advanced by 16 bytes
         *   ESI = vertices left to process
         *   MM0 = m11 | m00,  MM1 = m32 | m22,  MM2 = m21 | m20,  MM7 = 0
         *
         * ESI and EDI are saved and restored; EAX, ECX, EDX, the flags and
         * the MMX registers are overwritten.  FEMMS runs before returning,
         * including when the vertex count is zero.
         */
    130 GLOBL GLNAME( _mesa_3dnow_transform_points4_perspective )
    131 HIDDEN(_mesa_3dnow_transform_points4_perspective)
    132 GLNAME( _mesa_3dnow_transform_points4_perspective ):
    133 
    134     PUSH_L    ( ESI )	/* saved here, restored before RET      */
    135 
    136     MOV_L     ( ARG_DEST, ECX )	/* ECX = destination vector             */
    137     MOV_L     ( ARG_MATRIX, ESI )	/* ESI = matrix (moved to ECX below)    */
    138     MOV_L     ( ARG_SOURCE, EAX )	/* EAX = source vector                  */
    139     MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )	/* dest size field = 4 */
    140     OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )	/* set VEC_SIZE_4 in dest flags */
    141     MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    142     MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count */
    143 
    144     PUSH_L    ( EDI )	/* pushed only after the ARG_* reads    */
    145 
    146     MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest data     */
    147     MOV_L     ( ESI, ECX )	/* ECX = matrix                         */
    148     MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count  */
    149     MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride */
    150     MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source data   */
    151 
    152     TEST_L    ( ESI, ESI )
    153     JZ        ( LLBL( G3TPPR_2 ) )	/* count == 0 -> nothing to transform */
    154 
    155     PREFETCH  ( REGIND(EAX) )	/* first source vertex                  */
    156     PREFETCHW ( REGIND(EDX) )	/* first destination vertex             */
    157 
        /* Matrix elements are loaded once; the loop only reads registers. */
    158     MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
    159     PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */
    160 
    161     MOVD      ( REGOFF(40, ECX), MM1 )	/*                 | m22             */
    162     PUNPCKLDQ ( REGOFF(56, ECX), MM1 )	/* m32             | m22             */
    163 
    164     MOVQ      ( REGOFF(32, ECX), MM2 )	/* m21             | m20             */
    165     PXOR      ( MM7, MM7 )		/* 0               | 0               */
    166 
    167 ALIGNTEXT16
    168 LLBL( G3TPPR_1 ):
    169 
    170     PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
    171 
    172     MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
    173     MOVQ      ( REGOFF(8, EAX), MM5 )	/* x3              | x2              */
    174     MOVD      ( REGOFF(8, EAX), MM3 )	/* 0               | x2              */
    175 
    176     ADD_L     ( EDI, EAX )		/* next vertex                       */
    177     PREFETCH  ( REGOFF(32, EAX) )	/* 32 bytes past the next vertex     */
    178 
    179     MOVQ      ( MM5, MM6 )		/* x3              | x2              */
    180     PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */
    181 
    182     PUNPCKLDQ ( MM5, MM5 )		/* x2              | x2              */
    183     ADD_L     ( CONST(16), EDX )	/* next r                            */
    184 
    185     PFMUL     ( MM2, MM5 )		/* x2*m21          | x2*m20          */
    186     PFSUBR    ( MM7, MM3 )		/* 0 - 0           | 0 - x2 = -x2    */
    187 
    188     PFMUL     ( MM1, MM6 )		/* x3*m32          | x2*m22          */
    189     PFADD     ( MM4, MM5 )		/* x1*m11+x2*m21   | x0*m00+x2*m20   */
    190 
    191     PFACC     ( MM3, MM6 )		/* -x2             | x2*m22+x3*m32   */
    192     MOVQ      ( MM5, REGOFF(-16, EDX) )	/* write r0, r1                      */
    193 
    194     MOVQ      ( MM6, REGOFF(-8, EDX) )	/* write r2, r3                      */
    195     DEC_L     ( ESI )			/* decrement vertex counter          */
    196 
    197     JNZ       ( LLBL( G3TPPR_1 ) )	/* cnt > 0 ? -> process next vertex  */
    198 
    199 LLBL( G3TPPR_2 ):
    200 
    201     FEMMS	/* leave MMX/3DNow! state before returning */
    202     POP_L     ( EDI )
    203     POP_L     ( ESI )
    204     RET
    205 
    206 
    207 
    208 
    209 ALIGNTEXT16
        /*
         * _mesa_3dnow_transform_points4_3d
         *
         * Transforms every 4-float source vertex (x0..x3) using twelve
         * matrix floats and stores a 4-float result (r0..r3) per vertex:
         *
         *   r0 = x0*m0 + x1*m4 + x2*m8  + x3*m12
         *   r1 = x0*m1 + x1*m5 + x2*m9  + x3*m13
         *   r2 = x0*m2 + x1*m6 + x2*m10 + x3*m14
         *   r3 = x3                  (produced by PFACC as x3 + 0)
         *
         * where mN is the float at byte offset 4*N from the matrix pointer.
         * m3, m7, m11 and m15 are loaded as the upper halves of 8-byte
         * operands at offsets 8, 24, 40 and 56 only where PUNPCKLDQ takes a
         * memory operand; they never contribute to a result.
         *
         * Inputs come from the ARG_DEST, ARG_MATRIX and ARG_SOURCE operands
         * (NOTE(review): presumably stack arguments defined in xform_args.h
         * -- confirm).  Before the loop the destination's V4F_SIZE field is
         * set to 4, VEC_SIZE_4 is OR-ed into its V4F_FLAGS byte and its
         * V4F_COUNT is copied from the source.
         *
         * Loop registers:
         *   EAX = current source vertex, advanced by the source stride (EDI)
         *   EDX = current destination vertex, advanced by 16 bytes
         *   ECX = matrix pointer
         *   ESI = vertices left to process
         *   MM6 = m6 | m2,  MM7 = m14 | m10  (loaded once before the loop)
         *
         * ESI and EDI are saved and restored; EAX, ECX, EDX, the flags and
         * MM0-MM7 are overwritten.  FEMMS runs before returning, including
         * when the vertex count is zero.
         */
    210 GLOBL GLNAME( _mesa_3dnow_transform_points4_3d )
    211 HIDDEN(_mesa_3dnow_transform_points4_3d)
    212 GLNAME( _mesa_3dnow_transform_points4_3d ):
    213 
    214     PUSH_L    ( ESI )	/* saved here, restored before RET      */
    215 
    216     MOV_L     ( ARG_DEST, ECX )	/* ECX = destination vector             */
    217     MOV_L     ( ARG_MATRIX, ESI )	/* ESI = matrix (moved to ECX below)    */
    218     MOV_L     ( ARG_SOURCE, EAX )	/* EAX = source vector                  */
    219     MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )	/* dest size field = 4 */
    220     OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )	/* set VEC_SIZE_4 in dest flags */
    221     MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    222     MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count */
    223 
    224     PUSH_L    ( EDI )	/* pushed only after the ARG_* reads    */
    225 
    226     MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest data     */
    227     MOV_L     ( ESI, ECX )	/* ECX = matrix                         */
    228     MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count  */
    229     MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride */
    230     MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source data   */
    231 
    232     TEST_L    ( ESI, ESI )
    233     JZ        ( LLBL( G3TP3R_2 ) )	/* count == 0 -> nothing to transform */
    234 
        /* Third-row coefficients are kept in registers for the whole loop. */
    235     MOVD      ( REGOFF(8, ECX), MM6 )	/*                 | m2              */
    236     PUNPCKLDQ ( REGOFF(24, ECX), MM6 )	/* m6              | m2              */
    237 
    238     MOVD      ( REGOFF(40, ECX), MM7 )	/*                 | m10             */
    239     PUNPCKLDQ ( REGOFF(56, ECX), MM7 )	/* m14             | m10             */
    240 
    241 ALIGNTEXT16
    242 LLBL( G3TP3R_1 ):
    243 
    244     PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
    245     PREFETCH  ( REGOFF(32, EAX) )	/* hopefully array is tightly packed */
    246 
    247     MOVQ      ( REGIND(EAX), MM2 )	/* x1              | x0              */
    248     MOVQ      ( REGOFF(8, EAX), MM3 )	/* x3              | x2              */
    249 
    250     MOVQ      ( MM2, MM0 )		/* x1              | x0              */
    251     MOVQ      ( MM3, MM4 )		/* x3              | x2              */
    252 
    253     MOVQ      ( MM0, MM1 )		/* x1              | x0              */
    254     MOVQ      ( MM4, MM5 )		/* x3              | x2              */
    255 
    256     PUNPCKLDQ ( MM0, MM0 )		/* x0              | x0              */
    257     PUNPCKHDQ ( MM1, MM1 )		/* x1              | x1              */
    258 
    259     PFMUL     ( REGIND(ECX), MM0 )	/* x0*m1           | x0*m0           */
    260     PUNPCKLDQ ( MM3, MM3 )		/* x2              | x2              */
    261 
    262     PFMUL     ( REGOFF(16, ECX), MM1 )	/* x1*m5           | x1*m4           */
    263     PUNPCKHDQ ( MM4, MM4 )		/* x3              | x3              */
    264 
    265     PFMUL     ( MM6, MM2 )		/* x1*m6           | x0*m2           */
    266     PFADD     ( MM0, MM1 )		/* x0*m1+x1*m5     | x0*m0+x1*m4     */
    267 
    268     PFMUL     ( REGOFF(32, ECX), MM3 )	/* x2*m9           | x2*m8           */
    269     ADD_L     ( CONST(16), EDX )	/* next r                            */
    270 
    271     PFMUL     ( REGOFF(48, ECX), MM4 )	/* x3*m13          | x3*m12          */
    272     PFADD     ( MM1, MM3 )		/* x0*m1+..+x2*m9  | x0*m0+...+x2*m8 */
    273 
    274     PFMUL     ( MM7, MM5 )		/* x3*m14          | x2*m10          */
    275     PFADD     ( MM3, MM4 )		/* r1              | r0              */
    276 
    277     PFACC     ( MM2, MM5 )		/* x0*m2+x1*m6     | x2*m10+x3*m14   */
    278     MOVD      ( REGOFF(12, EAX), MM0 )	/* 0               | x3              */
    279 
    280     ADD_L     ( EDI, EAX )		/* next vertex                       */
    281     PFACC     ( MM0, MM5 )		/* r3 (= x3 + 0)   | r2              */
    282 
    283     MOVQ      ( MM4, REGOFF(-16, EDX) )	/* write r0, r1                      */
    284     MOVQ      ( MM5, REGOFF(-8, EDX) )	/* write r2, r3                      */
    285 
    286     DEC_L     ( ESI )			/* decrement vertex counter          */
    287     JNZ       ( LLBL( G3TP3R_1 ) )	/* cnt > 0 ? -> process next vertex  */
    288 
    289 LLBL( G3TP3R_2 ):
    290 
    291     FEMMS	/* leave MMX/3DNow! state before returning */
    292     POP_L     ( EDI )
    293     POP_L     ( ESI )
    294     RET
    295 
    296 
    297 
    298 
    299 ALIGNTEXT16
        /*
         * _mesa_3dnow_transform_points4_3d_no_rot
         *
         * Transforms every 4-float source vertex (x0..x3) using six matrix
         * floats and stores a 4-float result (r0..r3) per vertex:
         *
         *   r0 = x0*m00 + x3*m30
         *   r1 = x1*m11 + x3*m31
         *   r2 = x2*m22 + x3*m32
         *   r3 = x3                  (produced by PFACC as x3 + 0)
         *
         * In the comments mCR names the float at byte offset 4*(4*C + R)
         * from the matrix pointer (m00 = 0, m11 = 20, m22 = 40, m30 = 48,
         * m31 = 52, m32 = 56).  No other matrix element is read.
         *
         * Inputs come from the ARG_DEST, ARG_MATRIX and ARG_SOURCE operands
         * (NOTE(review): presumably stack arguments defined in xform_args.h
         * -- confirm).  Before the loop the destination's V4F_SIZE field is
         * set to 4, VEC_SIZE_4 is OR-ed into its V4F_FLAGS byte and its
         * V4F_COUNT is copied from the source.
         *
         * Loop registers:
         *   EAX = current source vertex, advanced by the source stride (EDI)
         *   EDX = current destination vertex, advanced by 16 bytes
         *   ESI = vertices left to process
         *   MM0 = m11 | m00,  MM1 = m31 | m30,  MM2 = m32 | m22
         *
         * ESI and EDI are saved and restored; EAX, ECX, EDX, the flags and
         * the MMX registers are overwritten.  FEMMS runs before returning,
         * including when the vertex count is zero.
         */
    300 GLOBL GLNAME( _mesa_3dnow_transform_points4_3d_no_rot )
    301 HIDDEN(_mesa_3dnow_transform_points4_3d_no_rot)
    302 GLNAME( _mesa_3dnow_transform_points4_3d_no_rot ):
    303 
    304     PUSH_L    ( ESI )	/* saved here, restored before RET      */
    305     MOV_L     ( ARG_DEST, ECX )	/* ECX = destination vector             */
    306     MOV_L     ( ARG_MATRIX, ESI )	/* ESI = matrix (moved to ECX below)    */
    307     MOV_L     ( ARG_SOURCE, EAX )	/* EAX = source vector                  */
    308     MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )	/* dest size field = 4 */
    309     OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )	/* set VEC_SIZE_4 in dest flags */
    310     MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    311     MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count */
    312 
    313     PUSH_L    ( EDI )	/* pushed only after the ARG_* reads    */
    314 
    315     MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest data     */
    316     MOV_L     ( ESI, ECX )	/* ECX = matrix                         */
    317     MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count  */
    318     MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride */
    319     MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source data   */
    320 
    321     TEST_L    ( ESI, ESI )
    322     JZ        ( LLBL( G3TP3NRR_2 ) )	/* count == 0 -> nothing to transform */
    323 
        /* Matrix elements are loaded once; the loop only reads registers. */
    324     MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
    325     PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */
    326 
    327     MOVD      ( REGOFF(40, ECX), MM2 )	/*                 | m22             */
    328     PUNPCKLDQ ( REGOFF(56, ECX), MM2 )	/* m32             | m22             */
    329 
    330     MOVQ      ( REGOFF(48, ECX), MM1 )	/* m31             | m30             */
    331 
    332 ALIGNTEXT16
    333 LLBL( G3TP3NRR_1 ):
    334 
    335     PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
    336 
    337     MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
    338     MOVQ      ( REGOFF(8, EAX), MM5 )	/* x3              | x2              */
    339     MOVD      ( REGOFF(12, EAX), MM7 )	/* 0               | x3              */
    340 
    341     ADD_L     ( EDI, EAX )		/* next vertex                       */
    342     PREFETCH  ( REGOFF(32, EAX) )	/* 32 bytes past the next vertex     */
    343 
    344     MOVQ      ( MM5, MM6 )		/* x3              | x2              */
    345     PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */
    346 
    347     PUNPCKHDQ ( MM6, MM6 )		/* x3              | x3              */
    348     PFMUL     ( MM2, MM5 )		/* x3*m32          | x2*m22          */
    349 
    350     PFMUL     ( MM1, MM6 )		/* x3*m31          | x3*m30          */
    351     PFACC     ( MM7, MM5 )		/* x3 (= x3 + 0)   | x2*m22+x3*m32   */
    352 
    353     PFADD     ( MM6, MM4 )		/* x1*m11+x3*m31   | x0*m00+x3*m30   */
    354     ADD_L     ( CONST(16), EDX )	/* next r                            */
    355 
    356     MOVQ      ( MM4, REGOFF(-16, EDX) )	/* write r0, r1                      */
    357     MOVQ      ( MM5, REGOFF(-8, EDX) )	/* write r2, r3                      */
    358 
    359     DEC_L     ( ESI )			/* decrement vertex counter          */
    360     JNZ       ( LLBL( G3TP3NRR_1 ) )	/* cnt > 0 ? -> process next vertex  */
    361 
    362 LLBL( G3TP3NRR_2 ):
    363 
    364     FEMMS	/* leave MMX/3DNow! state before returning */
    365     POP_L     ( EDI )
    366     POP_L     ( ESI )
    367     RET
    368 
    369 
    370 
    371 
    372 ALIGNTEXT16
        /*
         * _mesa_3dnow_transform_points4_2d
         *
         * Transforms the first two components of every 4-float source vertex
         * (x0..x3) and copies the last two unchanged:
         *
         *   r0 = x0*m00 + x1*m10 + x3*m30
         *   r1 = x0*m01 + x1*m11 + x3*m31
         *   r2 = x2
         *   r3 = x3
         *
         * In the comments mCR names the float at byte offset 4*(4*C + R)
         * from the matrix pointer (m00 = 0, m01 = 4, m10 = 16, m11 = 20,
         * m30 = 48, m31 = 52).  No other matrix element is read.
         *
         * Inputs come from the ARG_DEST, ARG_MATRIX and ARG_SOURCE operands
         * (NOTE(review): presumably stack arguments defined in xform_args.h
         * -- confirm).  Before the loop the destination's V4F_SIZE field is
         * set to 4, VEC_SIZE_4 is OR-ed into its V4F_FLAGS byte and its
         * V4F_COUNT is copied from the source.
         *
         * Loop registers:
         *   EAX = current source vertex, advanced by the source stride (EDI)
         *   EDX = current destination vertex, advanced by 16 bytes
         *   ESI = vertices left to process
         *   MM0 = m10 | m00,  MM1 = m11 | m01,  MM2 = m31 | m30
         *
         * ESI and EDI are saved and restored; EAX, ECX, EDX, the flags and
         * MM0-MM6 are overwritten.  FEMMS runs before returning, including
         * when the vertex count is zero.
         */
    373 GLOBL GLNAME( _mesa_3dnow_transform_points4_2d )
    374 HIDDEN(_mesa_3dnow_transform_points4_2d)
    375 GLNAME( _mesa_3dnow_transform_points4_2d ):
    376 
    377     PUSH_L    ( ESI )	/* saved here, restored before RET      */
    378 
    379     MOV_L     ( ARG_DEST, ECX )	/* ECX = destination vector             */
    380     MOV_L     ( ARG_MATRIX, ESI )	/* ESI = matrix (moved to ECX below)    */
    381     MOV_L     ( ARG_SOURCE, EAX )	/* EAX = source vector                  */
    382     MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )	/* dest size field = 4 */
    383     OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )	/* set VEC_SIZE_4 in dest flags */
    384     MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    385     MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count */
    386 
    387     PUSH_L    ( EDI )	/* pushed only after the ARG_* reads    */
    388 
    389     MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest data     */
    390     MOV_L     ( ESI, ECX )	/* ECX = matrix                         */
    391     MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count  */
    392     MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride */
    393     MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source data   */
    394 
    395     TEST_L    ( ESI, ESI )
    396     JZ        ( LLBL( G3TP2R_2 ) )	/* count == 0 -> nothing to transform */
    397 
        /* Matrix elements are loaded once; the loop only reads registers. */
    398     MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
    399     PUNPCKLDQ ( REGOFF(16, ECX), MM0 )	/* m10             | m00             */
    400 
    401     MOVD      ( REGOFF(4, ECX), MM1 )	/*                 | m01             */
    402     PUNPCKLDQ ( REGOFF(20, ECX), MM1 )	/* m11             | m01             */
    403 
    404     MOVQ      ( REGOFF(48, ECX), MM2 )	/* m31             | m30             */
    405 
    406 ALIGNTEXT16
    407 LLBL( G3TP2R_1 ):
    408 
    409     PREFETCHW ( REGOFF(32, EDX) )       /* prefetch 2 vertices ahead         */
    410 
    411     MOVQ      ( REGIND(EAX), MM3 )	/* x1              | x0              */
    412     MOVQ      ( REGOFF(8, EAX), MM5 )	/* x3              | x2              */
    413 
    414     ADD_L     ( EDI, EAX )		/* next vertex                       */
    415     PREFETCH  ( REGIND(EAX) )	/* next source vertex                */
    416 
    417     MOVQ      ( MM3, MM4 )		/* x1              | x0              */
    418     MOVQ      ( MM5, MM6 )		/* x3              | x2              */
    419 
    420     PFMUL     ( MM1, MM4 )		/* x1*m11          | x0*m01          */
    421     PUNPCKHDQ ( MM6, MM6 )		/* x3              | x3              */
    422 
    423     PFMUL     ( MM0, MM3 )		/* x1*m10          | x0*m00          */
    424     ADD_L     ( CONST(16), EDX )	/* next r                            */
    425 
    426     PFACC     ( MM4, MM3 )		/* x0*m01+x1*m11   | x0*m00+x1*m10   */
    427     PFMUL     ( MM2, MM6 )		/* x3*m31          | x3*m30          */
    428 
    429     PFADD     ( MM6, MM3 )		/* r1              | r0              */
    430     MOVQ      ( MM5, REGOFF(-8, EDX) )	/* write r2, r3 (x2, x3 unchanged)   */
    431 
    432     MOVQ      ( MM3, REGOFF(-16, EDX) )	/* write r0, r1                      */
    433 
    434     DEC_L     ( ESI )			/* decrement vertex counter          */
    435     JNZ       ( LLBL( G3TP2R_1 ) )	/* cnt > 0 ? -> process next vertex  */
    436 
    437 LLBL( G3TP2R_2 ):
    438 
    439     FEMMS	/* leave MMX/3DNow! state before returning */
    440     POP_L     ( EDI )
    441     POP_L     ( ESI )
    442     RET
    443 
    444 
    445 
    446 
    447 ALIGNTEXT16
        /*
         * _mesa_3dnow_transform_points4_2d_no_rot
         *
         * Scales and offsets the first two components of every 4-float
         * source vertex (x0..x3) and copies the last two unchanged:
         *
         *   r0 = x0*m00 + x3*m30
         *   r1 = x1*m11 + x3*m31
         *   r2 = x2
         *   r3 = x3
         *
         * In the comments mCR names the float at byte offset 4*(4*C + R)
         * from the matrix pointer (m00 = 0, m11 = 20, m30 = 48, m31 = 52).
         * No other matrix element is read.
         *
         * Inputs come from the ARG_DEST, ARG_MATRIX and ARG_SOURCE operands
         * (NOTE(review): presumably stack arguments defined in xform_args.h
         * -- confirm).  Before the loop the destination's V4F_SIZE field is
         * set to 4, VEC_SIZE_4 is OR-ed into its V4F_FLAGS byte and its
         * V4F_COUNT is copied from the source.
         *
         * Loop registers:
         *   EAX = current source vertex, advanced by the source stride (EDI)
         *   EDX = current destination vertex, advanced by 16 bytes
         *   ESI = vertices left to process
         *   MM0 = m11 | m00,  MM1 = m31 | m30
         *
         * ESI and EDI are saved and restored; EAX, ECX, EDX, the flags and
         * MM0, MM1, MM4-MM6 are overwritten.  FEMMS runs before returning,
         * including when the vertex count is zero.
         */
    448 GLOBL GLNAME( _mesa_3dnow_transform_points4_2d_no_rot )
    449 HIDDEN(_mesa_3dnow_transform_points4_2d_no_rot)
    450 GLNAME( _mesa_3dnow_transform_points4_2d_no_rot ):
    451 
    452     PUSH_L    ( ESI )	/* saved here, restored before RET      */
    453 
    454     MOV_L     ( ARG_DEST, ECX )	/* ECX = destination vector             */
    455     MOV_L     ( ARG_MATRIX, ESI )	/* ESI = matrix (moved to ECX below)    */
    456     MOV_L     ( ARG_SOURCE, EAX )	/* EAX = source vector                  */
    457     MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )	/* dest size field = 4 */
    458     OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )	/* set VEC_SIZE_4 in dest flags */
    459     MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    460     MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count */
    461 
    462     PUSH_L    ( EDI )	/* pushed only after the ARG_* reads    */
    463 
    464     MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest data     */
    465     MOV_L     ( ESI, ECX )	/* ECX = matrix                         */
    466     MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count  */
    467     MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride */
    468     MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source data   */
    469 
    470     TEST_L    ( ESI, ESI )
    471     JZ        ( LLBL( G3TP2NRR_3 ) )	/* count == 0 -> nothing to transform */
    472 
        /* Matrix elements are loaded once; the loop only reads registers. */
    473     MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
    474     PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */
    475 
    476     MOVQ      ( REGOFF(48, ECX), MM1 )	/* m31             | m30             */
    477 
    478 ALIGNTEXT16
    479 LLBL( G3TP2NRR_2 ):
    480 
    481     PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
    482 
    483     MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
    484     MOVQ      ( REGOFF(8, EAX), MM5 )	/* x3              | x2              */
    485 
    486     ADD_L     ( EDI, EAX )		/* next vertex                       */
    487     PREFETCH  ( REGIND(EAX) )	/* next source vertex                */
    488 
    489     PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */
    490     MOVQ      ( MM5, MM6 )		/* x3              | x2              */
    491 
    492     ADD_L     ( CONST(16), EDX )	/* next r                            */
    493     PUNPCKHDQ ( MM6, MM6 )		/* x3              | x3              */
    494 
    495     PFMUL     ( MM1, MM6 )		/* x3*m31          | x3*m30          */
    496     PFADD     ( MM4, MM6 )		/* x1*m11+x3*m31   | x0*m00+x3*m30   */
    497 
    498     MOVQ      ( MM6, REGOFF(-16, EDX) )	/* write r0, r1                      */
    499     MOVQ      ( MM5, REGOFF(-8, EDX) )	/* write r2, r3 (x2, x3 unchanged)   */
    500 
    501     DEC_L     ( ESI )			/* decrement vertex counter          */
    502 
    503     JNZ       ( LLBL( G3TP2NRR_2 ) )	/* cnt > 0 ? -> process next vertex  */
    504 
    505 LLBL( G3TP2NRR_3 ):
    506 
    507     FEMMS	/* leave MMX/3DNow! state before returning */
    508     POP_L     ( EDI )
    509     POP_L     ( ESI )
    510     RET
    511 
    512 
    513 
    514 
    515 ALIGNTEXT16
        /*
         * _mesa_3dnow_transform_points4_identity
         *
         * Copies every 16-byte source vertex to the destination unchanged
         * (r0..r3 = x0..x3), reading with the source stride and writing
         * with a fixed 16-byte step.  The matrix pointer is loaded into a
         * register but never dereferenced.
         *
         * Inputs come from the ARG_DEST, ARG_MATRIX and ARG_SOURCE operands
         * (NOTE(review): presumably stack arguments defined in xform_args.h
         * -- confirm).  Before the loop the destination's V4F_SIZE field is
         * set to 4, VEC_SIZE_4 is OR-ed into its V4F_FLAGS byte and its
         * V4F_COUNT is copied from the source.
         *
         * Loop registers:
         *   EAX = current source vertex, advanced by the source stride (EDI)
         *   EDX = current destination vertex, advanced by 16 bytes
         *   ESI = vertices left to process
         *
         * ESI and EDI are saved and restored; EAX, ECX, EDX, the flags,
         * MM0 and MM1 are overwritten.  FEMMS runs before returning,
         * including when the vertex count is zero.
         */
    516 GLOBL GLNAME( _mesa_3dnow_transform_points4_identity )
    517 HIDDEN(_mesa_3dnow_transform_points4_identity)
    518 GLNAME( _mesa_3dnow_transform_points4_identity ):
    519 
    520     PUSH_L    ( ESI )	/* saved here, restored before RET      */
    521 
    522     MOV_L     ( ARG_DEST, ECX )	/* ECX = destination vector             */
    523     MOV_L     ( ARG_MATRIX, ESI )	/* matrix pointer; unused by the loop   */
    524     MOV_L     ( ARG_SOURCE, EAX )	/* EAX = source vector                  */
    525     MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )	/* dest size field = 4 */
    526     OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )	/* set VEC_SIZE_4 in dest flags */
    527     MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    528     MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count */
    529 
    530     PUSH_L    ( EDI )	/* pushed only after the ARG_* reads    */
    531 
    532     MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest data     */
    533     MOV_L     ( ESI, ECX )	/* ECX = matrix (never read afterwards) */
    534     MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count  */
    535     MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride */
    536     MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source data   */
    537 
    538     TEST_L    ( ESI, ESI )
    539     JZ        ( LLBL( G3TPIR_2 ) )	/* count == 0 -> nothing to copy */
    540 
    541 ALIGNTEXT16
    542 LLBL( G3TPIR_1 ):
    543 
    544     PREFETCHW ( REGOFF(32, EDX) )       /* prefetch 2 vertices ahead         */
    545 
    546     MOVQ      ( REGIND(EAX), MM0 )	/* x1              | x0              */
    547     MOVQ      ( REGOFF(8, EAX), MM1 )	/* x3              | x2              */
    548 
    549     ADD_L     ( EDI, EAX )		/* next vertex                       */
    550     PREFETCH  ( REGIND(EAX) )	/* next source vertex                */
    551 
    552     ADD_L     ( CONST(16), EDX )	/* next r                            */
    553     MOVQ      ( MM0, REGOFF(-16, EDX) )	/* r1              | r0              */
    554 
    555     MOVQ      ( MM1, REGOFF(-8, EDX) )	/* r3              | r2              */
    556 
    557     DEC_L     ( ESI )			/* decrement vertex counter          */
    558     JNZ       ( LLBL( G3TPIR_1 ) )	/* cnt > 0 ? -> process next vertex  */
    559 
    560 LLBL( G3TPIR_2 ):
    561 
    562     FEMMS	/* leave MMX/3DNow! state before returning */
    563     POP_L     ( EDI )
    564     POP_L     ( ESI )
    565     RET
    566 #endif
    567 
    568 #if defined (__ELF__) && defined (__linux__)
        /*
         * Emitted outside the USE_3DNOW_ASM guard, so the object carries an
         * empty .note.GNU-stack section on Linux/ELF even when no code is
         * assembled.  GNU toolchains treat that section as a declaration
         * that the object does not need an executable stack.
         */
    569 	.section .note.GNU-stack,"",%progbits
    570 #endif
    571