/*
 * Mesa 3-D graphics library
 * Version:  3.5
 *
 * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/** TODO:
  * - insert PREFETCH instructions to avoid cache-misses !
  * - some more optimizations are possible...
  * - for 40-50% more performance in the SSE-functions, the
  *   data (trans-matrix, src_vert, dst_vert) needs to be 16-byte aligned !
  */

/*
 * SSE transforms for source vectors of which only the first component
 * ("ox") of each vertex is read.
 *
 * Conventions shared by every routine in this file:
 *
 *   Arguments are read from the stack (ESP-relative): the destination
 *   GLvector4f at OFFSET_DEST, the matrix through ARG_MATRIX and the source
 *   GLvector4f at OFFSET_SOURCE.  FRAME_OFFSET is the number of bytes pushed
 *   by the prologue (ESI + EDI = 8) and must be kept in sync with it.
 *
 *   Register roles inside the loops:
 *     ESI  current source vertex (advanced by the source stride in EAX)
 *     EDI  current dest vertex   (advanced by 16 bytes per vertex)
 *     ECX  one-past-the-end dest pointer (dest start + count * 16)
 *     EDX  matrix pointer (scratch register in the identity routine)
 *
 *   ESI and EDI are saved and restored; EAX, ECX, EDX, the flags and the
 *   XMM registers used by a routine are clobbered.
 *
 *   When the source count is zero the routines return without touching the
 *   destination vector at all (flags, count and size are left unchanged).
 *
 *   Routines that use MOVAPS on the matrix require it to be 16-byte aligned.
 */

#ifdef USE_SSE_ASM
#include "assyntax.h"
#include "matypes.h"
#include "xform_args.h"

    SEG_TEXT

/* Dword-indexed accessors: source vertex, dest vertex, matrix element. */
#define S(i)    REGOFF(i * 4, ESI)
#define D(i)    REGOFF(i * 4, EDI)
#define M(i)    REGOFF(i * 4, EDX)


/*
 * _mesa_sse_transform_points1_general
 *
 * For every vertex writes four floats:
 *     D(0..3) = ox * M(0..3) + M(12..15)
 * Dest is marked VEC_SIZE_4 / size 4.  The matrix is loaded with MOVAPS.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_general)
HIDDEN( _mesa_sse_transform_points1_general )
GLNAME( _mesa_sse_transform_points1_general ):

#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )   /* ptr to source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )     /* ptr to dest GLvector4f */

    MOV_L( ARG_MATRIX, EDX )                    /* ptr to matrix */
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */

    TEST_L( ECX, ECX )
    JZ( LLBL(K_GTP1GR_finish) )                 /* count was zero; go to finish */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* stride */
    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags */

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )    /* set dest size */

    SHL_L( CONST(4), ECX )                      /* count *= 16 */
    MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */

    MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
    ADD_L( EDI, ECX )                           /* ECX = end of dest data */

    /* Loop invariants. */
    MOVAPS( M(0), XMM0 )                        /* m3  | m2  | m1  | m0  */
    MOVAPS( M(12), XMM1 )                       /* m15 | m14 | m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP1GR_top):
    MOVSS( S(0), XMM2 )                         /* ox */
    SHUFPS( CONST(0x0), XMM2, XMM2 )            /* ox | ox | ox | ox */
    MULPS( XMM0, XMM2 )                         /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
    ADDPS( XMM1, XMM2 )                         /* +m15  | +m14  | +m13  | +m12  */
    MOVUPS( XMM2, D(0) )                        /* dest may be unaligned */

    ADD_L( CONST(16), EDI )
    ADD_L( EAX, ESI )
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTP1GR_top) )

LLBL(K_GTP1GR_finish):
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET



/*
 * _mesa_sse_transform_points1_identity
 *
 * Copies the first dword of every source vertex to the dest vertex.
 * Dest is marked VEC_SIZE_1 / size 1.  The copy loop is skipped when the
 * source and dest data pointers are equal; the dest flags, count and size
 * have already been updated at that point.  No matrix is read.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_identity)
HIDDEN(_mesa_sse_transform_points1_identity)
GLNAME( _mesa_sse_transform_points1_identity ):

#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )   /* ptr to source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )     /* ptr to dest GLvector4f */

    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */

    TEST_L( ECX, ECX )
    JZ( LLBL(K_GTP1IR_finish) )                 /* count was zero; go to finish */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* stride */
    OR_L( CONST(VEC_SIZE_1), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags */

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
    MOV_L( CONST(1), REGOFF(V4F_SIZE, EDI) )    /* set dest size */

    SHL_L( CONST(4), ECX )                      /* count *= 16 */
    MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */

    MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
    ADD_L( EDI, ECX )                           /* ECX = end of dest data */

    CMP_L( ESI, EDI )
    JE( LLBL(K_GTP1IR_finish) )                 /* same data pointer; nothing to copy */

ALIGNTEXT32
LLBL(K_GTP1IR_top):
    MOV_L( S(0), EDX )                          /* EDX is plain scratch here */
    MOV_L( EDX, D(0) )

    ADD_L( CONST(16), EDI )
    ADD_L( EAX, ESI )
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTP1IR_top) )

LLBL(K_GTP1IR_finish):
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET



/*
 * _mesa_sse_transform_points1_3d_no_rot
 *
 * For every vertex writes three floats:
 *     D(0) = ox * m0 + m12
 *     D(1) = m13
 *     D(2) = m14
 * Dest is marked VEC_SIZE_3 / size 3.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_3d_no_rot)
HIDDEN(_mesa_sse_transform_points1_3d_no_rot)
GLNAME(_mesa_sse_transform_points1_3d_no_rot):

#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )   /* ptr to source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )     /* ptr to dest GLvector4f */

    MOV_L( ARG_MATRIX, EDX )                    /* ptr to matrix */
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */

    TEST_L( ECX, ECX )
    JZ( LLBL(K_GTP13DNRR_finish) )              /* count was zero; go to finish */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* stride */
    OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags */

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
    MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )    /* set dest size */

    SHL_L( CONST(4), ECX )                      /* count *= 16 */
    MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */

    MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
    ADD_L( EDI, ECX )                           /* ECX = end of dest data */

    /* Loop invariants. */
    MOVSS( M(0), XMM0 )                         /* m0 */
    MOVSS( M(12), XMM1 )                        /* m12 */
    MOVSS( M(13), XMM2 )                        /* m13 */
    MOVSS( M(14), XMM3 )                        /* m14 */

ALIGNTEXT32
LLBL(K_GTP13DNRR_top):
    MOVSS( S(0), XMM4 )                         /* ox */
    MULSS( XMM0, XMM4 )                         /* ox*m0 */
    ADDSS( XMM1, XMM4 )                         /* ox*m0+m12 */
    MOVSS( XMM4, D(0) )

    MOVSS( XMM2, D(1) )                         /* m13->D(1) */
    MOVSS( XMM3, D(2) )                         /* m14->D(2) */

    ADD_L( CONST(16), EDI )
    ADD_L( EAX, ESI )
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTP13DNRR_top) )

LLBL(K_GTP13DNRR_finish):
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET



/*
 * _mesa_sse_transform_points1_perspective
 *
 * For every vertex writes four floats:
 *     D(0) = ox * m0
 *     D(1) = 0
 *     D(2) = m14
 *     D(3) = 0
 * Dest is marked VEC_SIZE_4 / size 4.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_perspective)
HIDDEN(_mesa_sse_transform_points1_perspective)
GLNAME(_mesa_sse_transform_points1_perspective):

#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )   /* ptr to source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )     /* ptr to dest GLvector4f */

    MOV_L( ARG_MATRIX, EDX )                    /* ptr to matrix */
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */

    TEST_L( ECX, ECX )
    JZ( LLBL(K_GTP13PR_finish) )                /* count was zero; go to finish */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* stride */
    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags */

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )    /* set dest size */

    SHL_L( CONST(4), ECX )                      /* count *= 16 */
    MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */

    MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
    ADD_L( EDI, ECX )                           /* ECX = end of dest data */

    /* Loop invariants. */
    XORPS( XMM0, XMM0 )                         /* 0 | 0 | 0 | 0 */
    MOVSS( M(0), XMM1 )                         /* m0 */
    MOVSS( M(14), XMM2 )                        /* m14 */

ALIGNTEXT32
LLBL(K_GTP13PR_top):
    MOVSS( S(0), XMM3 )                         /* ox */
    MULSS( XMM1, XMM3 )                         /* ox*m0 */
    MOVSS( XMM3, D(0) )                         /* ox*m0->D(0) */
    MOVSS( XMM2, D(2) )                         /* m14->D(2) */

    MOVSS( XMM0, D(1) )                         /* 0->D(1) */
    MOVSS( XMM0, D(3) )                         /* 0->D(3) */

    ADD_L( CONST(16), EDI )
    ADD_L( EAX, ESI )
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTP13PR_top) )

LLBL(K_GTP13PR_finish):
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET


/*
 * _mesa_sse_transform_points1_2d
 *
 * For every vertex writes two floats:
 *     D(0) = ox * m0 + m12
 *     D(1) = ox * m1 + m13
 * Dest is marked VEC_SIZE_2 / size 2.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_2d)
HIDDEN(_mesa_sse_transform_points1_2d)
GLNAME(_mesa_sse_transform_points1_2d):

#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )   /* ptr to source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )     /* ptr to dest GLvector4f */

    MOV_L( ARG_MATRIX, EDX )                    /* ptr to matrix */
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */

    TEST_L( ECX, ECX )
    JZ( LLBL(K_GTP13P2DR_finish) )              /* count was zero; go to finish */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* stride */
    OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags */

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
    MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )    /* set dest size */

    SHL_L( CONST(4), ECX )                      /* count *= 16 */
    MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */

    MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
    ADD_L( EDI, ECX )                           /* ECX = end of dest data */

    /*
     * Loop invariants.  MOVLPS only writes the low two lanes, so clear the
     * registers first: the packed multiply/add below would otherwise operate
     * on whatever the caller left in the upper lanes on every iteration.
     * Only the low two lanes are ever stored, so results are unaffected.
     */
    XORPS( XMM0, XMM0 )
    XORPS( XMM1, XMM1 )
    MOVLPS( M(0), XMM0 )                        /* 0 | 0 | m1  | m0  */
    MOVLPS( M(12), XMM1 )                       /* 0 | 0 | m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP13P2DR_top):
    MOVSS( S(0), XMM2 )                         /* ox */
    SHUFPS( CONST(0x0), XMM2, XMM2 )            /* ox | ox | ox | ox */
    MULPS( XMM0, XMM2 )                         /* - | - | ox*m1 | ox*m0 */
    ADDPS( XMM1, XMM2 )                         /* - | - | ox*m1+m13 | ox*m0+m12 */
    MOVLPS( XMM2, D(0) )                        /* ->D(1) | ->D(0) */

    ADD_L( CONST(16), EDI )
    ADD_L( EAX, ESI )
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTP13P2DR_top) )

LLBL(K_GTP13P2DR_finish):
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET


/*
 * _mesa_sse_transform_points1_2d_no_rot
 *
 * For every vertex writes two floats:
 *     D(0) = ox * m0 + m12
 *     D(1) = m13
 * Dest is marked VEC_SIZE_2 / size 2.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_2d_no_rot)
HIDDEN(_mesa_sse_transform_points1_2d_no_rot)
GLNAME(_mesa_sse_transform_points1_2d_no_rot):

#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )   /* ptr to source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )     /* ptr to dest GLvector4f */

    MOV_L( ARG_MATRIX, EDX )                    /* ptr to matrix */
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */

    TEST_L( ECX, ECX )
    JZ( LLBL(K_GTP13P2DNRR_finish) )            /* count was zero; go to finish */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* stride */
    OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags */

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
    MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )    /* set dest size */

    SHL_L( CONST(4), ECX )                      /* count *= 16 */
    MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */

    MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
    ADD_L( EDI, ECX )                           /* ECX = end of dest data */

    /* Loop invariants. */
    MOVSS( M(0), XMM0 )                         /* m0 */
    MOVSS( M(12), XMM1 )                        /* m12 */
    MOVSS( M(13), XMM2 )                        /* m13 */

ALIGNTEXT32
LLBL(K_GTP13P2DNRR_top):
    MOVSS( S(0), XMM3 )                         /* ox */
    MULSS( XMM0, XMM3 )                         /* ox*m0 */
    ADDSS( XMM1, XMM3 )                         /* ox*m0+m12 */
    MOVSS( XMM3, D(0) )
    MOVSS( XMM2, D(1) )                         /* m13->D(1) */

    ADD_L( CONST(16), EDI )
    ADD_L( EAX, ESI )
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTP13P2DNRR_top) )

LLBL(K_GTP13P2DNRR_finish):
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET



/*
 * _mesa_sse_transform_points1_3d
 *
 * For every vertex writes three floats:
 *     D(0) = ox * m0 + m12
 *     D(1) = ox * m1 + m13
 *     D(2) = ox * m2 + m14
 * Dest is marked VEC_SIZE_3 / size 3.  The matrix is loaded with MOVAPS.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_3d)
HIDDEN(_mesa_sse_transform_points1_3d)
GLNAME(_mesa_sse_transform_points1_3d):

#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )   /* ptr to source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )     /* ptr to dest GLvector4f */

    MOV_L( ARG_MATRIX, EDX )                    /* ptr to matrix */
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */

    TEST_L( ECX, ECX )
    JZ( LLBL(K_GTP13P3DR_finish) )              /* count was zero; go to finish */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* stride */
    OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags */

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
    MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )    /* set dest size */

    SHL_L( CONST(4), ECX )                      /* count *= 16 */
    MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */

    MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
    ADD_L( EDI, ECX )                           /* ECX = end of dest data */

    /* Loop invariants. */
    MOVAPS( M(0), XMM0 )                        /* m3  | m2  | m1  | m0  */
    MOVAPS( M(12), XMM1 )                       /* m15 | m14 | m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP13P3DR_top):
    MOVSS( S(0), XMM2 )                         /* ox */
    SHUFPS( CONST(0x0), XMM2, XMM2 )            /* ox | ox | ox | ox */
    MULPS( XMM0, XMM2 )                         /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
    ADDPS( XMM1, XMM2 )                         /* +m15  | +m14  | +m13  | +m12  */
    MOVLPS( XMM2, D(0) )                        /* - | - | ->D(1) | ->D(0) */
    UNPCKHPS( XMM2, XMM2 )                      /* ox*m3+m15 | ox*m3+m15 | ox*m2+m14 | ox*m2+m14 */
    MOVSS( XMM2, D(2) )                         /* low lane = ox*m2+m14 */

    ADD_L( CONST(16), EDI )
    ADD_L( EAX, ESI )
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTP13P3DR_top) )

LLBL(K_GTP13P3DR_finish):
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET
#endif

#if defined (__ELF__) && defined (__linux__)
    .section .note.GNU-stack,"",%progbits
#endif