/*
 * SPARC assembly implementations of the normal-vector transform routines.
 *
 * Syntax:  GNU as for SPARC, run through the C preprocessor.
 * Shared register contract on entry (all routines are leaf routines and
 * return with retl, so the caller's register window is never switched):
 *
 *	%o0 = mat	(only mat->inv is read, and only by the transform_* routines)
 *	%o1 = scale	(float bit pattern held in an integer register)
 *	%o2 = in	(source vector: start, count, stride)
 *	%o3 = lengths	(float array, may be NULL where a routine tests it)
 *	%o4 = dest	(destination vector: start is read, count is written)
 *
 * Registers written: %o0, %o3, %o4, %o5, %g1, %g2, %g3, the integer
 * condition codes, %f0-%f8, %f10, %f12, %f13, %f15 and whatever FP
 * registers the M<n> / LDMATRIX_* macros from sparc_matrix.h map to.
 *
 * Output vectors are written three floats at a time and the output
 * pointer always advances by 0x10 bytes; the input pointer advances by
 * in->stride bytes.
 */

#include "sparc_matrix.h"

	.register	%g2, #scratch
	.register	%g3, #scratch

	.text

/*
 * Offset from %sp of the scratch words used to move values from integer
 * registers into FP registers (store as integer, reload as float).
 * NOTE(review): presumably the 64-bit value is the V9 stack bias (2047)
 * plus a 16 x 8 byte save area, and the 32-bit value a 16 x 4 byte save
 * area -- confirm against the ABI documents.
 */
#ifdef __arch64__
#define STACK_VAR_OFF	(2047 + (8 * 16))
#else
#define STACK_VAR_OFF	(4 * 16)
#endif

	/* Newton-Raphson approximation turns out to be slower
	 * (and less accurate) than direct fsqrts/fdivs.
	 */
#define ONE_DOT_ZERO	0x3f800000	/* IEEE-754 single precision 1.0f */

/*
 * _mesa_sparc_transform_normalize_normals
 *
 * Transforms each input normal by the upper-left 3x3 of mat->inv
 * (elements 0,1,2,4,5,6,8,9,10) and then normalizes it.
 *
 *   lengths == NULL: out = t * (1.0 / sqrt(t . t))
 *   lengths != NULL: the nine matrix elements are first multiplied by
 *                    scale, then out = t * lengths[i]
 *
 * NOTE(review): the lengths == NULL path divides by sqrt(len) with no
 * check for len == 0; confirm callers never pass zero-length normals or
 * that the resulting non-finite outputs are acceptable.
 */
	.globl	_mesa_sparc_transform_normalize_normals
_mesa_sparc_transform_normalize_normals:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */

	/* Bounce 1.0f and scale through the stack to reach the FP regs. */
	sethi	%hi(ONE_DOT_ZERO), %g2
	sub	%sp, 16, %sp
	st	%g2, [%sp + STACK_VAR_OFF+0x0]
	st	%o1, [%sp + STACK_VAR_OFF+0x4]
	ld	[%sp + STACK_VAR_OFF+0x0], %f12	! f12 = 1.0f
	ld	[%sp + STACK_VAR_OFF+0x4], %f15	! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f				! nothing to do if count < 1
	 cmp	%o3, 0				! (delay slot) lengths == NULL ?
	bne	4f
	 clr	%o4				! 'i' for STRIDE_LOOP

1:	/* LENGTHS == NULL */
	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
	 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
	 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM	Group
	fmuls	%f1, M1, %f4			! FGM	Group
	fmuls	%f0, M4, %f5			! FGM	Group
	fmuls	%f1, M5, %f6			! FGM	Group
	fmuls	%f0, M8, %f7			! FGM	Group	f3 available
	fmuls	%f1, M9, %f8			! FGM	Group	f4 available
	fadds	%f3, %f4, %f3			! FGA
	fmuls	%f2, M2, %f10			! FGM	Group	f5 available
	fmuls	%f2, M6, %f0			! FGM	Group	f6 available
	fadds	%f5, %f6, %f5			! FGA
	fmuls	%f2, M10, %f4			! FGM	Group	f7 available
	fadds	%f7, %f8, %f7			! FGA	Group	f8,f3 available
	fadds	%f3, %f10, %f3			! FGA	Group	f10 available
	fadds	%f5, %f0, %f5			! FGA	Group	stall f0,f5 available
	fadds	%f7, %f4, %f7			! FGA	Group	stall f4,f7 available

	/* f3=tx, f5=ty, f7=tz */

	/* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
	fmuls	%f3, %f3, %f6			! FGM	Group	f3 available
	fmuls	%f5, %f5, %f8			! FGM	Group	f5 available
	fmuls	%f7, %f7, %f10			! FGM	Group	f7 available
	fadds	%f6, %f8, %f6			! FGA	Group	2cyc stall f6,f8 available
	fadds	%f6, %f10, %f6			! FGA	Group	4cyc stall f6,f10 available

	/* scale (f6) = 1.0 / sqrt(len) */
	fsqrts	%f6, %f6			! FDIV  20 cycles
	fdivs	%f12, %f6, %f6			! FDIV	14 cycles

	fmuls	%f3, %f6, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * scale
	fmuls	%f5, %f6, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * scale
	fmuls	%f7, %f6, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * scale

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x10, %g3			! advance out vector pointer

	ba	7f
	 nop

4:	/* LENGTHS != NULL */
	/* Fold scale into the matrix once, outside the loop. */
	fmuls	M0, %f15, M0
	fmuls	M1, %f15, M1
	fmuls	M2, %f15, M2
	fmuls	M4, %f15, M4
	fmuls	M5, %f15, M5
	fmuls	M6, %f15, M6
	fmuls	M8, %f15, M8
	fmuls	M9, %f15, M9
	fmuls	M10, %f15, M10

5:
	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
	 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
	 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM	Group
	fmuls	%f1, M1, %f4			! FGM	Group
	fmuls	%f0, M4, %f5			! FGM	Group
	fmuls	%f1, M5, %f6			! FGM	Group
	fmuls	%f0, M8, %f7			! FGM	Group	f3 available
	fmuls	%f1, M9, %f8			! FGM	Group	f4 available
	fadds	%f3, %f4, %f3			! FGA
	fmuls	%f2, M2, %f10			! FGM	Group	f5 available
	fmuls	%f2, M6, %f0			! FGM	Group	f6 available
	fadds	%f5, %f6, %f5			! FGA
	fmuls	%f2, M10, %f4			! FGM	Group	f7 available
	fadds	%f7, %f8, %f7			! FGA	Group	f8,f3 available
	fadds	%f3, %f10, %f3			! FGA	Group	f10 available
	ld	[%o3], %f13			! LSU
	fadds	%f5, %f0, %f5			! FGA	Group	stall f0,f5 available
	add	%o3, 4, %o3			! IEU0
	fadds	%f7, %f4, %f7			! FGA	Group	stall f4,f7 available

	/* f3=tx, f5=ty, f7=tz, f13=lengths[i] */

	fmuls	%f3, %f13, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * len
	fmuls	%f5, %f13, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * len
	fmuls	%f7, %f13, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * len

	cmp	%o4, %g1			! continue if (i < count)
	bl	5b
	 add	%g3, 0x10, %g3			! advance out vector pointer

7:	retl
	 nop

/*
 * _mesa_sparc_transform_normalize_normals_no_rot
 *
 * Same contract as _mesa_sparc_transform_normalize_normals, but only the
 * diagonal elements 0, 5 and 10 of mat->inv are loaded and used.
 *
 * NOTE(review): as above, the lengths == NULL path has no len == 0 check.
 */
	.globl	_mesa_sparc_transform_normalize_normals_no_rot
_mesa_sparc_transform_normalize_normals_no_rot:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */

	/* Bounce 1.0f and scale through the stack to reach the FP regs. */
	sethi	%hi(ONE_DOT_ZERO), %g2
	sub	%sp, 16, %sp
	st	%g2, [%sp + STACK_VAR_OFF+0x0]
	st	%o1, [%sp + STACK_VAR_OFF+0x4]
	ld	[%sp + STACK_VAR_OFF+0x0], %f12	! f12 = 1.0f
	ld	[%sp + STACK_VAR_OFF+0x4], %f15	! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	LDMATRIX_0_5_10(%o0)

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f				! nothing to do if count < 1
	 cmp	%o3, 0				! (delay slot) lengths == NULL ?
	bne	4f
	 clr	%o4				! 'i' for STRIDE_LOOP

1:	/* LENGTHS == NULL */
	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0)
	 * ty (f5) = (uy * m5)
	 * tz (f7) = (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM	Group
	fmuls	%f1, M5, %f5			! FGM	Group
	fmuls	%f2, M10, %f7			! FGM	Group

	/* f3=tx, f5=ty, f7=tz */

	/* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
	fmuls	%f3, %f3, %f6			! FGM	Group	stall, f3 available
	fmuls	%f5, %f5, %f8			! FGM	Group	f5 available
	fmuls	%f7, %f7, %f10			! FGM	Group	f7 available
	fadds	%f6, %f8, %f6			! FGA	Group	2cyc stall f6,f8 available
	fadds	%f6, %f10, %f6			! FGA	Group	4cyc stall f6,f10 available

	/* scale (f6) = 1.0 / sqrt(len) */
	fsqrts	%f6, %f6			! FDIV  20 cycles
	fdivs	%f12, %f6, %f6			! FDIV	14 cycles

	fmuls	%f3, %f6, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * scale
	fmuls	%f5, %f6, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * scale
	fmuls	%f7, %f6, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * scale

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x10, %g3			! advance out vector pointer

	ba	7f
	 nop

4:	/* LENGTHS != NULL */
	/* Fold scale into the diagonal once, outside the loop. */
	fmuls	M0, %f15, M0
	fmuls	M5, %f15, M5
	fmuls	M10, %f15, M10

5:
	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0)
	 * ty (f5) = (uy * m5)
	 * tz (f7) = (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM	Group
	ld	[%o3], %f13			! LSU
	fmuls	%f1, M5, %f5			! FGM	Group
	add	%o3, 4, %o3			! IEU0
	fmuls	%f2, M10, %f7			! FGM	Group

	/* f3=tx, f5=ty, f7=tz, f13=lengths[i] */

	fmuls	%f3, %f13, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * len
	fmuls	%f5, %f13, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * len
	fmuls	%f7, %f13, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * len

	cmp	%o4, %g1			! continue if (i < count)
	bl	5b
	 add	%g3, 0x10, %g3			! advance out vector pointer

7:	retl
	 nop

/*
 * _mesa_sparc_transform_rescale_normals_no_rot
 *
 * Multiplies elements 0, 5 and 10 of mat->inv by scale, then writes
 * out = (ux * m0, uy * m5, uz * m10) for every input normal.
 * %o3 (lengths) is not read.
 */
	.globl	_mesa_sparc_transform_rescale_normals_no_rot
_mesa_sparc_transform_rescale_normals_no_rot:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
	sub	%sp, 16, %sp
	st	%o1, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f15	! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	LDMATRIX_0_5_10(%o0)

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f				! nothing to do if count < 1
	 clr	%o4				! 'i' for STRIDE_LOOP

	/* Fold scale into the diagonal once, outside the loop. */
	fmuls	M0, %f15, M0
	fmuls	M5, %f15, M5
	fmuls	M10, %f15, M10

1:	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0)
	 * ty (f5) = (uy * m5)
	 * tz (f7) = (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM	Group
	st	%f3, [%g3 + 0x00]		! LSU
	fmuls	%f1, M5, %f5			! FGM	Group
	st	%f5, [%g3 + 0x04]		! LSU
	fmuls	%f2, M10, %f7			! FGM	Group
	st	%f7, [%g3 + 0x08]		! LSU

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x10, %g3			! advance out vector pointer

7:	retl
	 nop

/*
 * _mesa_sparc_transform_rescale_normals
 *
 * Multiplies elements 0,1,2,4,5,6,8,9,10 of mat->inv by scale, then
 * writes the 3x3 transform of every input normal.
 * %o3 (lengths) is not read.
 */
	.globl	_mesa_sparc_transform_rescale_normals
_mesa_sparc_transform_rescale_normals:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
	sub	%sp, 16, %sp
	st	%o1, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f15	! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f				! nothing to do if count < 1
	 clr	%o4				! 'i' for STRIDE_LOOP

	/* Fold scale into the matrix once, outside the loop. */
	fmuls	M0, %f15, M0
	fmuls	M1, %f15, M1
	fmuls	M2, %f15, M2
	fmuls	M4, %f15, M4
	fmuls	M5, %f15, M5
	fmuls	M6, %f15, M6
	fmuls	M8, %f15, M8
	fmuls	M9, %f15, M9
	fmuls	M10, %f15, M10

1:	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
	 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
	 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM	Group
	fmuls	%f1, M1, %f4			! FGM	Group
	fmuls	%f0, M4, %f5			! FGM	Group
	fmuls	%f1, M5, %f6			! FGM	Group
	fmuls	%f0, M8, %f7			! FGM	Group	f3 available
	fmuls	%f1, M9, %f8			! FGM	Group	f4 available
	fadds	%f3, %f4, %f3			! FGA
	fmuls	%f2, M2, %f10			! FGM	Group	f5 available
	fmuls	%f2, M6, %f0			! FGM	Group	f6 available
	fadds	%f5, %f6, %f5			! FGA
	fmuls	%f2, M10, %f4			! FGM	Group	f7 available
	fadds	%f7, %f8, %f7			! FGA	Group	f8,f3 available
	fadds	%f3, %f10, %f3			! FGA	Group	f10 available
	st	%f3, [%g3 + 0x00]		! LSU
	fadds	%f5, %f0, %f5			! FGA	Group	stall f0,f5 available
	st	%f5, [%g3 + 0x04]		! LSU
	fadds	%f7, %f4, %f7			! FGA	Group	stall f4,f7 available
	st	%f7, [%g3 + 0x08]		! LSU

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x10, %g3			! advance out vector pointer

7:	retl
	 nop

/*
 * _mesa_sparc_transform_normals_no_rot
 *
 * Writes out = (ux * m0, uy * m5, uz * m10) for every input normal,
 * using elements 0, 5 and 10 of mat->inv unmodified.
 * %o1 (scale) and %o3 (lengths) are not read.
 */
	.globl	_mesa_sparc_transform_normals_no_rot
_mesa_sparc_transform_normals_no_rot:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	LDMATRIX_0_5_10(%o0)

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f				! nothing to do if count < 1
	 clr	%o4				! 'i' for STRIDE_LOOP

1:	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0)
	 * ty (f5) = (uy * m5)
	 * tz (f7) = (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM	Group
	st	%f3, [%g3 + 0x00]		! LSU
	fmuls	%f1, M5, %f5			! FGM	Group
	st	%f5, [%g3 + 0x04]		! LSU
	fmuls	%f2, M10, %f7			! FGM	Group
	st	%f7, [%g3 + 0x08]		! LSU

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x10, %g3			! advance out vector pointer

7:	retl
	 nop

/*
 * _mesa_sparc_transform_normals
 *
 * Writes the 3x3 transform (elements 0,1,2,4,5,6,8,9,10 of mat->inv,
 * unmodified) of every input normal.
 * %o1 (scale) and %o3 (lengths) are not read.
 */
	.globl	_mesa_sparc_transform_normals
_mesa_sparc_transform_normals:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f				! nothing to do if count < 1
	 clr	%o4				! 'i' for STRIDE_LOOP

1:	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
	 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
	 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM	Group
	fmuls	%f1, M1, %f4			! FGM	Group
	fmuls	%f0, M4, %f5			! FGM	Group
	fmuls	%f1, M5, %f6			! FGM	Group
	fmuls	%f0, M8, %f7			! FGM	Group	f3 available
	fmuls	%f1, M9, %f8			! FGM	Group	f4 available
	fadds	%f3, %f4, %f3			! FGA
	fmuls	%f2, M2, %f10			! FGM	Group	f5 available
	fmuls	%f2, M6, %f0			! FGM	Group	f6 available
	fadds	%f5, %f6, %f5			! FGA
	fmuls	%f2, M10, %f4			! FGM	Group	f7 available
	fadds	%f7, %f8, %f7			! FGA	Group	f8,f3 available
	fadds	%f3, %f10, %f3			! FGA	Group	f10 available
	st	%f3, [%g3 + 0x00]		! LSU
	fadds	%f5, %f0, %f5			! FGA	Group	stall f0,f5 available
	st	%f5, [%g3 + 0x04]		! LSU
	fadds	%f7, %f4, %f7			! FGA	Group	stall f4,f7 available
	st	%f7, [%g3 + 0x08]		! LSU

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x10, %g3			! advance out vector pointer

7:	retl
	 nop

/*
 * _mesa_sparc_normalize_normals
 *
 * Normalizes every input normal without any matrix transform.
 *
 *   lengths == NULL: out = u * (1.0 / sqrt(u . u))
 *   lengths != NULL: out = u * lengths[i]
 *
 * %o0 (mat) and %o1 (scale) are not read.
 *
 * NOTE(review): the lengths == NULL path divides by sqrt(len) with no
 * check for len == 0; confirm callers never pass zero-length normals or
 * that the resulting non-finite outputs are acceptable.
 */
	.globl	_mesa_sparc_normalize_normals
_mesa_sparc_normalize_normals:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */

	/* Bounce 1.0f through the stack to reach the FP regs. */
	sethi	%hi(ONE_DOT_ZERO), %g2
	sub	%sp, 16, %sp
	st	%g2, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f12	! f12 = 1.0f
	add	%sp, 16, %sp

	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f				! nothing to do if count < 1
	 cmp	%o3, 0				! (delay slot) lengths == NULL ?
	bne	4f
	 clr	%o4				! 'i' for STRIDE_LOOP

1:	/* LENGTHS == NULL */
	ld	[%o5 + 0x00], %f3		! ux = from[0]
	ld	[%o5 + 0x04], %f5		! uy = from[1]
	ld	[%o5 + 0x08], %f7		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* f3=tx, f5=ty, f7=tz */

	/* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
	fmuls	%f3, %f3, %f6			! FGM	Group	f3 available
	fmuls	%f5, %f5, %f8			! FGM	Group	f5 available
	fmuls	%f7, %f7, %f10			! FGM	Group	f7 available
	fadds	%f6, %f8, %f6			! FGA	Group	2cyc stall f6,f8 available
	fadds	%f6, %f10, %f6			! FGA	Group	4cyc stall f6,f10 available

	/* scale (f6) = 1.0 / sqrt(len) */
	fsqrts	%f6, %f6			! FDIV  20 cycles
	fdivs	%f12, %f6, %f6			! FDIV	14 cycles

	fmuls	%f3, %f6, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * scale
	fmuls	%f5, %f6, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * scale
	fmuls	%f7, %f6, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * scale

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x10, %g3			! advance out vector pointer

	ba	7f
	 nop

4:	/* LENGTHS != NULL */

5:
	ld	[%o5 + 0x00], %f3		! ux = from[0]
	ld	[%o5 + 0x04], %f5		! uy = from[1]
	ld	[%o5 + 0x08], %f7		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	ld	[%o3], %f13			! LSU
	add	%o3, 4, %o3			! IEU0

	/* f3=tx, f5=ty, f7=tz, f13=lengths[i] */

	fmuls	%f3, %f13, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * len
	fmuls	%f5, %f13, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * len
	fmuls	%f7, %f13, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * len

	cmp	%o4, %g1			! continue if (i < count)
	bl	5b
	 add	%g3, 0x10, %g3			! advance out vector pointer

7:	retl
	 nop

/*
 * _mesa_sparc_rescale_normals
 *
 * Writes out = u * scale for every input normal; no matrix is involved.
 * %o0 (mat) and %o3 (lengths) are not read.
 *
 * The constant 1.0f is not needed here, so unlike the normalize routines
 * no ONE_DOT_ZERO value is materialized; only scale is bounced through
 * the stack into %f15.
 */
	.globl	_mesa_sparc_rescale_normals
_mesa_sparc_rescale_normals:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */

	sub	%sp, 16, %sp
	st	%o1, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f15	! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f				! nothing to do if count < 1
	 clr	%o4				! 'i' for STRIDE_LOOP

1:
	ld	[%o5 + 0x00], %f3		! ux = from[0]
	ld	[%o5 + 0x04], %f5		! uy = from[1]
	ld	[%o5 + 0x08], %f7		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* f3=tx, f5=ty, f7=tz */

	fmuls	%f3, %f15, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * scale
	fmuls	%f5, %f15, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * scale
	fmuls	%f7, %f15, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * scale

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x10, %g3			! advance out vector pointer

7:	retl
	 nop