1 /************************************************************************** 2 * 3 * Copyright 2007-2008 VMware, Inc. 4 * All Rights Reserved. 5 * Copyright 2009-2010 VMware, Inc. All rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29 /** 30 * TGSI interpreter/executor. 31 * 32 * Flow control information: 33 * 34 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) 35 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special 36 * care since a condition may be true for some quad components but false 37 * for other components. 38 * 39 * We basically execute all statements (even if they're in the part of 40 * an IF/ELSE clause that's "not taken") and use a special mask to 41 * control writing to destination registers. This is the ExecMask. 42 * See store_dest(). 43 * 44 * The ExecMask is computed from three other masks (CondMask, LoopMask and 45 * ContMask) which are controlled by the flow control instructions (namely: 46 * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 47 * 48 * 49 * Authors: 50 * Michal Krol 51 * Brian Paul 52 */ 53 54 #include "pipe/p_compiler.h" 55 #include "pipe/p_state.h" 56 #include "pipe/p_shader_tokens.h" 57 #include "tgsi/tgsi_dump.h" 58 #include "tgsi/tgsi_parse.h" 59 #include "tgsi/tgsi_util.h" 60 #include "tgsi_exec.h" 61 #include "util/u_half.h" 62 #include "util/u_memory.h" 63 #include "util/u_math.h" 64 #include "util/rounding.h" 65 66 67 #define DEBUG_EXECUTION 0 68 69 70 #define FAST_MATH 0 71 72 #define TILE_TOP_LEFT 0 73 #define TILE_TOP_RIGHT 1 74 #define TILE_BOTTOM_LEFT 2 75 #define TILE_BOTTOM_RIGHT 3 76 77 union tgsi_double_channel { 78 double d[TGSI_QUAD_SIZE]; 79 unsigned u[TGSI_QUAD_SIZE][2]; 80 uint64_t u64[TGSI_QUAD_SIZE]; 81 int64_t i64[TGSI_QUAD_SIZE]; 82 }; 83 84 struct tgsi_double_vector { 85 union tgsi_double_channel xy; 86 union tgsi_double_channel zw; 87 }; 88 89 static void 90 micro_abs(union tgsi_exec_channel *dst, 91 const union tgsi_exec_channel *src) 92 { 93 dst->f[0] = fabsf(src->f[0]); 94 dst->f[1] = fabsf(src->f[1]); 95 dst->f[2] = fabsf(src->f[2]); 96 dst->f[3] = fabsf(src->f[3]); 97 } 98 99 static void 100 micro_arl(union tgsi_exec_channel *dst, 101 const union tgsi_exec_channel *src) 102 { 103 dst->i[0] = (int)floorf(src->f[0]); 104 dst->i[1] = (int)floorf(src->f[1]); 105 dst->i[2] = (int)floorf(src->f[2]); 106 dst->i[3] = (int)floorf(src->f[3]); 107 } 108 109 static void 110 micro_arr(union tgsi_exec_channel *dst, 111 const union tgsi_exec_channel *src) 112 { 113 dst->i[0] = (int)floorf(src->f[0] + 0.5f); 114 dst->i[1] = (int)floorf(src->f[1] + 0.5f); 115 dst->i[2] = (int)floorf(src->f[2] + 0.5f); 116 dst->i[3] = (int)floorf(src->f[3] + 0.5f); 117 } 118 119 static void 120 micro_ceil(union tgsi_exec_channel *dst, 121 const union tgsi_exec_channel *src) 122 { 123 dst->f[0] = ceilf(src->f[0]); 124 dst->f[1] = ceilf(src->f[1]); 125 dst->f[2] = ceilf(src->f[2]); 126 dst->f[3] = ceilf(src->f[3]); 127 } 128 129 static void 130 micro_cmp(union tgsi_exec_channel *dst, 131 const union tgsi_exec_channel *src0, 132 const union tgsi_exec_channel *src1, 133 const union tgsi_exec_channel *src2) 134 { 135 dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0]; 136 dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1]; 137 dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2]; 138 dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3]; 139 } 140 141 static void 142 micro_cos(union tgsi_exec_channel *dst, 143 const union tgsi_exec_channel *src) 144 { 145 dst->f[0] = cosf(src->f[0]); 146 dst->f[1] = cosf(src->f[1]); 147 dst->f[2] = cosf(src->f[2]); 148 dst->f[3] = cosf(src->f[3]); 149 } 150 151 static void 152 micro_d2f(union tgsi_exec_channel *dst, 153 const union tgsi_double_channel *src) 154 { 155 dst->f[0] = (float)src->d[0]; 156 dst->f[1] = (float)src->d[1]; 157 dst->f[2] = (float)src->d[2]; 158 dst->f[3] = (float)src->d[3]; 159 } 160 161 static void 162 micro_d2i(union tgsi_exec_channel *dst, 163 const union tgsi_double_channel *src) 164 { 165 dst->i[0] = (int)src->d[0]; 166 dst->i[1] = (int)src->d[1]; 167 dst->i[2] = (int)src->d[2]; 168 dst->i[3] = (int)src->d[3]; 169 } 170 171 static void 172 micro_d2u(union tgsi_exec_channel *dst, 173 const union tgsi_double_channel *src) 174 { 175 dst->u[0] = (unsigned)src->d[0]; 176 dst->u[1] = (unsigned)src->d[1]; 177 dst->u[2] = (unsigned)src->d[2]; 178 dst->u[3] = (unsigned)src->d[3]; 179 } 180 static void 181 micro_dabs(union tgsi_double_channel *dst, 182 const union tgsi_double_channel *src) 183 { 184 dst->d[0] = src->d[0] >= 0.0 ? src->d[0] : -src->d[0]; 185 dst->d[1] = src->d[1] >= 0.0 ? src->d[1] : -src->d[1]; 186 dst->d[2] = src->d[2] >= 0.0 ? src->d[2] : -src->d[2]; 187 dst->d[3] = src->d[3] >= 0.0 ? src->d[3] : -src->d[3]; 188 } 189 190 static void 191 micro_dadd(union tgsi_double_channel *dst, 192 const union tgsi_double_channel *src) 193 { 194 dst->d[0] = src[0].d[0] + src[1].d[0]; 195 dst->d[1] = src[0].d[1] + src[1].d[1]; 196 dst->d[2] = src[0].d[2] + src[1].d[2]; 197 dst->d[3] = src[0].d[3] + src[1].d[3]; 198 } 199 200 static void 201 micro_ddiv(union tgsi_double_channel *dst, 202 const union tgsi_double_channel *src) 203 { 204 dst->d[0] = src[0].d[0] / src[1].d[0]; 205 dst->d[1] = src[0].d[1] / src[1].d[1]; 206 dst->d[2] = src[0].d[2] / src[1].d[2]; 207 dst->d[3] = src[0].d[3] / src[1].d[3]; 208 } 209 210 static void 211 micro_ddx(union tgsi_exec_channel *dst, 212 const union tgsi_exec_channel *src) 213 { 214 dst->f[0] = 215 dst->f[1] = 216 dst->f[2] = 217 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 218 } 219 220 static void 221 micro_ddy(union tgsi_exec_channel *dst, 222 const union tgsi_exec_channel *src) 223 { 224 dst->f[0] = 225 dst->f[1] = 226 dst->f[2] = 227 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; 228 } 229 230 static void 231 micro_dmul(union tgsi_double_channel *dst, 232 const union tgsi_double_channel *src) 233 { 234 dst->d[0] = src[0].d[0] * src[1].d[0]; 235 dst->d[1] = src[0].d[1] * src[1].d[1]; 236 dst->d[2] = src[0].d[2] * src[1].d[2]; 237 dst->d[3] = src[0].d[3] * src[1].d[3]; 238 } 239 240 static void 241 micro_dmax(union tgsi_double_channel *dst, 242 const union tgsi_double_channel *src) 243 { 244 dst->d[0] = src[0].d[0] > src[1].d[0] ? src[0].d[0] : src[1].d[0]; 245 dst->d[1] = src[0].d[1] > src[1].d[1] ? src[0].d[1] : src[1].d[1]; 246 dst->d[2] = src[0].d[2] > src[1].d[2] ? src[0].d[2] : src[1].d[2]; 247 dst->d[3] = src[0].d[3] > src[1].d[3] ? src[0].d[3] : src[1].d[3]; 248 } 249 250 static void 251 micro_dmin(union tgsi_double_channel *dst, 252 const union tgsi_double_channel *src) 253 { 254 dst->d[0] = src[0].d[0] < src[1].d[0] ? src[0].d[0] : src[1].d[0]; 255 dst->d[1] = src[0].d[1] < src[1].d[1] ? src[0].d[1] : src[1].d[1]; 256 dst->d[2] = src[0].d[2] < src[1].d[2] ? src[0].d[2] : src[1].d[2]; 257 dst->d[3] = src[0].d[3] < src[1].d[3] ? src[0].d[3] : src[1].d[3]; 258 } 259 260 static void 261 micro_dneg(union tgsi_double_channel *dst, 262 const union tgsi_double_channel *src) 263 { 264 dst->d[0] = -src->d[0]; 265 dst->d[1] = -src->d[1]; 266 dst->d[2] = -src->d[2]; 267 dst->d[3] = -src->d[3]; 268 } 269 270 static void 271 micro_dslt(union tgsi_double_channel *dst, 272 const union tgsi_double_channel *src) 273 { 274 dst->u[0][0] = src[0].d[0] < src[1].d[0] ? ~0U : 0U; 275 dst->u[1][0] = src[0].d[1] < src[1].d[1] ? ~0U : 0U; 276 dst->u[2][0] = src[0].d[2] < src[1].d[2] ? ~0U : 0U; 277 dst->u[3][0] = src[0].d[3] < src[1].d[3] ? ~0U : 0U; 278 } 279 280 static void 281 micro_dsne(union tgsi_double_channel *dst, 282 const union tgsi_double_channel *src) 283 { 284 dst->u[0][0] = src[0].d[0] != src[1].d[0] ? ~0U : 0U; 285 dst->u[1][0] = src[0].d[1] != src[1].d[1] ? ~0U : 0U; 286 dst->u[2][0] = src[0].d[2] != src[1].d[2] ? ~0U : 0U; 287 dst->u[3][0] = src[0].d[3] != src[1].d[3] ? ~0U : 0U; 288 } 289 290 static void 291 micro_dsge(union tgsi_double_channel *dst, 292 const union tgsi_double_channel *src) 293 { 294 dst->u[0][0] = src[0].d[0] >= src[1].d[0] ? ~0U : 0U; 295 dst->u[1][0] = src[0].d[1] >= src[1].d[1] ? ~0U : 0U; 296 dst->u[2][0] = src[0].d[2] >= src[1].d[2] ? ~0U : 0U; 297 dst->u[3][0] = src[0].d[3] >= src[1].d[3] ? ~0U : 0U; 298 } 299 300 static void 301 micro_dseq(union tgsi_double_channel *dst, 302 const union tgsi_double_channel *src) 303 { 304 dst->u[0][0] = src[0].d[0] == src[1].d[0] ? ~0U : 0U; 305 dst->u[1][0] = src[0].d[1] == src[1].d[1] ? ~0U : 0U; 306 dst->u[2][0] = src[0].d[2] == src[1].d[2] ? ~0U : 0U; 307 dst->u[3][0] = src[0].d[3] == src[1].d[3] ? ~0U : 0U; 308 } 309 310 static void 311 micro_drcp(union tgsi_double_channel *dst, 312 const union tgsi_double_channel *src) 313 { 314 dst->d[0] = 1.0 / src->d[0]; 315 dst->d[1] = 1.0 / src->d[1]; 316 dst->d[2] = 1.0 / src->d[2]; 317 dst->d[3] = 1.0 / src->d[3]; 318 } 319 320 static void 321 micro_dsqrt(union tgsi_double_channel *dst, 322 const union tgsi_double_channel *src) 323 { 324 dst->d[0] = sqrt(src->d[0]); 325 dst->d[1] = sqrt(src->d[1]); 326 dst->d[2] = sqrt(src->d[2]); 327 dst->d[3] = sqrt(src->d[3]); 328 } 329 330 static void 331 micro_drsq(union tgsi_double_channel *dst, 332 const union tgsi_double_channel *src) 333 { 334 dst->d[0] = 1.0 / sqrt(src->d[0]); 335 dst->d[1] = 1.0 / sqrt(src->d[1]); 336 dst->d[2] = 1.0 / sqrt(src->d[2]); 337 dst->d[3] = 1.0 / sqrt(src->d[3]); 338 } 339 340 static void 341 micro_dmad(union tgsi_double_channel *dst, 342 const union tgsi_double_channel *src) 343 { 344 dst->d[0] = src[0].d[0] * src[1].d[0] + src[2].d[0]; 345 dst->d[1] = src[0].d[1] * src[1].d[1] + src[2].d[1]; 346 dst->d[2] = src[0].d[2] * src[1].d[2] + src[2].d[2]; 347 dst->d[3] = src[0].d[3] * src[1].d[3] + src[2].d[3]; 348 } 349 350 static void 351 micro_dfrac(union tgsi_double_channel *dst, 352 const union tgsi_double_channel *src) 353 { 354 dst->d[0] = src->d[0] - floor(src->d[0]); 355 dst->d[1] = src->d[1] - floor(src->d[1]); 356 dst->d[2] = src->d[2] - floor(src->d[2]); 357 dst->d[3] = src->d[3] - floor(src->d[3]); 358 } 359 360 static void 361 micro_dldexp(union tgsi_double_channel *dst, 362 const union tgsi_double_channel *src0, 363 union tgsi_exec_channel *src1) 364 { 365 dst->d[0] = ldexp(src0->d[0], src1->i[0]); 366 dst->d[1] = ldexp(src0->d[1], src1->i[1]); 367 dst->d[2] = ldexp(src0->d[2], src1->i[2]); 368 dst->d[3] = ldexp(src0->d[3], src1->i[3]); 369 } 370 371 static void 372 micro_dfracexp(union tgsi_double_channel *dst, 373 union tgsi_exec_channel *dst_exp, 374 const union tgsi_double_channel *src) 375 { 376 dst->d[0] = frexp(src->d[0], &dst_exp->i[0]); 377 dst->d[1] = frexp(src->d[1], &dst_exp->i[1]); 378 dst->d[2] = frexp(src->d[2], &dst_exp->i[2]); 379 dst->d[3] = frexp(src->d[3], &dst_exp->i[3]); 380 } 381 382 static void 383 micro_exp2(union tgsi_exec_channel *dst, 384 const union tgsi_exec_channel *src) 385 { 386 #if FAST_MATH 387 dst->f[0] = util_fast_exp2(src->f[0]); 388 dst->f[1] = util_fast_exp2(src->f[1]); 389 dst->f[2] = util_fast_exp2(src->f[2]); 390 dst->f[3] = util_fast_exp2(src->f[3]); 391 #else 392 #if DEBUG 393 /* Inf is okay for this instruction, so clamp it to silence assertions. */ 394 uint i; 395 union tgsi_exec_channel clamped; 396 397 for (i = 0; i < 4; i++) { 398 if (src->f[i] > 127.99999f) { 399 clamped.f[i] = 127.99999f; 400 } else if (src->f[i] < -126.99999f) { 401 clamped.f[i] = -126.99999f; 402 } else { 403 clamped.f[i] = src->f[i]; 404 } 405 } 406 src = &clamped; 407 #endif /* DEBUG */ 408 409 dst->f[0] = powf(2.0f, src->f[0]); 410 dst->f[1] = powf(2.0f, src->f[1]); 411 dst->f[2] = powf(2.0f, src->f[2]); 412 dst->f[3] = powf(2.0f, src->f[3]); 413 #endif /* FAST_MATH */ 414 } 415 416 static void 417 micro_f2d(union tgsi_double_channel *dst, 418 const union tgsi_exec_channel *src) 419 { 420 dst->d[0] = (double)src->f[0]; 421 dst->d[1] = (double)src->f[1]; 422 dst->d[2] = (double)src->f[2]; 423 dst->d[3] = (double)src->f[3]; 424 } 425 426 static void 427 micro_flr(union tgsi_exec_channel *dst, 428 const union tgsi_exec_channel *src) 429 { 430 dst->f[0] = floorf(src->f[0]); 431 dst->f[1] = floorf(src->f[1]); 432 dst->f[2] = floorf(src->f[2]); 433 dst->f[3] = floorf(src->f[3]); 434 } 435 436 static void 437 micro_frc(union tgsi_exec_channel *dst, 438 const union tgsi_exec_channel *src) 439 { 440 dst->f[0] = src->f[0] - floorf(src->f[0]); 441 dst->f[1] = src->f[1] - floorf(src->f[1]); 442 dst->f[2] = src->f[2] - floorf(src->f[2]); 443 dst->f[3] = src->f[3] - floorf(src->f[3]); 444 } 445 446 static void 447 micro_i2d(union tgsi_double_channel *dst, 448 const union tgsi_exec_channel *src) 449 { 450 dst->d[0] = (double)src->i[0]; 451 dst->d[1] = (double)src->i[1]; 452 dst->d[2] = (double)src->i[2]; 453 dst->d[3] = (double)src->i[3]; 454 } 455 456 static void 457 micro_iabs(union tgsi_exec_channel *dst, 458 const union tgsi_exec_channel *src) 459 { 460 dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0]; 461 dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1]; 462 dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2]; 463 dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3]; 464 } 465 466 static void 467 micro_ineg(union tgsi_exec_channel *dst, 468 const union tgsi_exec_channel *src) 469 { 470 dst->i[0] = -src->i[0]; 471 dst->i[1] = -src->i[1]; 472 dst->i[2] = -src->i[2]; 473 dst->i[3] = -src->i[3]; 474 } 475 476 static void 477 micro_lg2(union tgsi_exec_channel *dst, 478 const union tgsi_exec_channel *src) 479 { 480 #if FAST_MATH 481 dst->f[0] = util_fast_log2(src->f[0]); 482 dst->f[1] = util_fast_log2(src->f[1]); 483 dst->f[2] = util_fast_log2(src->f[2]); 484 dst->f[3] = util_fast_log2(src->f[3]); 485 #else 486 dst->f[0] = logf(src->f[0]) * 1.442695f; 487 dst->f[1] = logf(src->f[1]) * 1.442695f; 488 dst->f[2] = logf(src->f[2]) * 1.442695f; 489 dst->f[3] = logf(src->f[3]) * 1.442695f; 490 #endif 491 } 492 493 static void 494 micro_lrp(union tgsi_exec_channel *dst, 495 const union tgsi_exec_channel *src0, 496 const union tgsi_exec_channel *src1, 497 const union tgsi_exec_channel *src2) 498 { 499 dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0]; 500 dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1]; 501 dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2]; 502 dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3]; 503 } 504 505 static void 506 micro_mad(union tgsi_exec_channel *dst, 507 const union tgsi_exec_channel *src0, 508 const union tgsi_exec_channel *src1, 509 const union tgsi_exec_channel *src2) 510 { 511 dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0]; 512 dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1]; 513 dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2]; 514 dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3]; 515 } 516 517 static void 518 micro_mov(union tgsi_exec_channel *dst, 519 const union tgsi_exec_channel *src) 520 { 521 dst->u[0] = src->u[0]; 522 dst->u[1] = src->u[1]; 523 dst->u[2] = src->u[2]; 524 dst->u[3] = src->u[3]; 525 } 526 527 static void 528 micro_rcp(union tgsi_exec_channel *dst, 529 const union tgsi_exec_channel *src) 530 { 531 #if 0 /* for debugging */ 532 assert(src->f[0] != 0.0f); 533 assert(src->f[1] != 0.0f); 534 assert(src->f[2] != 0.0f); 535 assert(src->f[3] != 0.0f); 536 #endif 537 dst->f[0] = 1.0f / src->f[0]; 538 dst->f[1] = 1.0f / src->f[1]; 539 dst->f[2] = 1.0f / src->f[2]; 540 dst->f[3] = 1.0f / src->f[3]; 541 } 542 543 static void 544 micro_rnd(union tgsi_exec_channel *dst, 545 const union tgsi_exec_channel *src) 546 { 547 dst->f[0] = _mesa_roundevenf(src->f[0]); 548 dst->f[1] = _mesa_roundevenf(src->f[1]); 549 dst->f[2] = _mesa_roundevenf(src->f[2]); 550 dst->f[3] = _mesa_roundevenf(src->f[3]); 551 } 552 553 static void 554 micro_rsq(union tgsi_exec_channel *dst, 555 const union tgsi_exec_channel *src) 556 { 557 #if 0 /* for debugging */ 558 assert(src->f[0] != 0.0f); 559 assert(src->f[1] != 0.0f); 560 assert(src->f[2] != 0.0f); 561 assert(src->f[3] != 0.0f); 562 #endif 563 dst->f[0] = 1.0f / sqrtf(src->f[0]); 564 dst->f[1] = 1.0f / sqrtf(src->f[1]); 565 dst->f[2] = 1.0f / sqrtf(src->f[2]); 566 dst->f[3] = 1.0f / sqrtf(src->f[3]); 567 } 568 569 static void 570 micro_sqrt(union tgsi_exec_channel *dst, 571 const union tgsi_exec_channel *src) 572 { 573 dst->f[0] = sqrtf(src->f[0]); 574 dst->f[1] = sqrtf(src->f[1]); 575 dst->f[2] = sqrtf(src->f[2]); 576 dst->f[3] = sqrtf(src->f[3]); 577 } 578 579 static void 580 micro_seq(union tgsi_exec_channel *dst, 581 const union tgsi_exec_channel *src0, 582 const union tgsi_exec_channel *src1) 583 { 584 dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f; 585 dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f; 586 dst->f[2] = src0->f[2] == src1->f[2] ? 1.0f : 0.0f; 587 dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f; 588 } 589 590 static void 591 micro_sge(union tgsi_exec_channel *dst, 592 const union tgsi_exec_channel *src0, 593 const union tgsi_exec_channel *src1) 594 { 595 dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f; 596 dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f; 597 dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f; 598 dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f; 599 } 600 601 static void 602 micro_sgn(union tgsi_exec_channel *dst, 603 const union tgsi_exec_channel *src) 604 { 605 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; 606 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; 607 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; 608 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; 609 } 610 611 static void 612 micro_isgn(union tgsi_exec_channel *dst, 613 const union tgsi_exec_channel *src) 614 { 615 dst->i[0] = src->i[0] < 0 ? -1 : src->i[0] > 0 ? 1 : 0; 616 dst->i[1] = src->i[1] < 0 ? -1 : src->i[1] > 0 ? 1 : 0; 617 dst->i[2] = src->i[2] < 0 ? -1 : src->i[2] > 0 ? 1 : 0; 618 dst->i[3] = src->i[3] < 0 ? -1 : src->i[3] > 0 ? 1 : 0; 619 } 620 621 static void 622 micro_sgt(union tgsi_exec_channel *dst, 623 const union tgsi_exec_channel *src0, 624 const union tgsi_exec_channel *src1) 625 { 626 dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f; 627 dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f; 628 dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f; 629 dst->f[3] = src0->f[3] > src1->f[3] ? 1.0f : 0.0f; 630 } 631 632 static void 633 micro_sin(union tgsi_exec_channel *dst, 634 const union tgsi_exec_channel *src) 635 { 636 dst->f[0] = sinf(src->f[0]); 637 dst->f[1] = sinf(src->f[1]); 638 dst->f[2] = sinf(src->f[2]); 639 dst->f[3] = sinf(src->f[3]); 640 } 641 642 static void 643 micro_sle(union tgsi_exec_channel *dst, 644 const union tgsi_exec_channel *src0, 645 const union tgsi_exec_channel *src1) 646 { 647 dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f; 648 dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f; 649 dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f; 650 dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f; 651 } 652 653 static void 654 micro_slt(union tgsi_exec_channel *dst, 655 const union tgsi_exec_channel *src0, 656 const union tgsi_exec_channel *src1) 657 { 658 dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f; 659 dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f; 660 dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f; 661 dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f; 662 } 663 664 static void 665 micro_sne(union tgsi_exec_channel *dst, 666 const union tgsi_exec_channel *src0, 667 const union tgsi_exec_channel *src1) 668 { 669 dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f; 670 dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f; 671 dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f; 672 dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f; 673 } 674 675 static void 676 micro_trunc(union tgsi_exec_channel *dst, 677 const union tgsi_exec_channel *src) 678 { 679 dst->f[0] = truncf(src->f[0]); 680 dst->f[1] = truncf(src->f[1]); 681 dst->f[2] = truncf(src->f[2]); 682 dst->f[3] = truncf(src->f[3]); 683 } 684 685 static void 686 micro_u2d(union tgsi_double_channel *dst, 687 const union tgsi_exec_channel *src) 688 { 689 dst->d[0] = (double)src->u[0]; 690 dst->d[1] = (double)src->u[1]; 691 dst->d[2] = (double)src->u[2]; 692 dst->d[3] = (double)src->u[3]; 693 } 694 695 static void 696 micro_i64abs(union tgsi_double_channel *dst, 697 const union tgsi_double_channel *src) 698 { 699 dst->i64[0] = src->i64[0] >= 0.0 ? src->i64[0] : -src->i64[0]; 700 dst->i64[1] = src->i64[1] >= 0.0 ? src->i64[1] : -src->i64[1]; 701 dst->i64[2] = src->i64[2] >= 0.0 ? src->i64[2] : -src->i64[2]; 702 dst->i64[3] = src->i64[3] >= 0.0 ? src->i64[3] : -src->i64[3]; 703 } 704 705 static void 706 micro_i64sgn(union tgsi_double_channel *dst, 707 const union tgsi_double_channel *src) 708 { 709 dst->i64[0] = src->i64[0] < 0 ? -1 : src->i64[0] > 0 ? 1 : 0; 710 dst->i64[1] = src->i64[1] < 0 ? -1 : src->i64[1] > 0 ? 1 : 0; 711 dst->i64[2] = src->i64[2] < 0 ? -1 : src->i64[2] > 0 ? 1 : 0; 712 dst->i64[3] = src->i64[3] < 0 ? -1 : src->i64[3] > 0 ? 1 : 0; 713 } 714 715 static void 716 micro_i64neg(union tgsi_double_channel *dst, 717 const union tgsi_double_channel *src) 718 { 719 dst->i64[0] = -src->i64[0]; 720 dst->i64[1] = -src->i64[1]; 721 dst->i64[2] = -src->i64[2]; 722 dst->i64[3] = -src->i64[3]; 723 } 724 725 static void 726 micro_u64seq(union tgsi_double_channel *dst, 727 const union tgsi_double_channel *src) 728 { 729 dst->u[0][0] = src[0].u64[0] == src[1].u64[0] ? ~0U : 0U; 730 dst->u[1][0] = src[0].u64[1] == src[1].u64[1] ? ~0U : 0U; 731 dst->u[2][0] = src[0].u64[2] == src[1].u64[2] ? ~0U : 0U; 732 dst->u[3][0] = src[0].u64[3] == src[1].u64[3] ? ~0U : 0U; 733 } 734 735 static void 736 micro_u64sne(union tgsi_double_channel *dst, 737 const union tgsi_double_channel *src) 738 { 739 dst->u[0][0] = src[0].u64[0] != src[1].u64[0] ? ~0U : 0U; 740 dst->u[1][0] = src[0].u64[1] != src[1].u64[1] ? ~0U : 0U; 741 dst->u[2][0] = src[0].u64[2] != src[1].u64[2] ? ~0U : 0U; 742 dst->u[3][0] = src[0].u64[3] != src[1].u64[3] ? ~0U : 0U; 743 } 744 745 static void 746 micro_i64slt(union tgsi_double_channel *dst, 747 const union tgsi_double_channel *src) 748 { 749 dst->u[0][0] = src[0].i64[0] < src[1].i64[0] ? ~0U : 0U; 750 dst->u[1][0] = src[0].i64[1] < src[1].i64[1] ? ~0U : 0U; 751 dst->u[2][0] = src[0].i64[2] < src[1].i64[2] ? ~0U : 0U; 752 dst->u[3][0] = src[0].i64[3] < src[1].i64[3] ? ~0U : 0U; 753 } 754 755 static void 756 micro_u64slt(union tgsi_double_channel *dst, 757 const union tgsi_double_channel *src) 758 { 759 dst->u[0][0] = src[0].u64[0] < src[1].u64[0] ? ~0U : 0U; 760 dst->u[1][0] = src[0].u64[1] < src[1].u64[1] ? ~0U : 0U; 761 dst->u[2][0] = src[0].u64[2] < src[1].u64[2] ? ~0U : 0U; 762 dst->u[3][0] = src[0].u64[3] < src[1].u64[3] ? ~0U : 0U; 763 } 764 765 static void 766 micro_i64sge(union tgsi_double_channel *dst, 767 const union tgsi_double_channel *src) 768 { 769 dst->u[0][0] = src[0].i64[0] >= src[1].i64[0] ? ~0U : 0U; 770 dst->u[1][0] = src[0].i64[1] >= src[1].i64[1] ? ~0U : 0U; 771 dst->u[2][0] = src[0].i64[2] >= src[1].i64[2] ? ~0U : 0U; 772 dst->u[3][0] = src[0].i64[3] >= src[1].i64[3] ? ~0U : 0U; 773 } 774 775 static void 776 micro_u64sge(union tgsi_double_channel *dst, 777 const union tgsi_double_channel *src) 778 { 779 dst->u[0][0] = src[0].u64[0] >= src[1].u64[0] ? ~0U : 0U; 780 dst->u[1][0] = src[0].u64[1] >= src[1].u64[1] ? ~0U : 0U; 781 dst->u[2][0] = src[0].u64[2] >= src[1].u64[2] ? ~0U : 0U; 782 dst->u[3][0] = src[0].u64[3] >= src[1].u64[3] ? ~0U : 0U; 783 } 784 785 static void 786 micro_u64max(union tgsi_double_channel *dst, 787 const union tgsi_double_channel *src) 788 { 789 dst->u64[0] = src[0].u64[0] > src[1].u64[0] ? src[0].u64[0] : src[1].u64[0]; 790 dst->u64[1] = src[0].u64[1] > src[1].u64[1] ? src[0].u64[1] : src[1].u64[1]; 791 dst->u64[2] = src[0].u64[2] > src[1].u64[2] ? src[0].u64[2] : src[1].u64[2]; 792 dst->u64[3] = src[0].u64[3] > src[1].u64[3] ? src[0].u64[3] : src[1].u64[3]; 793 } 794 795 static void 796 micro_i64max(union tgsi_double_channel *dst, 797 const union tgsi_double_channel *src) 798 { 799 dst->i64[0] = src[0].i64[0] > src[1].i64[0] ? src[0].i64[0] : src[1].i64[0]; 800 dst->i64[1] = src[0].i64[1] > src[1].i64[1] ? src[0].i64[1] : src[1].i64[1]; 801 dst->i64[2] = src[0].i64[2] > src[1].i64[2] ? src[0].i64[2] : src[1].i64[2]; 802 dst->i64[3] = src[0].i64[3] > src[1].i64[3] ? src[0].i64[3] : src[1].i64[3]; 803 } 804 805 static void 806 micro_u64min(union tgsi_double_channel *dst, 807 const union tgsi_double_channel *src) 808 { 809 dst->u64[0] = src[0].u64[0] < src[1].u64[0] ? src[0].u64[0] : src[1].u64[0]; 810 dst->u64[1] = src[0].u64[1] < src[1].u64[1] ? src[0].u64[1] : src[1].u64[1]; 811 dst->u64[2] = src[0].u64[2] < src[1].u64[2] ? src[0].u64[2] : src[1].u64[2]; 812 dst->u64[3] = src[0].u64[3] < src[1].u64[3] ? src[0].u64[3] : src[1].u64[3]; 813 } 814 815 static void 816 micro_i64min(union tgsi_double_channel *dst, 817 const union tgsi_double_channel *src) 818 { 819 dst->i64[0] = src[0].i64[0] < src[1].i64[0] ? src[0].i64[0] : src[1].i64[0]; 820 dst->i64[1] = src[0].i64[1] < src[1].i64[1] ? src[0].i64[1] : src[1].i64[1]; 821 dst->i64[2] = src[0].i64[2] < src[1].i64[2] ? src[0].i64[2] : src[1].i64[2]; 822 dst->i64[3] = src[0].i64[3] < src[1].i64[3] ? src[0].i64[3] : src[1].i64[3]; 823 } 824 825 static void 826 micro_u64add(union tgsi_double_channel *dst, 827 const union tgsi_double_channel *src) 828 { 829 dst->u64[0] = src[0].u64[0] + src[1].u64[0]; 830 dst->u64[1] = src[0].u64[1] + src[1].u64[1]; 831 dst->u64[2] = src[0].u64[2] + src[1].u64[2]; 832 dst->u64[3] = src[0].u64[3] + src[1].u64[3]; 833 } 834 835 static void 836 micro_u64mul(union tgsi_double_channel *dst, 837 const union tgsi_double_channel *src) 838 { 839 dst->u64[0] = src[0].u64[0] * src[1].u64[0]; 840 dst->u64[1] = src[0].u64[1] * src[1].u64[1]; 841 dst->u64[2] = src[0].u64[2] * src[1].u64[2]; 842 dst->u64[3] = src[0].u64[3] * src[1].u64[3]; 843 } 844 845 static void 846 micro_u64div(union tgsi_double_channel *dst, 847 const union tgsi_double_channel *src) 848 { 849 dst->u64[0] = src[1].u64[0] ? src[0].u64[0] / src[1].u64[0] : ~0ull; 850 dst->u64[1] = src[1].u64[1] ? src[0].u64[1] / src[1].u64[1] : ~0ull; 851 dst->u64[2] = src[1].u64[2] ? src[0].u64[2] / src[1].u64[2] : ~0ull; 852 dst->u64[3] = src[1].u64[3] ? src[0].u64[3] / src[1].u64[3] : ~0ull; 853 } 854 855 static void 856 micro_i64div(union tgsi_double_channel *dst, 857 const union tgsi_double_channel *src) 858 { 859 dst->i64[0] = src[1].i64[0] ? src[0].i64[0] / src[1].i64[0] : 0; 860 dst->i64[1] = src[1].i64[1] ? src[0].i64[1] / src[1].i64[1] : 0; 861 dst->i64[2] = src[1].i64[2] ? src[0].i64[2] / src[1].i64[2] : 0; 862 dst->i64[3] = src[1].i64[3] ? src[0].i64[3] / src[1].i64[3] : 0; 863 } 864 865 static void 866 micro_u64mod(union tgsi_double_channel *dst, 867 const union tgsi_double_channel *src) 868 { 869 dst->u64[0] = src[1].u64[0] ? src[0].u64[0] % src[1].u64[0] : ~0ull; 870 dst->u64[1] = src[1].u64[1] ? src[0].u64[1] % src[1].u64[1] : ~0ull; 871 dst->u64[2] = src[1].u64[2] ? src[0].u64[2] % src[1].u64[2] : ~0ull; 872 dst->u64[3] = src[1].u64[3] ? src[0].u64[3] % src[1].u64[3] : ~0ull; 873 } 874 875 static void 876 micro_i64mod(union tgsi_double_channel *dst, 877 const union tgsi_double_channel *src) 878 { 879 dst->i64[0] = src[1].i64[0] ? src[0].i64[0] % src[1].i64[0] : ~0ll; 880 dst->i64[1] = src[1].i64[1] ? src[0].i64[1] % src[1].i64[1] : ~0ll; 881 dst->i64[2] = src[1].i64[2] ? src[0].i64[2] % src[1].i64[2] : ~0ll; 882 dst->i64[3] = src[1].i64[3] ? src[0].i64[3] % src[1].i64[3] : ~0ll; 883 } 884 885 static void 886 micro_u64shl(union tgsi_double_channel *dst, 887 const union tgsi_double_channel *src0, 888 union tgsi_exec_channel *src1) 889 { 890 unsigned masked_count; 891 masked_count = src1->u[0] & 0x3f; 892 dst->u64[0] = src0->u64[0] << masked_count; 893 masked_count = src1->u[1] & 0x3f; 894 dst->u64[1] = src0->u64[1] << masked_count; 895 masked_count = src1->u[2] & 0x3f; 896 dst->u64[2] = src0->u64[2] << masked_count; 897 masked_count = src1->u[3] & 0x3f; 898 dst->u64[3] = src0->u64[3] << masked_count; 899 } 900 901 static void 902 micro_i64shr(union tgsi_double_channel *dst, 903 const union tgsi_double_channel *src0, 904 union tgsi_exec_channel *src1) 905 { 906 unsigned masked_count; 907 masked_count = src1->u[0] & 0x3f; 908 dst->i64[0] = src0->i64[0] >> masked_count; 909 masked_count = src1->u[1] & 0x3f; 910 dst->i64[1] = src0->i64[1] >> masked_count; 911 masked_count = src1->u[2] & 0x3f; 912 dst->i64[2] = src0->i64[2] >> masked_count; 913 masked_count = src1->u[3] & 0x3f; 914 dst->i64[3] = src0->i64[3] >> masked_count; 915 } 916 917 static void 918 micro_u64shr(union tgsi_double_channel *dst, 919 const union tgsi_double_channel *src0, 920 union tgsi_exec_channel *src1) 921 { 922 unsigned masked_count; 923 masked_count = src1->u[0] & 0x3f; 924 dst->u64[0] = src0->u64[0] >> masked_count; 925 masked_count = src1->u[1] & 0x3f; 926 dst->u64[1] = src0->u64[1] >> masked_count; 927 masked_count = src1->u[2] & 0x3f; 928 dst->u64[2] = src0->u64[2] >> masked_count; 929 masked_count = src1->u[3] & 0x3f; 930 dst->u64[3] = src0->u64[3] >> masked_count; 931 } 932 933 enum tgsi_exec_datatype { 934 TGSI_EXEC_DATA_FLOAT, 935 TGSI_EXEC_DATA_INT, 936 TGSI_EXEC_DATA_UINT, 937 TGSI_EXEC_DATA_DOUBLE, 938 TGSI_EXEC_DATA_INT64, 939 TGSI_EXEC_DATA_UINT64, 940 }; 941 942 /* 943 * Shorthand locations of various utility registers (_I = Index, _C = Channel) 944 */ 945 #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I 946 #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C 947 #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I 948 #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C 949 #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I 950 #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C 951 952 953 /** The execution mask depends on the conditional mask and the loop mask */ 954 #define UPDATE_EXEC_MASK(MACH) \ 955 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->Switch.mask & MACH->FuncMask 956 957 958 static const union tgsi_exec_channel ZeroVec = 959 { { 0.0, 0.0, 0.0, 0.0 } }; 960 961 static const union tgsi_exec_channel OneVec = { 962 {1.0f, 1.0f, 1.0f, 1.0f} 963 }; 964 965 static const union tgsi_exec_channel P128Vec = { 966 {128.0f, 128.0f, 128.0f, 128.0f} 967 }; 968 969 static const union tgsi_exec_channel M128Vec = { 970 {-128.0f, -128.0f, -128.0f, -128.0f} 971 }; 972 973 974 /** 975 * Assert that none of the float values in 'chan' are infinite or NaN. 976 * NaN and Inf may occur normally during program execution and should 977 * not lead to crashes, etc. But when debugging, it's helpful to catch 978 * them. 979 */ 980 static inline void 981 check_inf_or_nan(const union tgsi_exec_channel *chan) 982 { 983 assert(!util_is_inf_or_nan((chan)->f[0])); 984 assert(!util_is_inf_or_nan((chan)->f[1])); 985 assert(!util_is_inf_or_nan((chan)->f[2])); 986 assert(!util_is_inf_or_nan((chan)->f[3])); 987 } 988 989 990 #ifdef DEBUG 991 static void 992 print_chan(const char *msg, const union tgsi_exec_channel *chan) 993 { 994 debug_printf("%s = {%f, %f, %f, %f}\n", 995 msg, chan->f[0], chan->f[1], chan->f[2], chan->f[3]); 996 } 997 #endif 998 999 1000 #ifdef DEBUG 1001 static void 1002 print_temp(const struct tgsi_exec_machine *mach, uint index) 1003 { 1004 const struct tgsi_exec_vector *tmp = &mach->Temps[index]; 1005 int i; 1006 debug_printf("Temp[%u] =\n", index); 1007 for (i = 0; i < 4; i++) { 1008 debug_printf(" %c: { %f, %f, %f, %f }\n", 1009 "XYZW"[i], 1010 tmp->xyzw[i].f[0], 1011 tmp->xyzw[i].f[1], 1012 tmp->xyzw[i].f[2], 1013 tmp->xyzw[i].f[3]); 1014 } 1015 } 1016 #endif 1017 1018 1019 void 1020 tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach, 1021 unsigned num_bufs, 1022 const void **bufs, 1023 const unsigned *buf_sizes) 1024 { 1025 unsigned i; 1026 1027 for (i = 0; i < num_bufs; i++) { 1028 mach->Consts[i] = bufs[i]; 1029 mach->ConstsSize[i] = buf_sizes[i]; 1030 } 1031 } 1032 1033 1034 /** 1035 * Check if there's a potential src/dst register data dependency when 1036 * using SOA execution. 1037 * Example: 1038 * MOV T, T.yxwz; 1039 * This would expand into: 1040 * MOV t0, t1; 1041 * MOV t1, t0; 1042 * MOV t2, t3; 1043 * MOV t3, t2; 1044 * The second instruction will have the wrong value for t0 if executed as-is. 1045 */ 1046 boolean 1047 tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) 1048 { 1049 uint i, chan; 1050 1051 uint writemask = inst->Dst[0].Register.WriteMask; 1052 if (writemask == TGSI_WRITEMASK_X || 1053 writemask == TGSI_WRITEMASK_Y || 1054 writemask == TGSI_WRITEMASK_Z || 1055 writemask == TGSI_WRITEMASK_W || 1056 writemask == TGSI_WRITEMASK_NONE) { 1057 /* no chance of data dependency */ 1058 return FALSE; 1059 } 1060 1061 /* loop over src regs */ 1062 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1063 if ((inst->Src[i].Register.File == 1064 inst->Dst[0].Register.File) && 1065 ((inst->Src[i].Register.Index == 1066 inst->Dst[0].Register.Index) || 1067 inst->Src[i].Register.Indirect || 1068 inst->Dst[0].Register.Indirect)) { 1069 /* loop over dest channels */ 1070 uint channelsWritten = 0x0; 1071 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 1072 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 1073 /* check if we're reading a channel that's been written */ 1074 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan); 1075 if (channelsWritten & (1 << swizzle)) { 1076 return TRUE; 1077 } 1078 1079 channelsWritten |= (1 << chan); 1080 } 1081 } 1082 } 1083 } 1084 return FALSE; 1085 } 1086 1087 1088 /** 1089 * Initialize machine state by expanding tokens to full instructions, 1090 * allocating temporary storage, setting up constants, etc. 1091 * After this, we can call tgsi_exec_machine_run() many times. 1092 */ 1093 void 1094 tgsi_exec_machine_bind_shader( 1095 struct tgsi_exec_machine *mach, 1096 const struct tgsi_token *tokens, 1097 struct tgsi_sampler *sampler, 1098 struct tgsi_image *image, 1099 struct tgsi_buffer *buffer) 1100 { 1101 uint k; 1102 struct tgsi_parse_context parse; 1103 struct tgsi_full_instruction *instructions; 1104 struct tgsi_full_declaration *declarations; 1105 uint maxInstructions = 10, numInstructions = 0; 1106 uint maxDeclarations = 10, numDeclarations = 0; 1107 1108 #if 0 1109 tgsi_dump(tokens, 0); 1110 #endif 1111 1112 util_init_math(); 1113 1114 1115 mach->Tokens = tokens; 1116 mach->Sampler = sampler; 1117 mach->Image = image; 1118 mach->Buffer = buffer; 1119 1120 if (!tokens) { 1121 /* unbind and free all */ 1122 FREE(mach->Declarations); 1123 mach->Declarations = NULL; 1124 mach->NumDeclarations = 0; 1125 1126 FREE(mach->Instructions); 1127 mach->Instructions = NULL; 1128 mach->NumInstructions = 0; 1129 1130 return; 1131 } 1132 1133 k = tgsi_parse_init (&parse, mach->Tokens); 1134 if (k != TGSI_PARSE_OK) { 1135 debug_printf( "Problem parsing!\n" ); 1136 return; 1137 } 1138 1139 mach->ImmLimit = 0; 1140 mach->NumOutputs = 0; 1141 1142 for (k = 0; k < TGSI_SEMANTIC_COUNT; k++) 1143 mach->SysSemanticToIndex[k] = -1; 1144 1145 if (mach->ShaderType == PIPE_SHADER_GEOMETRY && 1146 !mach->UsedGeometryShader) { 1147 struct tgsi_exec_vector *inputs; 1148 struct tgsi_exec_vector *outputs; 1149 1150 inputs = align_malloc(sizeof(struct tgsi_exec_vector) * 1151 TGSI_MAX_PRIM_VERTICES * PIPE_MAX_SHADER_INPUTS, 1152 16); 1153 1154 if (!inputs) 1155 return; 1156 1157 outputs = align_malloc(sizeof(struct tgsi_exec_vector) * 1158 TGSI_MAX_TOTAL_VERTICES, 16); 1159 1160 if (!outputs) { 1161 align_free(inputs); 1162 return; 1163 } 1164 1165 align_free(mach->Inputs); 1166 align_free(mach->Outputs); 1167 1168 mach->Inputs = inputs; 1169 mach->Outputs = outputs; 1170 mach->UsedGeometryShader = TRUE; 1171 } 1172 1173 declarations = (struct tgsi_full_declaration *) 1174 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); 1175 1176 if (!declarations) { 1177 return; 1178 } 1179 1180 instructions = (struct tgsi_full_instruction *) 1181 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); 1182 1183 if (!instructions) { 1184 FREE( declarations ); 1185 return; 1186 } 1187 1188 while( !tgsi_parse_end_of_tokens( &parse ) ) { 1189 uint i; 1190 1191 tgsi_parse_token( &parse ); 1192 switch( parse.FullToken.Token.Type ) { 1193 case TGSI_TOKEN_TYPE_DECLARATION: 1194 /* save expanded declaration */ 1195 if (numDeclarations == maxDeclarations) { 1196 declarations = REALLOC(declarations, 1197 maxDeclarations 1198 * sizeof(struct tgsi_full_declaration), 1199 (maxDeclarations + 10) 1200 * sizeof(struct tgsi_full_declaration)); 1201 maxDeclarations += 10; 1202 } 1203 if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) { 1204 unsigned reg; 1205 for (reg = parse.FullToken.FullDeclaration.Range.First; 1206 reg <= parse.FullToken.FullDeclaration.Range.Last; 1207 ++reg) { 1208 ++mach->NumOutputs; 1209 } 1210 } 1211 else if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_SYSTEM_VALUE) { 1212 const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; 1213 mach->SysSemanticToIndex[decl->Semantic.Name] = decl->Range.First; 1214 } 1215 1216 memcpy(declarations + numDeclarations, 1217 &parse.FullToken.FullDeclaration, 1218 sizeof(declarations[0])); 1219 numDeclarations++; 1220 break; 1221 1222 case TGSI_TOKEN_TYPE_IMMEDIATE: 1223 { 1224 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 1225 assert( size <= 4 ); 1226 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES ); 1227 1228 for( i = 0; i < size; i++ ) { 1229 mach->Imms[mach->ImmLimit][i] = 1230 parse.FullToken.FullImmediate.u[i].Float; 1231 } 1232 mach->ImmLimit += 1; 1233 } 1234 break; 1235 1236 case TGSI_TOKEN_TYPE_INSTRUCTION: 1237 1238 /* save expanded instruction */ 1239 if (numInstructions == maxInstructions) { 1240 instructions = REALLOC(instructions, 1241 maxInstructions 1242 * sizeof(struct tgsi_full_instruction), 1243 (maxInstructions + 10) 1244 * sizeof(struct tgsi_full_instruction)); 1245 maxInstructions += 10; 1246 } 1247 1248 memcpy(instructions + numInstructions, 1249 &parse.FullToken.FullInstruction, 1250 sizeof(instructions[0])); 1251 1252 numInstructions++; 1253 break; 1254 1255 case TGSI_TOKEN_TYPE_PROPERTY: 1256 if (mach->ShaderType == PIPE_SHADER_GEOMETRY) { 1257 if (parse.FullToken.FullProperty.Property.PropertyName == TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) { 1258 mach->MaxOutputVertices = parse.FullToken.FullProperty.u[0].Data; 1259 } 1260 } 1261 break; 1262 1263 default: 1264 assert( 0 ); 1265 } 1266 } 1267 tgsi_parse_free (&parse); 1268 1269 FREE(mach->Declarations); 1270 mach->Declarations = declarations; 1271 mach->NumDeclarations = numDeclarations; 1272 1273 FREE(mach->Instructions); 1274 mach->Instructions = instructions; 1275 mach->NumInstructions = numInstructions; 1276 } 1277 1278 1279 struct tgsi_exec_machine * 1280 tgsi_exec_machine_create(enum pipe_shader_type shader_type) 1281 { 1282 struct tgsi_exec_machine *mach; 1283 uint i; 1284 1285 mach = align_malloc( sizeof *mach, 16 ); 1286 if (!mach) 1287 goto fail; 1288 1289 memset(mach, 0, sizeof(*mach)); 1290 1291 mach->ShaderType = shader_type; 1292 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; 1293 mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES; 1294 1295 if (shader_type != PIPE_SHADER_COMPUTE) { 1296 mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_INPUTS, 16); 1297 mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_OUTPUTS, 16); 1298 if (!mach->Inputs || !mach->Outputs) 1299 goto fail; 1300 } 1301 1302 /* Setup constants needed by the SSE2 executor. */ 1303 for( i = 0; i < 4; i++ ) { 1304 mach->Temps[TGSI_EXEC_TEMP_00000000_I].xyzw[TGSI_EXEC_TEMP_00000000_C].u[i] = 0x00000000; 1305 mach->Temps[TGSI_EXEC_TEMP_7FFFFFFF_I].xyzw[TGSI_EXEC_TEMP_7FFFFFFF_C].u[i] = 0x7FFFFFFF; 1306 mach->Temps[TGSI_EXEC_TEMP_80000000_I].xyzw[TGSI_EXEC_TEMP_80000000_C].u[i] = 0x80000000; 1307 mach->Temps[TGSI_EXEC_TEMP_FFFFFFFF_I].xyzw[TGSI_EXEC_TEMP_FFFFFFFF_C].u[i] = 0xFFFFFFFF; /* not used */ 1308 mach->Temps[TGSI_EXEC_TEMP_ONE_I].xyzw[TGSI_EXEC_TEMP_ONE_C].f[i] = 1.0f; 1309 mach->Temps[TGSI_EXEC_TEMP_TWO_I].xyzw[TGSI_EXEC_TEMP_TWO_C].f[i] = 2.0f; /* not used */ 1310 mach->Temps[TGSI_EXEC_TEMP_128_I].xyzw[TGSI_EXEC_TEMP_128_C].f[i] = 128.0f; 1311 mach->Temps[TGSI_EXEC_TEMP_MINUS_128_I].xyzw[TGSI_EXEC_TEMP_MINUS_128_C].f[i] = -128.0f; 1312 mach->Temps[TGSI_EXEC_TEMP_THREE_I].xyzw[TGSI_EXEC_TEMP_THREE_C].f[i] = 3.0f; 1313 mach->Temps[TGSI_EXEC_TEMP_HALF_I].xyzw[TGSI_EXEC_TEMP_HALF_C].f[i] = 0.5f; 1314 } 1315 1316 #ifdef DEBUG 1317 /* silence warnings */ 1318 (void) print_chan; 1319 (void) print_temp; 1320 #endif 1321 1322 return mach; 1323 1324 fail: 1325 if (mach) { 1326 align_free(mach->Inputs); 1327 align_free(mach->Outputs); 1328 align_free(mach); 1329 } 1330 return NULL; 1331 } 1332 1333 1334 void 1335 tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) 1336 { 1337 if (mach) { 1338 FREE(mach->Instructions); 1339 FREE(mach->Declarations); 1340 1341 align_free(mach->Inputs); 1342 align_free(mach->Outputs); 1343 1344 align_free(mach); 1345 } 1346 } 1347 1348 static void 1349 micro_add(union tgsi_exec_channel *dst, 1350 const union tgsi_exec_channel *src0, 1351 const union tgsi_exec_channel *src1) 1352 { 1353 dst->f[0] = src0->f[0] + src1->f[0]; 1354 dst->f[1] = src0->f[1] + src1->f[1]; 1355 dst->f[2] = src0->f[2] + src1->f[2]; 1356 dst->f[3] = src0->f[3] + src1->f[3]; 1357 } 1358 1359 static void 1360 micro_div( 1361 union tgsi_exec_channel *dst, 1362 const union tgsi_exec_channel *src0, 1363 const union tgsi_exec_channel *src1 ) 1364 { 1365 if (src1->f[0] != 0) { 1366 dst->f[0] = src0->f[0] / src1->f[0]; 1367 } 1368 if (src1->f[1] != 0) { 1369 dst->f[1] = src0->f[1] / src1->f[1]; 1370 } 1371 if (src1->f[2] != 0) { 1372 dst->f[2] = src0->f[2] / src1->f[2]; 1373 } 1374 if (src1->f[3] != 0) { 1375 dst->f[3] = src0->f[3] / src1->f[3]; 1376 } 1377 } 1378 1379 static void 1380 micro_lt( 1381 union tgsi_exec_channel *dst, 1382 const union tgsi_exec_channel *src0, 1383 const union tgsi_exec_channel *src1, 1384 const union tgsi_exec_channel *src2, 1385 const union tgsi_exec_channel *src3 ) 1386 { 1387 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; 1388 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; 1389 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; 1390 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; 1391 } 1392 1393 static void 1394 micro_max(union tgsi_exec_channel *dst, 1395 const union tgsi_exec_channel *src0, 1396 const union tgsi_exec_channel *src1) 1397 { 1398 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; 1399 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; 1400 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; 1401 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; 1402 } 1403 1404 static void 1405 micro_min(union tgsi_exec_channel *dst, 1406 const union tgsi_exec_channel *src0, 1407 const union tgsi_exec_channel *src1) 1408 { 1409 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; 1410 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; 1411 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; 1412 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; 1413 } 1414 1415 static void 1416 micro_mul(union tgsi_exec_channel *dst, 1417 const union tgsi_exec_channel *src0, 1418 const union tgsi_exec_channel *src1) 1419 { 1420 dst->f[0] = src0->f[0] * src1->f[0]; 1421 dst->f[1] = src0->f[1] * src1->f[1]; 1422 dst->f[2] = src0->f[2] * src1->f[2]; 1423 dst->f[3] = src0->f[3] * src1->f[3]; 1424 } 1425 1426 static void 1427 micro_neg( 1428 union tgsi_exec_channel *dst, 1429 const union tgsi_exec_channel *src ) 1430 { 1431 dst->f[0] = -src->f[0]; 1432 dst->f[1] = -src->f[1]; 1433 dst->f[2] = -src->f[2]; 1434 dst->f[3] = -src->f[3]; 1435 } 1436 1437 static void 1438 micro_pow( 1439 union tgsi_exec_channel *dst, 1440 const union tgsi_exec_channel *src0, 1441 const union tgsi_exec_channel *src1 ) 1442 { 1443 #if FAST_MATH 1444 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); 1445 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); 1446 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); 1447 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); 1448 #else 1449 dst->f[0] = powf( src0->f[0], src1->f[0] ); 1450 dst->f[1] = powf( src0->f[1], src1->f[1] ); 1451 dst->f[2] = powf( src0->f[2], src1->f[2] ); 1452 dst->f[3] = powf( src0->f[3], src1->f[3] ); 1453 #endif 1454 } 1455 1456 static void 1457 micro_ldexp(union tgsi_exec_channel *dst, 1458 const union tgsi_exec_channel *src0, 1459 const union tgsi_exec_channel *src1) 1460 { 1461 dst->f[0] = ldexpf(src0->f[0], src1->i[0]); 1462 dst->f[1] = ldexpf(src0->f[1], src1->i[1]); 1463 dst->f[2] = ldexpf(src0->f[2], src1->i[2]); 1464 dst->f[3] = ldexpf(src0->f[3], src1->i[3]); 1465 } 1466 1467 static void 1468 micro_sub(union tgsi_exec_channel *dst, 1469 const union tgsi_exec_channel *src0, 1470 const union tgsi_exec_channel *src1) 1471 { 1472 dst->f[0] = src0->f[0] - src1->f[0]; 1473 dst->f[1] = src0->f[1] - src1->f[1]; 1474 dst->f[2] = src0->f[2] - src1->f[2]; 1475 dst->f[3] = src0->f[3] - src1->f[3]; 1476 } 1477 1478 static void 1479 fetch_src_file_channel(const struct tgsi_exec_machine *mach, 1480 const uint chan_index, 1481 const uint file, 1482 const uint swizzle, 1483 const union tgsi_exec_channel *index, 1484 const union tgsi_exec_channel *index2D, 1485 union tgsi_exec_channel *chan) 1486 { 1487 uint i; 1488 1489 assert(swizzle < 4); 1490 1491 switch (file) { 1492 case TGSI_FILE_CONSTANT: 1493 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1494 assert(index2D->i[i] >= 0 && index2D->i[i] < PIPE_MAX_CONSTANT_BUFFERS); 1495 assert(mach->Consts[index2D->i[i]]); 1496 1497 if (index->i[i] < 0) { 1498 chan->u[i] = 0; 1499 } else { 1500 /* NOTE: copying the const value as a uint instead of float */ 1501 const uint constbuf = index2D->i[i]; 1502 const uint *buf = (const uint *)mach->Consts[constbuf]; 1503 const int pos = index->i[i] * 4 + swizzle; 1504 /* const buffer bounds check */ 1505 if (pos < 0 || pos >= (int) mach->ConstsSize[constbuf]) { 1506 if (0) { 1507 /* Debug: print warning */ 1508 static int count = 0; 1509 if (count++ < 100) 1510 debug_printf("TGSI Exec: const buffer index %d" 1511 " out of bounds\n", pos); 1512 } 1513 chan->u[i] = 0; 1514 } 1515 else 1516 chan->u[i] = buf[pos]; 1517 } 1518 } 1519 break; 1520 1521 case TGSI_FILE_INPUT: 1522 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1523 /* 1524 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) { 1525 debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n", 1526 index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i], 1527 index2D->i[i], index->i[i]); 1528 }*/ 1529 int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i]; 1530 assert(pos >= 0); 1531 assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS); 1532 chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i]; 1533 } 1534 break; 1535 1536 case TGSI_FILE_SYSTEM_VALUE: 1537 /* XXX no swizzling at this point. Will be needed if we put 1538 * gl_FragCoord, for example, in a sys value register. 1539 */ 1540 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1541 chan->u[i] = mach->SystemValue[index->i[i]].xyzw[swizzle].u[i]; 1542 } 1543 break; 1544 1545 case TGSI_FILE_TEMPORARY: 1546 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1547 assert(index->i[i] < TGSI_EXEC_NUM_TEMPS); 1548 assert(index2D->i[i] == 0); 1549 1550 chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i]; 1551 } 1552 break; 1553 1554 case TGSI_FILE_IMMEDIATE: 1555 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1556 assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit); 1557 assert(index2D->i[i] == 0); 1558 1559 chan->f[i] = mach->Imms[index->i[i]][swizzle]; 1560 } 1561 break; 1562 1563 case TGSI_FILE_ADDRESS: 1564 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1565 assert(index->i[i] >= 0); 1566 assert(index2D->i[i] == 0); 1567 1568 chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i]; 1569 } 1570 break; 1571 1572 case TGSI_FILE_OUTPUT: 1573 /* vertex/fragment output vars can be read too */ 1574 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1575 assert(index->i[i] >= 0); 1576 assert(index2D->i[i] == 0); 1577 1578 chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i]; 1579 } 1580 break; 1581 1582 default: 1583 assert(0); 1584 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1585 chan->u[i] = 0; 1586 } 1587 } 1588 } 1589 1590 static void 1591 fetch_source_d(const struct tgsi_exec_machine *mach, 1592 union tgsi_exec_channel *chan, 1593 const struct tgsi_full_src_register *reg, 1594 const uint chan_index, 1595 enum tgsi_exec_datatype src_datatype) 1596 { 1597 union tgsi_exec_channel index; 1598 union tgsi_exec_channel index2D; 1599 uint swizzle; 1600 1601 /* We start with a direct index into a register file. 1602 * 1603 * file[1], 1604 * where: 1605 * file = Register.File 1606 * [1] = Register.Index 1607 */ 1608 index.i[0] = 1609 index.i[1] = 1610 index.i[2] = 1611 index.i[3] = reg->Register.Index; 1612 1613 /* There is an extra source register that indirectly subscripts 1614 * a register file. The direct index now becomes an offset 1615 * that is being added to the indirect register. 1616 * 1617 * file[ind[2].x+1], 1618 * where: 1619 * ind = Indirect.File 1620 * [2] = Indirect.Index 1621 * .x = Indirect.SwizzleX 1622 */ 1623 if (reg->Register.Indirect) { 1624 union tgsi_exec_channel index2; 1625 union tgsi_exec_channel indir_index; 1626 const uint execmask = mach->ExecMask; 1627 uint i; 1628 1629 /* which address register (always zero now) */ 1630 index2.i[0] = 1631 index2.i[1] = 1632 index2.i[2] = 1633 index2.i[3] = reg->Indirect.Index; 1634 /* get current value of address register[swizzle] */ 1635 swizzle = reg->Indirect.Swizzle; 1636 fetch_src_file_channel(mach, 1637 chan_index, 1638 reg->Indirect.File, 1639 swizzle, 1640 &index2, 1641 &ZeroVec, 1642 &indir_index); 1643 1644 /* add value of address register to the offset */ 1645 index.i[0] += indir_index.i[0]; 1646 index.i[1] += indir_index.i[1]; 1647 index.i[2] += indir_index.i[2]; 1648 index.i[3] += indir_index.i[3]; 1649 1650 /* for disabled execution channels, zero-out the index to 1651 * avoid using a potential garbage value. 1652 */ 1653 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1654 if ((execmask & (1 << i)) == 0) 1655 index.i[i] = 0; 1656 } 1657 } 1658 1659 /* There is an extra source register that is a second 1660 * subscript to a register file. Effectively it means that 1661 * the register file is actually a 2D array of registers. 1662 * 1663 * file[3][1], 1664 * where: 1665 * [3] = Dimension.Index 1666 */ 1667 if (reg->Register.Dimension) { 1668 index2D.i[0] = 1669 index2D.i[1] = 1670 index2D.i[2] = 1671 index2D.i[3] = reg->Dimension.Index; 1672 1673 /* Again, the second subscript index can be addressed indirectly 1674 * identically to the first one. 1675 * Nothing stops us from indirectly addressing the indirect register, 1676 * but there is no need for that, so we won't exercise it. 1677 * 1678 * file[ind[4].y+3][1], 1679 * where: 1680 * ind = DimIndirect.File 1681 * [4] = DimIndirect.Index 1682 * .y = DimIndirect.SwizzleX 1683 */ 1684 if (reg->Dimension.Indirect) { 1685 union tgsi_exec_channel index2; 1686 union tgsi_exec_channel indir_index; 1687 const uint execmask = mach->ExecMask; 1688 uint i; 1689 1690 index2.i[0] = 1691 index2.i[1] = 1692 index2.i[2] = 1693 index2.i[3] = reg->DimIndirect.Index; 1694 1695 swizzle = reg->DimIndirect.Swizzle; 1696 fetch_src_file_channel(mach, 1697 chan_index, 1698 reg->DimIndirect.File, 1699 swizzle, 1700 &index2, 1701 &ZeroVec, 1702 &indir_index); 1703 1704 index2D.i[0] += indir_index.i[0]; 1705 index2D.i[1] += indir_index.i[1]; 1706 index2D.i[2] += indir_index.i[2]; 1707 index2D.i[3] += indir_index.i[3]; 1708 1709 /* for disabled execution channels, zero-out the index to 1710 * avoid using a potential garbage value. 1711 */ 1712 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1713 if ((execmask & (1 << i)) == 0) { 1714 index2D.i[i] = 0; 1715 } 1716 } 1717 } 1718 1719 /* If by any chance there was a need for a 3D array of register 1720 * files, we would have to check whether Dimension is followed 1721 * by a dimension register and continue the saga. 1722 */ 1723 } else { 1724 index2D.i[0] = 1725 index2D.i[1] = 1726 index2D.i[2] = 1727 index2D.i[3] = 0; 1728 } 1729 1730 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 1731 fetch_src_file_channel(mach, 1732 chan_index, 1733 reg->Register.File, 1734 swizzle, 1735 &index, 1736 &index2D, 1737 chan); 1738 } 1739 1740 static void 1741 fetch_source(const struct tgsi_exec_machine *mach, 1742 union tgsi_exec_channel *chan, 1743 const struct tgsi_full_src_register *reg, 1744 const uint chan_index, 1745 enum tgsi_exec_datatype src_datatype) 1746 { 1747 fetch_source_d(mach, chan, reg, chan_index, src_datatype); 1748 1749 if (reg->Register.Absolute) { 1750 if (src_datatype == TGSI_EXEC_DATA_FLOAT) { 1751 micro_abs(chan, chan); 1752 } else { 1753 micro_iabs(chan, chan); 1754 } 1755 } 1756 1757 if (reg->Register.Negate) { 1758 if (src_datatype == TGSI_EXEC_DATA_FLOAT) { 1759 micro_neg(chan, chan); 1760 } else { 1761 micro_ineg(chan, chan); 1762 } 1763 } 1764 } 1765 1766 static union tgsi_exec_channel * 1767 store_dest_dstret(struct tgsi_exec_machine *mach, 1768 const union tgsi_exec_channel *chan, 1769 const struct tgsi_full_dst_register *reg, 1770 const struct tgsi_full_instruction *inst, 1771 uint chan_index, 1772 enum tgsi_exec_datatype dst_datatype) 1773 { 1774 static union tgsi_exec_channel null; 1775 union tgsi_exec_channel *dst; 1776 union tgsi_exec_channel index2D; 1777 int offset = 0; /* indirection offset */ 1778 int index; 1779 1780 /* for debugging */ 1781 if (0 && dst_datatype == TGSI_EXEC_DATA_FLOAT) { 1782 check_inf_or_nan(chan); 1783 } 1784 1785 /* There is an extra source register that indirectly subscripts 1786 * a register file. The direct index now becomes an offset 1787 * that is being added to the indirect register. 1788 * 1789 * file[ind[2].x+1], 1790 * where: 1791 * ind = Indirect.File 1792 * [2] = Indirect.Index 1793 * .x = Indirect.SwizzleX 1794 */ 1795 if (reg->Register.Indirect) { 1796 union tgsi_exec_channel index; 1797 union tgsi_exec_channel indir_index; 1798 uint swizzle; 1799 1800 /* which address register (always zero for now) */ 1801 index.i[0] = 1802 index.i[1] = 1803 index.i[2] = 1804 index.i[3] = reg->Indirect.Index; 1805 1806 /* get current value of address register[swizzle] */ 1807 swizzle = reg->Indirect.Swizzle; 1808 1809 /* fetch values from the address/indirection register */ 1810 fetch_src_file_channel(mach, 1811 chan_index, 1812 reg->Indirect.File, 1813 swizzle, 1814 &index, 1815 &ZeroVec, 1816 &indir_index); 1817 1818 /* save indirection offset */ 1819 offset = indir_index.i[0]; 1820 } 1821 1822 /* There is an extra source register that is a second 1823 * subscript to a register file. Effectively it means that 1824 * the register file is actually a 2D array of registers. 1825 * 1826 * file[3][1], 1827 * where: 1828 * [3] = Dimension.Index 1829 */ 1830 if (reg->Register.Dimension) { 1831 index2D.i[0] = 1832 index2D.i[1] = 1833 index2D.i[2] = 1834 index2D.i[3] = reg->Dimension.Index; 1835 1836 /* Again, the second subscript index can be addressed indirectly 1837 * identically to the first one. 1838 * Nothing stops us from indirectly addressing the indirect register, 1839 * but there is no need for that, so we won't exercise it. 1840 * 1841 * file[ind[4].y+3][1], 1842 * where: 1843 * ind = DimIndirect.File 1844 * [4] = DimIndirect.Index 1845 * .y = DimIndirect.SwizzleX 1846 */ 1847 if (reg->Dimension.Indirect) { 1848 union tgsi_exec_channel index2; 1849 union tgsi_exec_channel indir_index; 1850 const uint execmask = mach->ExecMask; 1851 unsigned swizzle; 1852 uint i; 1853 1854 index2.i[0] = 1855 index2.i[1] = 1856 index2.i[2] = 1857 index2.i[3] = reg->DimIndirect.Index; 1858 1859 swizzle = reg->DimIndirect.Swizzle; 1860 fetch_src_file_channel(mach, 1861 chan_index, 1862 reg->DimIndirect.File, 1863 swizzle, 1864 &index2, 1865 &ZeroVec, 1866 &indir_index); 1867 1868 index2D.i[0] += indir_index.i[0]; 1869 index2D.i[1] += indir_index.i[1]; 1870 index2D.i[2] += indir_index.i[2]; 1871 index2D.i[3] += indir_index.i[3]; 1872 1873 /* for disabled execution channels, zero-out the index to 1874 * avoid using a potential garbage value. 1875 */ 1876 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1877 if ((execmask & (1 << i)) == 0) { 1878 index2D.i[i] = 0; 1879 } 1880 } 1881 } 1882 1883 /* If by any chance there was a need for a 3D array of register 1884 * files, we would have to check whether Dimension is followed 1885 * by a dimension register and continue the saga. 1886 */ 1887 } else { 1888 index2D.i[0] = 1889 index2D.i[1] = 1890 index2D.i[2] = 1891 index2D.i[3] = 0; 1892 } 1893 1894 switch (reg->Register.File) { 1895 case TGSI_FILE_NULL: 1896 dst = &null; 1897 break; 1898 1899 case TGSI_FILE_OUTPUT: 1900 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] 1901 + reg->Register.Index; 1902 dst = &mach->Outputs[offset + index].xyzw[chan_index]; 1903 #if 0 1904 debug_printf("NumOutputs = %d, TEMP_O_C/I = %d, redindex = %d\n", 1905 mach->NumOutputs, mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0], 1906 reg->Register.Index); 1907 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) { 1908 debug_printf("STORING OUT[%d] mask(%d), = (", offset + index, execmask); 1909 for (i = 0; i < TGSI_QUAD_SIZE; i++) 1910 if (execmask & (1 << i)) 1911 debug_printf("%f, ", chan->f[i]); 1912 debug_printf(")\n"); 1913 } 1914 #endif 1915 break; 1916 1917 case TGSI_FILE_TEMPORARY: 1918 index = reg->Register.Index; 1919 assert( index < TGSI_EXEC_NUM_TEMPS ); 1920 dst = &mach->Temps[offset + index].xyzw[chan_index]; 1921 break; 1922 1923 case TGSI_FILE_ADDRESS: 1924 index = reg->Register.Index; 1925 dst = &mach->Addrs[index].xyzw[chan_index]; 1926 break; 1927 1928 default: 1929 assert( 0 ); 1930 return NULL; 1931 } 1932 1933 return dst; 1934 } 1935 1936 static void 1937 store_dest_double(struct tgsi_exec_machine *mach, 1938 const union tgsi_exec_channel *chan, 1939 const struct tgsi_full_dst_register *reg, 1940 const struct tgsi_full_instruction *inst, 1941 uint chan_index, 1942 enum tgsi_exec_datatype dst_datatype) 1943 { 1944 union tgsi_exec_channel *dst; 1945 const uint execmask = mach->ExecMask; 1946 int i; 1947 1948 dst = store_dest_dstret(mach, chan, reg, inst, chan_index, 1949 dst_datatype); 1950 if (!dst) 1951 return; 1952 1953 /* doubles path */ 1954 for (i = 0; i < TGSI_QUAD_SIZE; i++) 1955 if (execmask & (1 << i)) 1956 dst->i[i] = chan->i[i]; 1957 } 1958 1959 static void 1960 store_dest(struct tgsi_exec_machine *mach, 1961 const union tgsi_exec_channel *chan, 1962 const struct tgsi_full_dst_register *reg, 1963 const struct tgsi_full_instruction *inst, 1964 uint chan_index, 1965 enum tgsi_exec_datatype dst_datatype) 1966 { 1967 union tgsi_exec_channel *dst; 1968 const uint execmask = mach->ExecMask; 1969 int i; 1970 1971 dst = store_dest_dstret(mach, chan, reg, inst, chan_index, 1972 dst_datatype); 1973 if (!dst) 1974 return; 1975 1976 if (!inst->Instruction.Saturate) { 1977 for (i = 0; i < TGSI_QUAD_SIZE; i++) 1978 if (execmask & (1 << i)) 1979 dst->i[i] = chan->i[i]; 1980 } 1981 else { 1982 for (i = 0; i < TGSI_QUAD_SIZE; i++) 1983 if (execmask & (1 << i)) { 1984 if (chan->f[i] < 0.0f) 1985 dst->f[i] = 0.0f; 1986 else if (chan->f[i] > 1.0f) 1987 dst->f[i] = 1.0f; 1988 else 1989 dst->i[i] = chan->i[i]; 1990 } 1991 } 1992 } 1993 1994 #define FETCH(VAL,INDEX,CHAN)\ 1995 fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT) 1996 1997 #define IFETCH(VAL,INDEX,CHAN)\ 1998 fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_INT) 1999 2000 2001 /** 2002 * Execute ARB-style KIL which is predicated by a src register. 2003 * Kill fragment if any of the four values is less than zero. 2004 */ 2005 static void 2006 exec_kill_if(struct tgsi_exec_machine *mach, 2007 const struct tgsi_full_instruction *inst) 2008 { 2009 uint uniquemask; 2010 uint chan_index; 2011 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 2012 union tgsi_exec_channel r[1]; 2013 2014 /* This mask stores component bits that were already tested. */ 2015 uniquemask = 0; 2016 2017 for (chan_index = 0; chan_index < 4; chan_index++) 2018 { 2019 uint swizzle; 2020 uint i; 2021 2022 /* unswizzle channel */ 2023 swizzle = tgsi_util_get_full_src_register_swizzle ( 2024 &inst->Src[0], 2025 chan_index); 2026 2027 /* check if the component has not been already tested */ 2028 if (uniquemask & (1 << swizzle)) 2029 continue; 2030 uniquemask |= 1 << swizzle; 2031 2032 FETCH(&r[0], 0, chan_index); 2033 for (i = 0; i < 4; i++) 2034 if (r[0].f[i] < 0.0f) 2035 kilmask |= 1 << i; 2036 } 2037 2038 /* restrict to fragments currently executing */ 2039 kilmask &= mach->ExecMask; 2040 2041 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 2042 } 2043 2044 /** 2045 * Unconditional fragment kill/discard. 2046 */ 2047 static void 2048 exec_kill(struct tgsi_exec_machine *mach, 2049 const struct tgsi_full_instruction *inst) 2050 { 2051 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 2052 2053 /* kill fragment for all fragments currently executing */ 2054 kilmask = mach->ExecMask; 2055 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 2056 } 2057 2058 static void 2059 emit_vertex(struct tgsi_exec_machine *mach) 2060 { 2061 /* FIXME: check for exec mask correctly 2062 unsigned i; 2063 for (i = 0; i < TGSI_QUAD_SIZE; ++i) { 2064 if ((mach->ExecMask & (1 << i))) 2065 */ 2066 if (mach->ExecMask) { 2067 if (mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] >= mach->MaxOutputVertices) 2068 return; 2069 2070 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs; 2071 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; 2072 } 2073 } 2074 2075 static void 2076 emit_primitive(struct tgsi_exec_machine *mach) 2077 { 2078 unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]; 2079 /* FIXME: check for exec mask correctly 2080 unsigned i; 2081 for (i = 0; i < TGSI_QUAD_SIZE; ++i) { 2082 if ((mach->ExecMask & (1 << i))) 2083 */ 2084 if (mach->ExecMask) { 2085 ++(*prim_count); 2086 debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs); 2087 mach->Primitives[*prim_count] = 0; 2088 } 2089 } 2090 2091 static void 2092 conditional_emit_primitive(struct tgsi_exec_machine *mach) 2093 { 2094 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) { 2095 int emitted_verts = 2096 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]; 2097 if (emitted_verts) { 2098 emit_primitive(mach); 2099 } 2100 } 2101 } 2102 2103 2104 /* 2105 * Fetch four texture samples using STR texture coordinates. 2106 */ 2107 static void 2108 fetch_texel( struct tgsi_sampler *sampler, 2109 const unsigned sview_idx, 2110 const unsigned sampler_idx, 2111 const union tgsi_exec_channel *s, 2112 const union tgsi_exec_channel *t, 2113 const union tgsi_exec_channel *p, 2114 const union tgsi_exec_channel *c0, 2115 const union tgsi_exec_channel *c1, 2116 float derivs[3][2][TGSI_QUAD_SIZE], 2117 const int8_t offset[3], 2118 enum tgsi_sampler_control control, 2119 union tgsi_exec_channel *r, 2120 union tgsi_exec_channel *g, 2121 union tgsi_exec_channel *b, 2122 union tgsi_exec_channel *a ) 2123 { 2124 uint j; 2125 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 2126 2127 /* FIXME: handle explicit derivs, offsets */ 2128 sampler->get_samples(sampler, sview_idx, sampler_idx, 2129 s->f, t->f, p->f, c0->f, c1->f, derivs, offset, control, rgba); 2130 2131 for (j = 0; j < 4; j++) { 2132 r->f[j] = rgba[0][j]; 2133 g->f[j] = rgba[1][j]; 2134 b->f[j] = rgba[2][j]; 2135 a->f[j] = rgba[3][j]; 2136 } 2137 } 2138 2139 2140 #define TEX_MODIFIER_NONE 0 2141 #define TEX_MODIFIER_PROJECTED 1 2142 #define TEX_MODIFIER_LOD_BIAS 2 2143 #define TEX_MODIFIER_EXPLICIT_LOD 3 2144 #define TEX_MODIFIER_LEVEL_ZERO 4 2145 #define TEX_MODIFIER_GATHER 5 2146 2147 /* 2148 * Fetch all 3 (for s,t,r coords) texel offsets, put them into int array. 2149 */ 2150 static void 2151 fetch_texel_offsets(struct tgsi_exec_machine *mach, 2152 const struct tgsi_full_instruction *inst, 2153 int8_t offsets[3]) 2154 { 2155 if (inst->Texture.NumOffsets == 1) { 2156 union tgsi_exec_channel index; 2157 union tgsi_exec_channel offset[3]; 2158 index.i[0] = index.i[1] = index.i[2] = index.i[3] = inst->TexOffsets[0].Index; 2159 fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File, 2160 inst->TexOffsets[0].SwizzleX, &index, &ZeroVec, &offset[0]); 2161 fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File, 2162 inst->TexOffsets[0].SwizzleY, &index, &ZeroVec, &offset[1]); 2163 fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File, 2164 inst->TexOffsets[0].SwizzleZ, &index, &ZeroVec, &offset[2]); 2165 offsets[0] = offset[0].i[0]; 2166 offsets[1] = offset[1].i[0]; 2167 offsets[2] = offset[2].i[0]; 2168 } else { 2169 assert(inst->Texture.NumOffsets == 0); 2170 offsets[0] = offsets[1] = offsets[2] = 0; 2171 } 2172 } 2173 2174 2175 /* 2176 * Fetch dx and dy values for one channel (s, t or r). 2177 * Put dx values into one float array, dy values into another. 2178 */ 2179 static void 2180 fetch_assign_deriv_channel(struct tgsi_exec_machine *mach, 2181 const struct tgsi_full_instruction *inst, 2182 unsigned regdsrcx, 2183 unsigned chan, 2184 float derivs[2][TGSI_QUAD_SIZE]) 2185 { 2186 union tgsi_exec_channel d; 2187 FETCH(&d, regdsrcx, chan); 2188 derivs[0][0] = d.f[0]; 2189 derivs[0][1] = d.f[1]; 2190 derivs[0][2] = d.f[2]; 2191 derivs[0][3] = d.f[3]; 2192 FETCH(&d, regdsrcx + 1, chan); 2193 derivs[1][0] = d.f[0]; 2194 derivs[1][1] = d.f[1]; 2195 derivs[1][2] = d.f[2]; 2196 derivs[1][3] = d.f[3]; 2197 } 2198 2199 static uint 2200 fetch_sampler_unit(struct tgsi_exec_machine *mach, 2201 const struct tgsi_full_instruction *inst, 2202 uint sampler) 2203 { 2204 uint unit = 0; 2205 int i; 2206 if (inst->Src[sampler].Register.Indirect) { 2207 const struct tgsi_full_src_register *reg = &inst->Src[sampler]; 2208 union tgsi_exec_channel indir_index, index2; 2209 const uint execmask = mach->ExecMask; 2210 index2.i[0] = 2211 index2.i[1] = 2212 index2.i[2] = 2213 index2.i[3] = reg->Indirect.Index; 2214 2215 fetch_src_file_channel(mach, 2216 0, 2217 reg->Indirect.File, 2218 reg->Indirect.Swizzle, 2219 &index2, 2220 &ZeroVec, 2221 &indir_index); 2222 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 2223 if (execmask & (1 << i)) { 2224 unit = inst->Src[sampler].Register.Index + indir_index.i[i]; 2225 break; 2226 } 2227 } 2228 2229 } else { 2230 unit = inst->Src[sampler].Register.Index; 2231 } 2232 return unit; 2233 } 2234 2235 /* 2236 * execute a texture instruction. 2237 * 2238 * modifier is used to control the channel routing for the 2239 * instruction variants like proj, lod, and texture with lod bias. 2240 * sampler indicates which src register the sampler is contained in. 2241 */ 2242 static void 2243 exec_tex(struct tgsi_exec_machine *mach, 2244 const struct tgsi_full_instruction *inst, 2245 uint modifier, uint sampler) 2246 { 2247 const union tgsi_exec_channel *args[5], *proj = NULL; 2248 union tgsi_exec_channel r[5]; 2249 enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE; 2250 uint chan; 2251 uint unit; 2252 int8_t offsets[3]; 2253 int dim, shadow_ref, i; 2254 2255 unit = fetch_sampler_unit(mach, inst, sampler); 2256 /* always fetch all 3 offsets, overkill but keeps code simple */ 2257 fetch_texel_offsets(mach, inst, offsets); 2258 2259 assert(modifier != TEX_MODIFIER_LEVEL_ZERO); 2260 assert(inst->Texture.Texture != TGSI_TEXTURE_BUFFER); 2261 2262 dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture); 2263 shadow_ref = tgsi_util_get_shadow_ref_src_index(inst->Texture.Texture); 2264 2265 assert(dim <= 4); 2266 if (shadow_ref >= 0) 2267 assert(shadow_ref >= dim && shadow_ref < ARRAY_SIZE(args)); 2268 2269 /* fetch modifier to the last argument */ 2270 if (modifier != TEX_MODIFIER_NONE) { 2271 const int last = ARRAY_SIZE(args) - 1; 2272 2273 /* fetch modifier from src0.w or src1.x */ 2274 if (sampler == 1) { 2275 assert(dim <= TGSI_CHAN_W && shadow_ref != TGSI_CHAN_W); 2276 FETCH(&r[last], 0, TGSI_CHAN_W); 2277 } 2278 else { 2279 assert(shadow_ref != 4); 2280 FETCH(&r[last], 1, TGSI_CHAN_X); 2281 } 2282 2283 if (modifier != TEX_MODIFIER_PROJECTED) { 2284 args[last] = &r[last]; 2285 } 2286 else { 2287 proj = &r[last]; 2288 args[last] = &ZeroVec; 2289 } 2290 2291 /* point unused arguments to zero vector */ 2292 for (i = dim; i < last; i++) 2293 args[i] = &ZeroVec; 2294 2295 if (modifier == TEX_MODIFIER_EXPLICIT_LOD) 2296 control = TGSI_SAMPLER_LOD_EXPLICIT; 2297 else if (modifier == TEX_MODIFIER_LOD_BIAS) 2298 control = TGSI_SAMPLER_LOD_BIAS; 2299 else if (modifier == TEX_MODIFIER_GATHER) 2300 control = TGSI_SAMPLER_GATHER; 2301 } 2302 else { 2303 for (i = dim; i < ARRAY_SIZE(args); i++) 2304 args[i] = &ZeroVec; 2305 } 2306 2307 /* fetch coordinates */ 2308 for (i = 0; i < dim; i++) { 2309 FETCH(&r[i], 0, TGSI_CHAN_X + i); 2310 2311 if (proj) 2312 micro_div(&r[i], &r[i], proj); 2313 2314 args[i] = &r[i]; 2315 } 2316 2317 /* fetch reference value */ 2318 if (shadow_ref >= 0) { 2319 FETCH(&r[shadow_ref], shadow_ref / 4, TGSI_CHAN_X + (shadow_ref % 4)); 2320 2321 if (proj) 2322 micro_div(&r[shadow_ref], &r[shadow_ref], proj); 2323 2324 args[shadow_ref] = &r[shadow_ref]; 2325 } 2326 2327 fetch_texel(mach->Sampler, unit, unit, 2328 args[0], args[1], args[2], args[3], args[4], 2329 NULL, offsets, control, 2330 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2331 2332 #if 0 2333 debug_printf("fetch r: %g %g %g %g\n", 2334 r[0].f[0], r[0].f[1], r[0].f[2], r[0].f[3]); 2335 debug_printf("fetch g: %g %g %g %g\n", 2336 r[1].f[0], r[1].f[1], r[1].f[2], r[1].f[3]); 2337 debug_printf("fetch b: %g %g %g %g\n", 2338 r[2].f[0], r[2].f[1], r[2].f[2], r[2].f[3]); 2339 debug_printf("fetch a: %g %g %g %g\n", 2340 r[3].f[0], r[3].f[1], r[3].f[2], r[3].f[3]); 2341 #endif 2342 2343 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2344 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2345 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2346 } 2347 } 2348 } 2349 2350 static void 2351 exec_lodq(struct tgsi_exec_machine *mach, 2352 const struct tgsi_full_instruction *inst) 2353 { 2354 uint resource_unit, sampler_unit; 2355 int dim; 2356 int i; 2357 union tgsi_exec_channel coords[4]; 2358 const union tgsi_exec_channel *args[ARRAY_SIZE(coords)]; 2359 union tgsi_exec_channel r[2]; 2360 2361 resource_unit = fetch_sampler_unit(mach, inst, 1); 2362 if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) { 2363 uint target = mach->SamplerViews[resource_unit].Resource; 2364 dim = tgsi_util_get_texture_coord_dim(target); 2365 sampler_unit = fetch_sampler_unit(mach, inst, 2); 2366 } else { 2367 dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture); 2368 sampler_unit = resource_unit; 2369 } 2370 assert(dim <= ARRAY_SIZE(coords)); 2371 /* fetch coordinates */ 2372 for (i = 0; i < dim; i++) { 2373 FETCH(&coords[i], 0, TGSI_CHAN_X + i); 2374 args[i] = &coords[i]; 2375 } 2376 for (i = dim; i < ARRAY_SIZE(coords); i++) { 2377 args[i] = &ZeroVec; 2378 } 2379 mach->Sampler->query_lod(mach->Sampler, resource_unit, sampler_unit, 2380 args[0]->f, 2381 args[1]->f, 2382 args[2]->f, 2383 args[3]->f, 2384 TGSI_SAMPLER_LOD_NONE, 2385 r[0].f, 2386 r[1].f); 2387 2388 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 2389 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, 2390 TGSI_EXEC_DATA_FLOAT); 2391 } 2392 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 2393 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y, 2394 TGSI_EXEC_DATA_FLOAT); 2395 } 2396 if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) { 2397 unsigned char swizzles[4]; 2398 unsigned chan; 2399 swizzles[0] = inst->Src[1].Register.SwizzleX; 2400 swizzles[1] = inst->Src[1].Register.SwizzleY; 2401 swizzles[2] = inst->Src[1].Register.SwizzleZ; 2402 swizzles[3] = inst->Src[1].Register.SwizzleW; 2403 2404 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2405 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2406 if (swizzles[chan] >= 2) { 2407 store_dest(mach, &ZeroVec, 2408 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2409 } else { 2410 store_dest(mach, &r[swizzles[chan]], 2411 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2412 } 2413 } 2414 } 2415 } else { 2416 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 2417 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, 2418 TGSI_EXEC_DATA_FLOAT); 2419 } 2420 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 2421 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y, 2422 TGSI_EXEC_DATA_FLOAT); 2423 } 2424 } 2425 } 2426 2427 static void 2428 exec_txd(struct tgsi_exec_machine *mach, 2429 const struct tgsi_full_instruction *inst) 2430 { 2431 union tgsi_exec_channel r[4]; 2432 float derivs[3][2][TGSI_QUAD_SIZE]; 2433 uint chan; 2434 uint unit; 2435 int8_t offsets[3]; 2436 2437 unit = fetch_sampler_unit(mach, inst, 3); 2438 /* always fetch all 3 offsets, overkill but keeps code simple */ 2439 fetch_texel_offsets(mach, inst, offsets); 2440 2441 switch (inst->Texture.Texture) { 2442 case TGSI_TEXTURE_1D: 2443 FETCH(&r[0], 0, TGSI_CHAN_X); 2444 2445 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2446 2447 fetch_texel(mach->Sampler, unit, unit, 2448 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 2449 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2450 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2451 break; 2452 2453 case TGSI_TEXTURE_SHADOW1D: 2454 case TGSI_TEXTURE_1D_ARRAY: 2455 case TGSI_TEXTURE_SHADOW1D_ARRAY: 2456 /* SHADOW1D/1D_ARRAY would not need Y/Z respectively, but don't bother */ 2457 FETCH(&r[0], 0, TGSI_CHAN_X); 2458 FETCH(&r[1], 0, TGSI_CHAN_Y); 2459 FETCH(&r[2], 0, TGSI_CHAN_Z); 2460 2461 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2462 2463 fetch_texel(mach->Sampler, unit, unit, 2464 &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 2465 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2466 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2467 break; 2468 2469 case TGSI_TEXTURE_2D: 2470 case TGSI_TEXTURE_RECT: 2471 FETCH(&r[0], 0, TGSI_CHAN_X); 2472 FETCH(&r[1], 0, TGSI_CHAN_Y); 2473 2474 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2475 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]); 2476 2477 fetch_texel(mach->Sampler, unit, unit, 2478 &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 2479 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2480 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2481 break; 2482 2483 2484 case TGSI_TEXTURE_SHADOW2D: 2485 case TGSI_TEXTURE_SHADOWRECT: 2486 case TGSI_TEXTURE_2D_ARRAY: 2487 case TGSI_TEXTURE_SHADOW2D_ARRAY: 2488 /* only SHADOW2D_ARRAY actually needs W */ 2489 FETCH(&r[0], 0, TGSI_CHAN_X); 2490 FETCH(&r[1], 0, TGSI_CHAN_Y); 2491 FETCH(&r[2], 0, TGSI_CHAN_Z); 2492 FETCH(&r[3], 0, TGSI_CHAN_W); 2493 2494 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2495 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]); 2496 2497 fetch_texel(mach->Sampler, unit, unit, 2498 &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */ 2499 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2500 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2501 break; 2502 2503 case TGSI_TEXTURE_3D: 2504 case TGSI_TEXTURE_CUBE: 2505 case TGSI_TEXTURE_CUBE_ARRAY: 2506 case TGSI_TEXTURE_SHADOWCUBE: 2507 /* only TEXTURE_CUBE_ARRAY and TEXTURE_SHADOWCUBE actually need W */ 2508 FETCH(&r[0], 0, TGSI_CHAN_X); 2509 FETCH(&r[1], 0, TGSI_CHAN_Y); 2510 FETCH(&r[2], 0, TGSI_CHAN_Z); 2511 FETCH(&r[3], 0, TGSI_CHAN_W); 2512 2513 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2514 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]); 2515 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Z, derivs[2]); 2516 2517 fetch_texel(mach->Sampler, unit, unit, 2518 &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */ 2519 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2520 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2521 break; 2522 2523 default: 2524 assert(0); 2525 } 2526 2527 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2528 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2529 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2530 } 2531 } 2532 } 2533 2534 2535 static void 2536 exec_txf(struct tgsi_exec_machine *mach, 2537 const struct tgsi_full_instruction *inst) 2538 { 2539 union tgsi_exec_channel r[4]; 2540 uint chan; 2541 uint unit; 2542 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 2543 int j; 2544 int8_t offsets[3]; 2545 unsigned target; 2546 2547 unit = fetch_sampler_unit(mach, inst, 1); 2548 /* always fetch all 3 offsets, overkill but keeps code simple */ 2549 fetch_texel_offsets(mach, inst, offsets); 2550 2551 IFETCH(&r[3], 0, TGSI_CHAN_W); 2552 2553 if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I || 2554 inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) { 2555 target = mach->SamplerViews[unit].Resource; 2556 } 2557 else { 2558 target = inst->Texture.Texture; 2559 } 2560 switch(target) { 2561 case TGSI_TEXTURE_3D: 2562 case TGSI_TEXTURE_2D_ARRAY: 2563 case TGSI_TEXTURE_SHADOW2D_ARRAY: 2564 case TGSI_TEXTURE_2D_ARRAY_MSAA: 2565 IFETCH(&r[2], 0, TGSI_CHAN_Z); 2566 /* fallthrough */ 2567 case TGSI_TEXTURE_2D: 2568 case TGSI_TEXTURE_RECT: 2569 case TGSI_TEXTURE_SHADOW1D_ARRAY: 2570 case TGSI_TEXTURE_SHADOW2D: 2571 case TGSI_TEXTURE_SHADOWRECT: 2572 case TGSI_TEXTURE_1D_ARRAY: 2573 case TGSI_TEXTURE_2D_MSAA: 2574 IFETCH(&r[1], 0, TGSI_CHAN_Y); 2575 /* fallthrough */ 2576 case TGSI_TEXTURE_BUFFER: 2577 case TGSI_TEXTURE_1D: 2578 case TGSI_TEXTURE_SHADOW1D: 2579 IFETCH(&r[0], 0, TGSI_CHAN_X); 2580 break; 2581 default: 2582 assert(0); 2583 break; 2584 } 2585 2586 mach->Sampler->get_texel(mach->Sampler, unit, r[0].i, r[1].i, r[2].i, r[3].i, 2587 offsets, rgba); 2588 2589 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 2590 r[0].f[j] = rgba[0][j]; 2591 r[1].f[j] = rgba[1][j]; 2592 r[2].f[j] = rgba[2][j]; 2593 r[3].f[j] = rgba[3][j]; 2594 } 2595 2596 if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I || 2597 inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) { 2598 unsigned char swizzles[4]; 2599 swizzles[0] = inst->Src[1].Register.SwizzleX; 2600 swizzles[1] = inst->Src[1].Register.SwizzleY; 2601 swizzles[2] = inst->Src[1].Register.SwizzleZ; 2602 swizzles[3] = inst->Src[1].Register.SwizzleW; 2603 2604 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2605 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2606 store_dest(mach, &r[swizzles[chan]], 2607 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2608 } 2609 } 2610 } 2611 else { 2612 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2613 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2614 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2615 } 2616 } 2617 } 2618 } 2619 2620 static void 2621 exec_txq(struct tgsi_exec_machine *mach, 2622 const struct tgsi_full_instruction *inst) 2623 { 2624 int result[4]; 2625 union tgsi_exec_channel r[4], src; 2626 uint chan; 2627 uint unit; 2628 int i,j; 2629 2630 unit = fetch_sampler_unit(mach, inst, 1); 2631 2632 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); 2633 2634 /* XXX: This interface can't return per-pixel values */ 2635 mach->Sampler->get_dims(mach->Sampler, unit, src.i[0], result); 2636 2637 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 2638 for (j = 0; j < 4; j++) { 2639 r[j].i[i] = result[j]; 2640 } 2641 } 2642 2643 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2644 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2645 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, 2646 TGSI_EXEC_DATA_INT); 2647 } 2648 } 2649 } 2650 2651 static void 2652 exec_sample(struct tgsi_exec_machine *mach, 2653 const struct tgsi_full_instruction *inst, 2654 uint modifier, boolean compare) 2655 { 2656 const uint resource_unit = inst->Src[1].Register.Index; 2657 const uint sampler_unit = inst->Src[2].Register.Index; 2658 union tgsi_exec_channel r[5], c1; 2659 const union tgsi_exec_channel *lod = &ZeroVec; 2660 enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE; 2661 uint chan; 2662 unsigned char swizzles[4]; 2663 int8_t offsets[3]; 2664 2665 /* always fetch all 3 offsets, overkill but keeps code simple */ 2666 fetch_texel_offsets(mach, inst, offsets); 2667 2668 assert(modifier != TEX_MODIFIER_PROJECTED); 2669 2670 if (modifier != TEX_MODIFIER_NONE) { 2671 if (modifier == TEX_MODIFIER_LOD_BIAS) { 2672 FETCH(&c1, 3, TGSI_CHAN_X); 2673 lod = &c1; 2674 control = TGSI_SAMPLER_LOD_BIAS; 2675 } 2676 else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) { 2677 FETCH(&c1, 3, TGSI_CHAN_X); 2678 lod = &c1; 2679 control = TGSI_SAMPLER_LOD_EXPLICIT; 2680 } 2681 else if (modifier == TEX_MODIFIER_GATHER) { 2682 control = TGSI_SAMPLER_GATHER; 2683 } 2684 else { 2685 assert(modifier == TEX_MODIFIER_LEVEL_ZERO); 2686 control = TGSI_SAMPLER_LOD_ZERO; 2687 } 2688 } 2689 2690 FETCH(&r[0], 0, TGSI_CHAN_X); 2691 2692 switch (mach->SamplerViews[resource_unit].Resource) { 2693 case TGSI_TEXTURE_1D: 2694 if (compare) { 2695 FETCH(&r[2], 3, TGSI_CHAN_X); 2696 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2697 &r[0], &ZeroVec, &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */ 2698 NULL, offsets, control, 2699 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2700 } 2701 else { 2702 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2703 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */ 2704 NULL, offsets, control, 2705 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2706 } 2707 break; 2708 2709 case TGSI_TEXTURE_1D_ARRAY: 2710 case TGSI_TEXTURE_2D: 2711 case TGSI_TEXTURE_RECT: 2712 FETCH(&r[1], 0, TGSI_CHAN_Y); 2713 if (compare) { 2714 FETCH(&r[2], 3, TGSI_CHAN_X); 2715 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2716 &r[0], &r[1], &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */ 2717 NULL, offsets, control, 2718 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2719 } 2720 else { 2721 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2722 &r[0], &r[1], &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */ 2723 NULL, offsets, control, 2724 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2725 } 2726 break; 2727 2728 case TGSI_TEXTURE_2D_ARRAY: 2729 case TGSI_TEXTURE_3D: 2730 case TGSI_TEXTURE_CUBE: 2731 FETCH(&r[1], 0, TGSI_CHAN_Y); 2732 FETCH(&r[2], 0, TGSI_CHAN_Z); 2733 if(compare) { 2734 FETCH(&r[3], 3, TGSI_CHAN_X); 2735 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2736 &r[0], &r[1], &r[2], &r[3], lod, 2737 NULL, offsets, control, 2738 &r[0], &r[1], &r[2], &r[3]); 2739 } 2740 else { 2741 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2742 &r[0], &r[1], &r[2], &ZeroVec, lod, 2743 NULL, offsets, control, 2744 &r[0], &r[1], &r[2], &r[3]); 2745 } 2746 break; 2747 2748 case TGSI_TEXTURE_CUBE_ARRAY: 2749 FETCH(&r[1], 0, TGSI_CHAN_Y); 2750 FETCH(&r[2], 0, TGSI_CHAN_Z); 2751 FETCH(&r[3], 0, TGSI_CHAN_W); 2752 if(compare) { 2753 FETCH(&r[4], 3, TGSI_CHAN_X); 2754 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2755 &r[0], &r[1], &r[2], &r[3], &r[4], 2756 NULL, offsets, control, 2757 &r[0], &r[1], &r[2], &r[3]); 2758 } 2759 else { 2760 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2761 &r[0], &r[1], &r[2], &r[3], lod, 2762 NULL, offsets, control, 2763 &r[0], &r[1], &r[2], &r[3]); 2764 } 2765 break; 2766 2767 2768 default: 2769 assert(0); 2770 } 2771 2772 swizzles[0] = inst->Src[1].Register.SwizzleX; 2773 swizzles[1] = inst->Src[1].Register.SwizzleY; 2774 swizzles[2] = inst->Src[1].Register.SwizzleZ; 2775 swizzles[3] = inst->Src[1].Register.SwizzleW; 2776 2777 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2778 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2779 store_dest(mach, &r[swizzles[chan]], 2780 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2781 } 2782 } 2783 } 2784 2785 static void 2786 exec_sample_d(struct tgsi_exec_machine *mach, 2787 const struct tgsi_full_instruction *inst) 2788 { 2789 const uint resource_unit = inst->Src[1].Register.Index; 2790 const uint sampler_unit = inst->Src[2].Register.Index; 2791 union tgsi_exec_channel r[4]; 2792 float derivs[3][2][TGSI_QUAD_SIZE]; 2793 uint chan; 2794 unsigned char swizzles[4]; 2795 int8_t offsets[3]; 2796 2797 /* always fetch all 3 offsets, overkill but keeps code simple */ 2798 fetch_texel_offsets(mach, inst, offsets); 2799 2800 FETCH(&r[0], 0, TGSI_CHAN_X); 2801 2802 switch (mach->SamplerViews[resource_unit].Resource) { 2803 case TGSI_TEXTURE_1D: 2804 case TGSI_TEXTURE_1D_ARRAY: 2805 /* only 1D array actually needs Y */ 2806 FETCH(&r[1], 0, TGSI_CHAN_Y); 2807 2808 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]); 2809 2810 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2811 &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 2812 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2813 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2814 break; 2815 2816 case TGSI_TEXTURE_2D: 2817 case TGSI_TEXTURE_RECT: 2818 case TGSI_TEXTURE_2D_ARRAY: 2819 /* only 2D array actually needs Z */ 2820 FETCH(&r[1], 0, TGSI_CHAN_Y); 2821 FETCH(&r[2], 0, TGSI_CHAN_Z); 2822 2823 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]); 2824 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]); 2825 2826 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2827 &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* inputs */ 2828 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2829 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2830 break; 2831 2832 case TGSI_TEXTURE_3D: 2833 case TGSI_TEXTURE_CUBE: 2834 case TGSI_TEXTURE_CUBE_ARRAY: 2835 /* only cube array actually needs W */ 2836 FETCH(&r[1], 0, TGSI_CHAN_Y); 2837 FETCH(&r[2], 0, TGSI_CHAN_Z); 2838 FETCH(&r[3], 0, TGSI_CHAN_W); 2839 2840 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]); 2841 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]); 2842 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Z, derivs[2]); 2843 2844 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2845 &r[0], &r[1], &r[2], &r[3], &ZeroVec, 2846 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2847 &r[0], &r[1], &r[2], &r[3]); 2848 break; 2849 2850 default: 2851 assert(0); 2852 } 2853 2854 swizzles[0] = inst->Src[1].Register.SwizzleX; 2855 swizzles[1] = inst->Src[1].Register.SwizzleY; 2856 swizzles[2] = inst->Src[1].Register.SwizzleZ; 2857 swizzles[3] = inst->Src[1].Register.SwizzleW; 2858 2859 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2860 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2861 store_dest(mach, &r[swizzles[chan]], 2862 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2863 } 2864 } 2865 } 2866 2867 2868 /** 2869 * Evaluate a constant-valued coefficient at the position of the 2870 * current quad. 2871 */ 2872 static void 2873 eval_constant_coef( 2874 struct tgsi_exec_machine *mach, 2875 unsigned attrib, 2876 unsigned chan ) 2877 { 2878 unsigned i; 2879 2880 for( i = 0; i < TGSI_QUAD_SIZE; i++ ) { 2881 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; 2882 } 2883 } 2884 2885 /** 2886 * Evaluate a linear-valued coefficient at the position of the 2887 * current quad. 2888 */ 2889 static void 2890 eval_linear_coef( 2891 struct tgsi_exec_machine *mach, 2892 unsigned attrib, 2893 unsigned chan ) 2894 { 2895 const float x = mach->QuadPos.xyzw[0].f[0]; 2896 const float y = mach->QuadPos.xyzw[1].f[0]; 2897 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 2898 const float dady = mach->InterpCoefs[attrib].dady[chan]; 2899 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 2900 mach->Inputs[attrib].xyzw[chan].f[0] = a0; 2901 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; 2902 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; 2903 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; 2904 } 2905 2906 /** 2907 * Evaluate a perspective-valued coefficient at the position of the 2908 * current quad. 2909 */ 2910 static void 2911 eval_perspective_coef( 2912 struct tgsi_exec_machine *mach, 2913 unsigned attrib, 2914 unsigned chan ) 2915 { 2916 const float x = mach->QuadPos.xyzw[0].f[0]; 2917 const float y = mach->QuadPos.xyzw[1].f[0]; 2918 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 2919 const float dady = mach->InterpCoefs[attrib].dady[chan]; 2920 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 2921 const float *w = mach->QuadPos.xyzw[3].f; 2922 /* divide by W here */ 2923 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; 2924 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; 2925 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; 2926 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; 2927 } 2928 2929 2930 typedef void (* eval_coef_func)( 2931 struct tgsi_exec_machine *mach, 2932 unsigned attrib, 2933 unsigned chan ); 2934 2935 static void 2936 exec_declaration(struct tgsi_exec_machine *mach, 2937 const struct tgsi_full_declaration *decl) 2938 { 2939 if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) { 2940 mach->SamplerViews[decl->Range.First] = decl->SamplerView; 2941 return; 2942 } 2943 2944 if (mach->ShaderType == PIPE_SHADER_FRAGMENT) { 2945 if (decl->Declaration.File == TGSI_FILE_INPUT) { 2946 uint first, last, mask; 2947 2948 first = decl->Range.First; 2949 last = decl->Range.Last; 2950 mask = decl->Declaration.UsageMask; 2951 2952 /* XXX we could remove this special-case code since 2953 * mach->InterpCoefs[first].a0 should already have the 2954 * front/back-face value. But we should first update the 2955 * ureg code to emit the right UsageMask value (WRITEMASK_X). 2956 * Then, we could remove the tgsi_exec_machine::Face field. 2957 */ 2958 /* XXX make FACE a system value */ 2959 if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { 2960 uint i; 2961 2962 assert(decl->Semantic.Index == 0); 2963 assert(first == last); 2964 2965 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 2966 mach->Inputs[first].xyzw[0].f[i] = mach->Face; 2967 } 2968 } else { 2969 eval_coef_func eval; 2970 uint i, j; 2971 2972 switch (decl->Interp.Interpolate) { 2973 case TGSI_INTERPOLATE_CONSTANT: 2974 eval = eval_constant_coef; 2975 break; 2976 2977 case TGSI_INTERPOLATE_LINEAR: 2978 eval = eval_linear_coef; 2979 break; 2980 2981 case TGSI_INTERPOLATE_PERSPECTIVE: 2982 eval = eval_perspective_coef; 2983 break; 2984 2985 case TGSI_INTERPOLATE_COLOR: 2986 eval = mach->flatshade_color ? eval_constant_coef : eval_perspective_coef; 2987 break; 2988 2989 default: 2990 assert(0); 2991 return; 2992 } 2993 2994 for (j = 0; j < TGSI_NUM_CHANNELS; j++) { 2995 if (mask & (1 << j)) { 2996 for (i = first; i <= last; i++) { 2997 eval(mach, i, j); 2998 } 2999 } 3000 } 3001 } 3002 3003 if (DEBUG_EXECUTION) { 3004 uint i, j; 3005 for (i = first; i <= last; ++i) { 3006 debug_printf("IN[%2u] = ", i); 3007 for (j = 0; j < TGSI_NUM_CHANNELS; j++) { 3008 if (j > 0) { 3009 debug_printf(" "); 3010 } 3011 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 3012 mach->Inputs[i].xyzw[0].f[j], mach->Inputs[i].xyzw[0].u[j], 3013 mach->Inputs[i].xyzw[1].f[j], mach->Inputs[i].xyzw[1].u[j], 3014 mach->Inputs[i].xyzw[2].f[j], mach->Inputs[i].xyzw[2].u[j], 3015 mach->Inputs[i].xyzw[3].f[j], mach->Inputs[i].xyzw[3].u[j]); 3016 } 3017 } 3018 } 3019 } 3020 } 3021 3022 } 3023 3024 typedef void (* micro_unary_op)(union tgsi_exec_channel *dst, 3025 const union tgsi_exec_channel *src); 3026 3027 static void 3028 exec_scalar_unary(struct tgsi_exec_machine *mach, 3029 const struct tgsi_full_instruction *inst, 3030 micro_unary_op op, 3031 enum tgsi_exec_datatype dst_datatype, 3032 enum tgsi_exec_datatype src_datatype) 3033 { 3034 unsigned int chan; 3035 union tgsi_exec_channel src; 3036 union tgsi_exec_channel dst; 3037 3038 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype); 3039 op(&dst, &src); 3040 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3041 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3042 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); 3043 } 3044 } 3045 } 3046 3047 static void 3048 exec_vector_unary(struct tgsi_exec_machine *mach, 3049 const struct tgsi_full_instruction *inst, 3050 micro_unary_op op, 3051 enum tgsi_exec_datatype dst_datatype, 3052 enum tgsi_exec_datatype src_datatype) 3053 { 3054 unsigned int chan; 3055 struct tgsi_exec_vector dst; 3056 3057 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3058 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3059 union tgsi_exec_channel src; 3060 3061 fetch_source(mach, &src, &inst->Src[0], chan, src_datatype); 3062 op(&dst.xyzw[chan], &src); 3063 } 3064 } 3065 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3066 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3067 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); 3068 } 3069 } 3070 } 3071 3072 typedef void (* micro_binary_op)(union tgsi_exec_channel *dst, 3073 const union tgsi_exec_channel *src0, 3074 const union tgsi_exec_channel *src1); 3075 3076 static void 3077 exec_scalar_binary(struct tgsi_exec_machine *mach, 3078 const struct tgsi_full_instruction *inst, 3079 micro_binary_op op, 3080 enum tgsi_exec_datatype dst_datatype, 3081 enum tgsi_exec_datatype src_datatype) 3082 { 3083 unsigned int chan; 3084 union tgsi_exec_channel src[2]; 3085 union tgsi_exec_channel dst; 3086 3087 fetch_source(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, src_datatype); 3088 fetch_source(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, src_datatype); 3089 op(&dst, &src[0], &src[1]); 3090 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3091 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3092 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); 3093 } 3094 } 3095 } 3096 3097 static void 3098 exec_vector_binary(struct tgsi_exec_machine *mach, 3099 const struct tgsi_full_instruction *inst, 3100 micro_binary_op op, 3101 enum tgsi_exec_datatype dst_datatype, 3102 enum tgsi_exec_datatype src_datatype) 3103 { 3104 unsigned int chan; 3105 struct tgsi_exec_vector dst; 3106 3107 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3108 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3109 union tgsi_exec_channel src[2]; 3110 3111 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 3112 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 3113 op(&dst.xyzw[chan], &src[0], &src[1]); 3114 } 3115 } 3116 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3117 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3118 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); 3119 } 3120 } 3121 } 3122 3123 typedef void (* micro_trinary_op)(union tgsi_exec_channel *dst, 3124 const union tgsi_exec_channel *src0, 3125 const union tgsi_exec_channel *src1, 3126 const union tgsi_exec_channel *src2); 3127 3128 static void 3129 exec_vector_trinary(struct tgsi_exec_machine *mach, 3130 const struct tgsi_full_instruction *inst, 3131 micro_trinary_op op, 3132 enum tgsi_exec_datatype dst_datatype, 3133 enum tgsi_exec_datatype src_datatype) 3134 { 3135 unsigned int chan; 3136 struct tgsi_exec_vector dst; 3137 3138 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3139 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3140 union tgsi_exec_channel src[3]; 3141 3142 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 3143 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 3144 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype); 3145 op(&dst.xyzw[chan], &src[0], &src[1], &src[2]); 3146 } 3147 } 3148 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3149 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3150 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); 3151 } 3152 } 3153 } 3154 3155 typedef void (* micro_quaternary_op)(union tgsi_exec_channel *dst, 3156 const union tgsi_exec_channel *src0, 3157 const union tgsi_exec_channel *src1, 3158 const union tgsi_exec_channel *src2, 3159 const union tgsi_exec_channel *src3); 3160 3161 static void 3162 exec_vector_quaternary(struct tgsi_exec_machine *mach, 3163 const struct tgsi_full_instruction *inst, 3164 micro_quaternary_op op, 3165 enum tgsi_exec_datatype dst_datatype, 3166 enum tgsi_exec_datatype src_datatype) 3167 { 3168 unsigned int chan; 3169 struct tgsi_exec_vector dst; 3170 3171 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3172 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3173 union tgsi_exec_channel src[4]; 3174 3175 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 3176 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 3177 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype); 3178 fetch_source(mach, &src[3], &inst->Src[3], chan, src_datatype); 3179 op(&dst.xyzw[chan], &src[0], &src[1], &src[2], &src[3]); 3180 } 3181 } 3182 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3183 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3184 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); 3185 } 3186 } 3187 } 3188 3189 static void 3190 exec_dp3(struct tgsi_exec_machine *mach, 3191 const struct tgsi_full_instruction *inst) 3192 { 3193 unsigned int chan; 3194 union tgsi_exec_channel arg[3]; 3195 3196 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3197 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3198 micro_mul(&arg[2], &arg[0], &arg[1]); 3199 3200 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) { 3201 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 3202 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); 3203 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 3204 } 3205 3206 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3207 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3208 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 3209 } 3210 } 3211 } 3212 3213 static void 3214 exec_dp4(struct tgsi_exec_machine *mach, 3215 const struct tgsi_full_instruction *inst) 3216 { 3217 unsigned int chan; 3218 union tgsi_exec_channel arg[3]; 3219 3220 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3221 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3222 micro_mul(&arg[2], &arg[0], &arg[1]); 3223 3224 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) { 3225 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 3226 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); 3227 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 3228 } 3229 3230 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3231 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3232 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 3233 } 3234 } 3235 } 3236 3237 static void 3238 exec_dp2(struct tgsi_exec_machine *mach, 3239 const struct tgsi_full_instruction *inst) 3240 { 3241 unsigned int chan; 3242 union tgsi_exec_channel arg[3]; 3243 3244 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3245 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3246 micro_mul(&arg[2], &arg[0], &arg[1]); 3247 3248 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3249 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3250 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 3251 3252 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3253 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3254 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 3255 } 3256 } 3257 } 3258 3259 static void 3260 exec_pk2h(struct tgsi_exec_machine *mach, 3261 const struct tgsi_full_instruction *inst) 3262 { 3263 unsigned chan; 3264 union tgsi_exec_channel arg[2], dst; 3265 3266 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3267 fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3268 for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) { 3269 dst.u[chan] = util_float_to_half(arg[0].f[chan]) | 3270 (util_float_to_half(arg[1].f[chan]) << 16); 3271 } 3272 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3273 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3274 store_dest(mach, &dst, &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_UINT); 3275 } 3276 } 3277 } 3278 3279 static void 3280 exec_up2h(struct tgsi_exec_machine *mach, 3281 const struct tgsi_full_instruction *inst) 3282 { 3283 unsigned chan; 3284 union tgsi_exec_channel arg, dst[2]; 3285 3286 fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); 3287 for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) { 3288 dst[0].f[chan] = util_half_to_float(arg.u[chan] & 0xffff); 3289 dst[1].f[chan] = util_half_to_float(arg.u[chan] >> 16); 3290 } 3291 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3292 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3293 store_dest(mach, &dst[chan & 1], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 3294 } 3295 } 3296 } 3297 3298 static void 3299 micro_ucmp(union tgsi_exec_channel *dst, 3300 const union tgsi_exec_channel *src0, 3301 const union tgsi_exec_channel *src1, 3302 const union tgsi_exec_channel *src2) 3303 { 3304 dst->f[0] = src0->u[0] ? src1->f[0] : src2->f[0]; 3305 dst->f[1] = src0->u[1] ? src1->f[1] : src2->f[1]; 3306 dst->f[2] = src0->u[2] ? src1->f[2] : src2->f[2]; 3307 dst->f[3] = src0->u[3] ? src1->f[3] : src2->f[3]; 3308 } 3309 3310 static void 3311 exec_ucmp(struct tgsi_exec_machine *mach, 3312 const struct tgsi_full_instruction *inst) 3313 { 3314 unsigned int chan; 3315 struct tgsi_exec_vector dst; 3316 3317 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3318 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3319 union tgsi_exec_channel src[3]; 3320 3321 fetch_source(mach, &src[0], &inst->Src[0], chan, 3322 TGSI_EXEC_DATA_UINT); 3323 fetch_source(mach, &src[1], &inst->Src[1], chan, 3324 TGSI_EXEC_DATA_FLOAT); 3325 fetch_source(mach, &src[2], &inst->Src[2], chan, 3326 TGSI_EXEC_DATA_FLOAT); 3327 micro_ucmp(&dst.xyzw[chan], &src[0], &src[1], &src[2]); 3328 } 3329 } 3330 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3331 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3332 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, 3333 TGSI_EXEC_DATA_FLOAT); 3334 } 3335 } 3336 } 3337 3338 static void 3339 exec_dst(struct tgsi_exec_machine *mach, 3340 const struct tgsi_full_instruction *inst) 3341 { 3342 union tgsi_exec_channel r[2]; 3343 union tgsi_exec_channel d[4]; 3344 3345 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3346 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3347 fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3348 micro_mul(&d[TGSI_CHAN_Y], &r[0], &r[1]); 3349 } 3350 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3351 fetch_source(mach, &d[TGSI_CHAN_Z], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3352 } 3353 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3354 fetch_source(mach, &d[TGSI_CHAN_W], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3355 } 3356 3357 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3358 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3359 } 3360 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3361 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3362 } 3363 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3364 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3365 } 3366 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3367 store_dest(mach, &d[TGSI_CHAN_W], &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3368 } 3369 } 3370 3371 static void 3372 exec_log(struct tgsi_exec_machine *mach, 3373 const struct tgsi_full_instruction *inst) 3374 { 3375 union tgsi_exec_channel r[3]; 3376 3377 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3378 micro_abs(&r[2], &r[0]); /* r2 = abs(r0) */ 3379 micro_lg2(&r[1], &r[2]); /* r1 = lg2(r2) */ 3380 micro_flr(&r[0], &r[1]); /* r0 = floor(r1) */ 3381 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3382 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3383 } 3384 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3385 micro_exp2(&r[0], &r[0]); /* r0 = 2 ^ r0 */ 3386 micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */ 3387 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3388 } 3389 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3390 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3391 } 3392 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3393 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3394 } 3395 } 3396 3397 static void 3398 exec_exp(struct tgsi_exec_machine *mach, 3399 const struct tgsi_full_instruction *inst) 3400 { 3401 union tgsi_exec_channel r[3]; 3402 3403 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3404 micro_flr(&r[1], &r[0]); /* r1 = floor(r0) */ 3405 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3406 micro_exp2(&r[2], &r[1]); /* r2 = 2 ^ r1 */ 3407 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3408 } 3409 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3410 micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */ 3411 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3412 } 3413 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3414 micro_exp2(&r[2], &r[0]); /* r2 = 2 ^ r0 */ 3415 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3416 } 3417 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3418 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3419 } 3420 } 3421 3422 static void 3423 exec_lit(struct tgsi_exec_machine *mach, 3424 const struct tgsi_full_instruction *inst) 3425 { 3426 union tgsi_exec_channel r[3]; 3427 union tgsi_exec_channel d[3]; 3428 3429 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) { 3430 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3431 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3432 fetch_source(mach, &r[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3433 micro_max(&r[1], &r[1], &ZeroVec); 3434 3435 fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3436 micro_min(&r[2], &r[2], &P128Vec); 3437 micro_max(&r[2], &r[2], &M128Vec); 3438 micro_pow(&r[1], &r[1], &r[2]); 3439 micro_lt(&d[TGSI_CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec); 3440 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3441 } 3442 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3443 micro_max(&d[TGSI_CHAN_Y], &r[0], &ZeroVec); 3444 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3445 } 3446 } 3447 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3448 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3449 } 3450 3451 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3452 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3453 } 3454 } 3455 3456 static void 3457 exec_break(struct tgsi_exec_machine *mach) 3458 { 3459 if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) { 3460 /* turn off loop channels for each enabled exec channel */ 3461 mach->LoopMask &= ~mach->ExecMask; 3462 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3463 UPDATE_EXEC_MASK(mach); 3464 } else { 3465 assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH); 3466 3467 mach->Switch.mask = 0x0; 3468 3469 UPDATE_EXEC_MASK(mach); 3470 } 3471 } 3472 3473 static void 3474 exec_switch(struct tgsi_exec_machine *mach, 3475 const struct tgsi_full_instruction *inst) 3476 { 3477 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); 3478 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 3479 3480 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; 3481 fetch_source(mach, &mach->Switch.selector, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); 3482 mach->Switch.mask = 0x0; 3483 mach->Switch.defaultMask = 0x0; 3484 3485 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 3486 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH; 3487 3488 UPDATE_EXEC_MASK(mach); 3489 } 3490 3491 static void 3492 exec_case(struct tgsi_exec_machine *mach, 3493 const struct tgsi_full_instruction *inst) 3494 { 3495 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; 3496 union tgsi_exec_channel src; 3497 uint mask = 0; 3498 3499 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); 3500 3501 if (mach->Switch.selector.u[0] == src.u[0]) { 3502 mask |= 0x1; 3503 } 3504 if (mach->Switch.selector.u[1] == src.u[1]) { 3505 mask |= 0x2; 3506 } 3507 if (mach->Switch.selector.u[2] == src.u[2]) { 3508 mask |= 0x4; 3509 } 3510 if (mach->Switch.selector.u[3] == src.u[3]) { 3511 mask |= 0x8; 3512 } 3513 3514 mach->Switch.defaultMask |= mask; 3515 3516 mach->Switch.mask |= mask & prevMask; 3517 3518 UPDATE_EXEC_MASK(mach); 3519 } 3520 3521 /* FIXME: this will only work if default is last */ 3522 static void 3523 exec_default(struct tgsi_exec_machine *mach) 3524 { 3525 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; 3526 3527 mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask; 3528 3529 UPDATE_EXEC_MASK(mach); 3530 } 3531 3532 static void 3533 exec_endswitch(struct tgsi_exec_machine *mach) 3534 { 3535 mach->Switch = mach->SwitchStack[--mach->SwitchStackTop]; 3536 mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; 3537 3538 UPDATE_EXEC_MASK(mach); 3539 } 3540 3541 typedef void (* micro_dop)(union tgsi_double_channel *dst, 3542 const union tgsi_double_channel *src); 3543 3544 typedef void (* micro_dop_sop)(union tgsi_double_channel *dst, 3545 const union tgsi_double_channel *src0, 3546 union tgsi_exec_channel *src1); 3547 3548 typedef void (* micro_dop_s)(union tgsi_double_channel *dst, 3549 const union tgsi_exec_channel *src); 3550 3551 typedef void (* micro_sop_d)(union tgsi_exec_channel *dst, 3552 const union tgsi_double_channel *src); 3553 3554 static void 3555 fetch_double_channel(struct tgsi_exec_machine *mach, 3556 union tgsi_double_channel *chan, 3557 const struct tgsi_full_src_register *reg, 3558 uint chan_0, 3559 uint chan_1) 3560 { 3561 union tgsi_exec_channel src[2]; 3562 uint i; 3563 3564 fetch_source_d(mach, &src[0], reg, chan_0, TGSI_EXEC_DATA_UINT); 3565 fetch_source_d(mach, &src[1], reg, chan_1, TGSI_EXEC_DATA_UINT); 3566 3567 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 3568 chan->u[i][0] = src[0].u[i]; 3569 chan->u[i][1] = src[1].u[i]; 3570 } 3571 if (reg->Register.Absolute) { 3572 micro_dabs(chan, chan); 3573 } 3574 if (reg->Register.Negate) { 3575 micro_dneg(chan, chan); 3576 } 3577 } 3578 3579 static void 3580 store_double_channel(struct tgsi_exec_machine *mach, 3581 const union tgsi_double_channel *chan, 3582 const struct tgsi_full_dst_register *reg, 3583 const struct tgsi_full_instruction *inst, 3584 uint chan_0, 3585 uint chan_1) 3586 { 3587 union tgsi_exec_channel dst[2]; 3588 uint i; 3589 union tgsi_double_channel temp; 3590 const uint execmask = mach->ExecMask; 3591 3592 if (!inst->Instruction.Saturate) { 3593 for (i = 0; i < TGSI_QUAD_SIZE; i++) 3594 if (execmask & (1 << i)) { 3595 dst[0].u[i] = chan->u[i][0]; 3596 dst[1].u[i] = chan->u[i][1]; 3597 } 3598 } 3599 else { 3600 for (i = 0; i < TGSI_QUAD_SIZE; i++) 3601 if (execmask & (1 << i)) { 3602 if (chan->d[i] < 0.0) 3603 temp.d[i] = 0.0; 3604 else if (chan->d[i] > 1.0) 3605 temp.d[i] = 1.0; 3606 else 3607 temp.d[i] = chan->d[i]; 3608 3609 dst[0].u[i] = temp.u[i][0]; 3610 dst[1].u[i] = temp.u[i][1]; 3611 } 3612 } 3613 3614 store_dest_double(mach, &dst[0], reg, inst, chan_0, TGSI_EXEC_DATA_UINT); 3615 if (chan_1 != -1) 3616 store_dest_double(mach, &dst[1], reg, inst, chan_1, TGSI_EXEC_DATA_UINT); 3617 } 3618 3619 static void 3620 exec_double_unary(struct tgsi_exec_machine *mach, 3621 const struct tgsi_full_instruction *inst, 3622 micro_dop op) 3623 { 3624 union tgsi_double_channel src; 3625 union tgsi_double_channel dst; 3626 3627 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) { 3628 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3629 op(&dst, &src); 3630 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 3631 } 3632 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) { 3633 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 3634 op(&dst, &src); 3635 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 3636 } 3637 } 3638 3639 static void 3640 exec_double_binary(struct tgsi_exec_machine *mach, 3641 const struct tgsi_full_instruction *inst, 3642 micro_dop op, 3643 enum tgsi_exec_datatype dst_datatype) 3644 { 3645 union tgsi_double_channel src[2]; 3646 union tgsi_double_channel dst; 3647 int first_dest_chan, second_dest_chan; 3648 int wmask; 3649 3650 wmask = inst->Dst[0].Register.WriteMask; 3651 /* these are & because of the way DSLT etc store their destinations */ 3652 if (wmask & TGSI_WRITEMASK_XY) { 3653 first_dest_chan = TGSI_CHAN_X; 3654 second_dest_chan = TGSI_CHAN_Y; 3655 if (dst_datatype == TGSI_EXEC_DATA_UINT) { 3656 first_dest_chan = (wmask & TGSI_WRITEMASK_X) ? TGSI_CHAN_X : TGSI_CHAN_Y; 3657 second_dest_chan = -1; 3658 } 3659 3660 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3661 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y); 3662 op(&dst, src); 3663 store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan); 3664 } 3665 3666 if (wmask & TGSI_WRITEMASK_ZW) { 3667 first_dest_chan = TGSI_CHAN_Z; 3668 second_dest_chan = TGSI_CHAN_W; 3669 if (dst_datatype == TGSI_EXEC_DATA_UINT) { 3670 first_dest_chan = (wmask & TGSI_WRITEMASK_Z) ? TGSI_CHAN_Z : TGSI_CHAN_W; 3671 second_dest_chan = -1; 3672 } 3673 3674 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 3675 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W); 3676 op(&dst, src); 3677 store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan); 3678 } 3679 } 3680 3681 static void 3682 exec_double_trinary(struct tgsi_exec_machine *mach, 3683 const struct tgsi_full_instruction *inst, 3684 micro_dop op) 3685 { 3686 union tgsi_double_channel src[3]; 3687 union tgsi_double_channel dst; 3688 3689 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) { 3690 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3691 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y); 3692 fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_X, TGSI_CHAN_Y); 3693 op(&dst, src); 3694 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 3695 } 3696 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) { 3697 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 3698 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W); 3699 fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_Z, TGSI_CHAN_W); 3700 op(&dst, src); 3701 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 3702 } 3703 } 3704 3705 static void 3706 exec_dldexp(struct tgsi_exec_machine *mach, 3707 const struct tgsi_full_instruction *inst) 3708 { 3709 union tgsi_double_channel src0; 3710 union tgsi_exec_channel src1; 3711 union tgsi_double_channel dst; 3712 int wmask; 3713 3714 wmask = inst->Dst[0].Register.WriteMask; 3715 if (wmask & TGSI_WRITEMASK_XY) { 3716 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3717 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); 3718 micro_dldexp(&dst, &src0, &src1); 3719 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 3720 } 3721 3722 if (wmask & TGSI_WRITEMASK_ZW) { 3723 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 3724 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT); 3725 micro_dldexp(&dst, &src0, &src1); 3726 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 3727 } 3728 } 3729 3730 static void 3731 exec_dfracexp(struct tgsi_exec_machine *mach, 3732 const struct tgsi_full_instruction *inst) 3733 { 3734 union tgsi_double_channel src; 3735 union tgsi_double_channel dst; 3736 union tgsi_exec_channel dst_exp; 3737 3738 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3739 micro_dfracexp(&dst, &dst_exp, &src); 3740 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) 3741 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 3742 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) 3743 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 3744 for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3745 if (inst->Dst[1].Register.WriteMask & (1 << chan)) 3746 store_dest(mach, &dst_exp, &inst->Dst[1], inst, chan, TGSI_EXEC_DATA_INT); 3747 } 3748 } 3749 3750 static void 3751 exec_arg0_64_arg1_32(struct tgsi_exec_machine *mach, 3752 const struct tgsi_full_instruction *inst, 3753 micro_dop_sop op) 3754 { 3755 union tgsi_double_channel src0; 3756 union tgsi_exec_channel src1; 3757 union tgsi_double_channel dst; 3758 int wmask; 3759 3760 wmask = inst->Dst[0].Register.WriteMask; 3761 if (wmask & TGSI_WRITEMASK_XY) { 3762 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3763 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); 3764 op(&dst, &src0, &src1); 3765 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 3766 } 3767 3768 if (wmask & TGSI_WRITEMASK_ZW) { 3769 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 3770 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT); 3771 op(&dst, &src0, &src1); 3772 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 3773 } 3774 } 3775 3776 static int 3777 get_image_coord_dim(unsigned tgsi_tex) 3778 { 3779 int dim; 3780 switch (tgsi_tex) { 3781 case TGSI_TEXTURE_BUFFER: 3782 case TGSI_TEXTURE_1D: 3783 dim = 1; 3784 break; 3785 case TGSI_TEXTURE_2D: 3786 case TGSI_TEXTURE_RECT: 3787 case TGSI_TEXTURE_1D_ARRAY: 3788 case TGSI_TEXTURE_2D_MSAA: 3789 dim = 2; 3790 break; 3791 case TGSI_TEXTURE_3D: 3792 case TGSI_TEXTURE_CUBE: 3793 case TGSI_TEXTURE_2D_ARRAY: 3794 case TGSI_TEXTURE_2D_ARRAY_MSAA: 3795 case TGSI_TEXTURE_CUBE_ARRAY: 3796 dim = 3; 3797 break; 3798 default: 3799 assert(!"unknown texture target"); 3800 dim = 0; 3801 break; 3802 } 3803 3804 return dim; 3805 } 3806 3807 static int 3808 get_image_coord_sample(unsigned tgsi_tex) 3809 { 3810 int sample = 0; 3811 switch (tgsi_tex) { 3812 case TGSI_TEXTURE_2D_MSAA: 3813 sample = 3; 3814 break; 3815 case TGSI_TEXTURE_2D_ARRAY_MSAA: 3816 sample = 4; 3817 break; 3818 default: 3819 break; 3820 } 3821 return sample; 3822 } 3823 3824 static void 3825 exec_load_img(struct tgsi_exec_machine *mach, 3826 const struct tgsi_full_instruction *inst) 3827 { 3828 union tgsi_exec_channel r[4], sample_r; 3829 uint unit; 3830 int sample; 3831 int i, j; 3832 int dim; 3833 uint chan; 3834 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 3835 struct tgsi_image_params params; 3836 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 3837 3838 unit = fetch_sampler_unit(mach, inst, 0); 3839 dim = get_image_coord_dim(inst->Memory.Texture); 3840 sample = get_image_coord_sample(inst->Memory.Texture); 3841 assert(dim <= 3); 3842 3843 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 3844 params.unit = unit; 3845 params.tgsi_tex_instr = inst->Memory.Texture; 3846 params.format = inst->Memory.Format; 3847 3848 for (i = 0; i < dim; i++) { 3849 IFETCH(&r[i], 1, TGSI_CHAN_X + i); 3850 } 3851 3852 if (sample) 3853 IFETCH(&sample_r, 1, TGSI_CHAN_X + sample); 3854 3855 mach->Image->load(mach->Image, ¶ms, 3856 r[0].i, r[1].i, r[2].i, sample_r.i, 3857 rgba); 3858 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 3859 r[0].f[j] = rgba[0][j]; 3860 r[1].f[j] = rgba[1][j]; 3861 r[2].f[j] = rgba[2][j]; 3862 r[3].f[j] = rgba[3][j]; 3863 } 3864 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3865 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3866 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 3867 } 3868 } 3869 } 3870 3871 static void 3872 exec_load_buf(struct tgsi_exec_machine *mach, 3873 const struct tgsi_full_instruction *inst) 3874 { 3875 union tgsi_exec_channel r[4]; 3876 uint unit; 3877 int j; 3878 uint chan; 3879 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 3880 struct tgsi_buffer_params params; 3881 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 3882 3883 unit = fetch_sampler_unit(mach, inst, 0); 3884 3885 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 3886 params.unit = unit; 3887 IFETCH(&r[0], 1, TGSI_CHAN_X); 3888 3889 mach->Buffer->load(mach->Buffer, ¶ms, 3890 r[0].i, rgba); 3891 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 3892 r[0].f[j] = rgba[0][j]; 3893 r[1].f[j] = rgba[1][j]; 3894 r[2].f[j] = rgba[2][j]; 3895 r[3].f[j] = rgba[3][j]; 3896 } 3897 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3898 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3899 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 3900 } 3901 } 3902 } 3903 3904 static void 3905 exec_load_mem(struct tgsi_exec_machine *mach, 3906 const struct tgsi_full_instruction *inst) 3907 { 3908 union tgsi_exec_channel r[4]; 3909 uint chan; 3910 char *ptr = mach->LocalMem; 3911 uint32_t offset; 3912 int j; 3913 3914 IFETCH(&r[0], 1, TGSI_CHAN_X); 3915 if (r[0].u[0] >= mach->LocalMemSize) 3916 return; 3917 3918 offset = r[0].u[0]; 3919 ptr += offset; 3920 3921 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 3922 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3923 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3924 memcpy(&r[chan].u[j], ptr + (4 * chan), 4); 3925 } 3926 } 3927 } 3928 3929 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3930 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3931 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 3932 } 3933 } 3934 } 3935 3936 static void 3937 exec_load(struct tgsi_exec_machine *mach, 3938 const struct tgsi_full_instruction *inst) 3939 { 3940 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) 3941 exec_load_img(mach, inst); 3942 else if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) 3943 exec_load_buf(mach, inst); 3944 else if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) 3945 exec_load_mem(mach, inst); 3946 } 3947 3948 static void 3949 exec_store_img(struct tgsi_exec_machine *mach, 3950 const struct tgsi_full_instruction *inst) 3951 { 3952 union tgsi_exec_channel r[3], sample_r; 3953 union tgsi_exec_channel value[4]; 3954 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 3955 struct tgsi_image_params params; 3956 int dim; 3957 int sample; 3958 int i, j; 3959 uint unit; 3960 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 3961 unit = inst->Dst[0].Register.Index; 3962 dim = get_image_coord_dim(inst->Memory.Texture); 3963 sample = get_image_coord_sample(inst->Memory.Texture); 3964 assert(dim <= 3); 3965 3966 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 3967 params.unit = unit; 3968 params.tgsi_tex_instr = inst->Memory.Texture; 3969 params.format = inst->Memory.Format; 3970 3971 for (i = 0; i < dim; i++) { 3972 IFETCH(&r[i], 0, TGSI_CHAN_X + i); 3973 } 3974 3975 for (i = 0; i < 4; i++) { 3976 FETCH(&value[i], 1, TGSI_CHAN_X + i); 3977 } 3978 if (sample) 3979 IFETCH(&sample_r, 0, TGSI_CHAN_X + sample); 3980 3981 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 3982 rgba[0][j] = value[0].f[j]; 3983 rgba[1][j] = value[1].f[j]; 3984 rgba[2][j] = value[2].f[j]; 3985 rgba[3][j] = value[3].f[j]; 3986 } 3987 3988 mach->Image->store(mach->Image, ¶ms, 3989 r[0].i, r[1].i, r[2].i, sample_r.i, 3990 rgba); 3991 } 3992 3993 static void 3994 exec_store_buf(struct tgsi_exec_machine *mach, 3995 const struct tgsi_full_instruction *inst) 3996 { 3997 union tgsi_exec_channel r[3]; 3998 union tgsi_exec_channel value[4]; 3999 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 4000 struct tgsi_buffer_params params; 4001 int i, j; 4002 uint unit; 4003 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4004 4005 unit = inst->Dst[0].Register.Index; 4006 4007 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 4008 params.unit = unit; 4009 params.writemask = inst->Dst[0].Register.WriteMask; 4010 4011 IFETCH(&r[0], 0, TGSI_CHAN_X); 4012 for (i = 0; i < 4; i++) { 4013 FETCH(&value[i], 1, TGSI_CHAN_X + i); 4014 } 4015 4016 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4017 rgba[0][j] = value[0].f[j]; 4018 rgba[1][j] = value[1].f[j]; 4019 rgba[2][j] = value[2].f[j]; 4020 rgba[3][j] = value[3].f[j]; 4021 } 4022 4023 mach->Buffer->store(mach->Buffer, ¶ms, 4024 r[0].i, 4025 rgba); 4026 } 4027 4028 static void 4029 exec_store_mem(struct tgsi_exec_machine *mach, 4030 const struct tgsi_full_instruction *inst) 4031 { 4032 union tgsi_exec_channel r[3]; 4033 union tgsi_exec_channel value[4]; 4034 uint i, chan; 4035 char *ptr = mach->LocalMem; 4036 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4037 int execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 4038 4039 IFETCH(&r[0], 0, TGSI_CHAN_X); 4040 4041 for (i = 0; i < 4; i++) { 4042 FETCH(&value[i], 1, TGSI_CHAN_X + i); 4043 } 4044 4045 if (r[0].u[0] >= mach->LocalMemSize) 4046 return; 4047 ptr += r[0].u[0]; 4048 4049 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 4050 if (execmask & (1 << i)) { 4051 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4052 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4053 memcpy(ptr + (chan * 4), &value[chan].u[0], 4); 4054 } 4055 } 4056 } 4057 } 4058 } 4059 4060 static void 4061 exec_store(struct tgsi_exec_machine *mach, 4062 const struct tgsi_full_instruction *inst) 4063 { 4064 if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE) 4065 exec_store_img(mach, inst); 4066 else if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) 4067 exec_store_buf(mach, inst); 4068 else if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) 4069 exec_store_mem(mach, inst); 4070 } 4071 4072 static void 4073 exec_atomop_img(struct tgsi_exec_machine *mach, 4074 const struct tgsi_full_instruction *inst) 4075 { 4076 union tgsi_exec_channel r[4], sample_r; 4077 union tgsi_exec_channel value[4], value2[4]; 4078 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 4079 float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 4080 struct tgsi_image_params params; 4081 int dim; 4082 int sample; 4083 int i, j; 4084 uint unit, chan; 4085 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4086 unit = fetch_sampler_unit(mach, inst, 0); 4087 dim = get_image_coord_dim(inst->Memory.Texture); 4088 sample = get_image_coord_sample(inst->Memory.Texture); 4089 assert(dim <= 3); 4090 4091 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 4092 params.unit = unit; 4093 params.tgsi_tex_instr = inst->Memory.Texture; 4094 params.format = inst->Memory.Format; 4095 4096 for (i = 0; i < dim; i++) { 4097 IFETCH(&r[i], 1, TGSI_CHAN_X + i); 4098 } 4099 4100 for (i = 0; i < 4; i++) { 4101 FETCH(&value[i], 2, TGSI_CHAN_X + i); 4102 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) 4103 FETCH(&value2[i], 3, TGSI_CHAN_X + i); 4104 } 4105 if (sample) 4106 IFETCH(&sample_r, 1, TGSI_CHAN_X + sample); 4107 4108 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4109 rgba[0][j] = value[0].f[j]; 4110 rgba[1][j] = value[1].f[j]; 4111 rgba[2][j] = value[2].f[j]; 4112 rgba[3][j] = value[3].f[j]; 4113 } 4114 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) { 4115 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4116 rgba2[0][j] = value2[0].f[j]; 4117 rgba2[1][j] = value2[1].f[j]; 4118 rgba2[2][j] = value2[2].f[j]; 4119 rgba2[3][j] = value2[3].f[j]; 4120 } 4121 } 4122 4123 mach->Image->op(mach->Image, ¶ms, inst->Instruction.Opcode, 4124 r[0].i, r[1].i, r[2].i, sample_r.i, 4125 rgba, rgba2); 4126 4127 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4128 r[0].f[j] = rgba[0][j]; 4129 r[1].f[j] = rgba[1][j]; 4130 r[2].f[j] = rgba[2][j]; 4131 r[3].f[j] = rgba[3][j]; 4132 } 4133 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4134 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4135 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 4136 } 4137 } 4138 } 4139 4140 static void 4141 exec_atomop_buf(struct tgsi_exec_machine *mach, 4142 const struct tgsi_full_instruction *inst) 4143 { 4144 union tgsi_exec_channel r[4]; 4145 union tgsi_exec_channel value[4], value2[4]; 4146 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 4147 float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 4148 struct tgsi_buffer_params params; 4149 int i, j; 4150 uint unit, chan; 4151 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4152 4153 unit = fetch_sampler_unit(mach, inst, 0); 4154 4155 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 4156 params.unit = unit; 4157 params.writemask = inst->Dst[0].Register.WriteMask; 4158 4159 IFETCH(&r[0], 1, TGSI_CHAN_X); 4160 4161 for (i = 0; i < 4; i++) { 4162 FETCH(&value[i], 2, TGSI_CHAN_X + i); 4163 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) 4164 FETCH(&value2[i], 3, TGSI_CHAN_X + i); 4165 } 4166 4167 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4168 rgba[0][j] = value[0].f[j]; 4169 rgba[1][j] = value[1].f[j]; 4170 rgba[2][j] = value[2].f[j]; 4171 rgba[3][j] = value[3].f[j]; 4172 } 4173 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) { 4174 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4175 rgba2[0][j] = value2[0].f[j]; 4176 rgba2[1][j] = value2[1].f[j]; 4177 rgba2[2][j] = value2[2].f[j]; 4178 rgba2[3][j] = value2[3].f[j]; 4179 } 4180 } 4181 4182 mach->Buffer->op(mach->Buffer, ¶ms, inst->Instruction.Opcode, 4183 r[0].i, 4184 rgba, rgba2); 4185 4186 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4187 r[0].f[j] = rgba[0][j]; 4188 r[1].f[j] = rgba[1][j]; 4189 r[2].f[j] = rgba[2][j]; 4190 r[3].f[j] = rgba[3][j]; 4191 } 4192 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4193 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4194 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 4195 } 4196 } 4197 } 4198 4199 static void 4200 exec_atomop_mem(struct tgsi_exec_machine *mach, 4201 const struct tgsi_full_instruction *inst) 4202 { 4203 union tgsi_exec_channel r[4]; 4204 union tgsi_exec_channel value[4], value2[4]; 4205 char *ptr = mach->LocalMem; 4206 uint32_t val; 4207 uint chan, i; 4208 uint32_t offset; 4209 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4210 int execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 4211 IFETCH(&r[0], 1, TGSI_CHAN_X); 4212 4213 if (r[0].u[0] >= mach->LocalMemSize) 4214 return; 4215 4216 offset = r[0].u[0]; 4217 ptr += offset; 4218 for (i = 0; i < 4; i++) { 4219 FETCH(&value[i], 2, TGSI_CHAN_X + i); 4220 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) 4221 FETCH(&value2[i], 3, TGSI_CHAN_X + i); 4222 } 4223 4224 memcpy(&r[0].u[0], ptr, 4); 4225 val = r[0].u[0]; 4226 switch (inst->Instruction.Opcode) { 4227 case TGSI_OPCODE_ATOMUADD: 4228 val += value[0].u[0]; 4229 break; 4230 case TGSI_OPCODE_ATOMXOR: 4231 val ^= value[0].u[0]; 4232 break; 4233 case TGSI_OPCODE_ATOMOR: 4234 val |= value[0].u[0]; 4235 break; 4236 case TGSI_OPCODE_ATOMAND: 4237 val &= value[0].u[0]; 4238 break; 4239 case TGSI_OPCODE_ATOMUMIN: 4240 val = MIN2(val, value[0].u[0]); 4241 break; 4242 case TGSI_OPCODE_ATOMUMAX: 4243 val = MAX2(val, value[0].u[0]); 4244 break; 4245 case TGSI_OPCODE_ATOMIMIN: 4246 val = MIN2(r[0].i[0], value[0].i[0]); 4247 break; 4248 case TGSI_OPCODE_ATOMIMAX: 4249 val = MAX2(r[0].i[0], value[0].i[0]); 4250 break; 4251 case TGSI_OPCODE_ATOMXCHG: 4252 val = value[0].i[0]; 4253 break; 4254 case TGSI_OPCODE_ATOMCAS: 4255 if (val == value[0].u[0]) 4256 val = value2[0].u[0]; 4257 break; 4258 default: 4259 break; 4260 } 4261 for (i = 0; i < TGSI_QUAD_SIZE; i++) 4262 if (execmask & (1 << i)) 4263 memcpy(ptr, &val, 4); 4264 4265 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4266 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4267 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 4268 } 4269 } 4270 } 4271 4272 static void 4273 exec_atomop(struct tgsi_exec_machine *mach, 4274 const struct tgsi_full_instruction *inst) 4275 { 4276 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) 4277 exec_atomop_img(mach, inst); 4278 else if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) 4279 exec_atomop_buf(mach, inst); 4280 else if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) 4281 exec_atomop_mem(mach, inst); 4282 } 4283 4284 static void 4285 exec_resq_img(struct tgsi_exec_machine *mach, 4286 const struct tgsi_full_instruction *inst) 4287 { 4288 int result[4]; 4289 union tgsi_exec_channel r[4]; 4290 uint unit; 4291 int i, chan, j; 4292 struct tgsi_image_params params; 4293 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4294 4295 unit = fetch_sampler_unit(mach, inst, 0); 4296 4297 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 4298 params.unit = unit; 4299 params.tgsi_tex_instr = inst->Memory.Texture; 4300 params.format = inst->Memory.Format; 4301 4302 mach->Image->get_dims(mach->Image, ¶ms, result); 4303 4304 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 4305 for (j = 0; j < 4; j++) { 4306 r[j].i[i] = result[j]; 4307 } 4308 } 4309 4310 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4311 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4312 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, 4313 TGSI_EXEC_DATA_INT); 4314 } 4315 } 4316 } 4317 4318 static void 4319 exec_resq_buf(struct tgsi_exec_machine *mach, 4320 const struct tgsi_full_instruction *inst) 4321 { 4322 int result; 4323 union tgsi_exec_channel r[4]; 4324 uint unit; 4325 int i, chan; 4326 struct tgsi_buffer_params params; 4327 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4328 4329 unit = fetch_sampler_unit(mach, inst, 0); 4330 4331 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 4332 params.unit = unit; 4333 4334 mach->Buffer->get_dims(mach->Buffer, ¶ms, &result); 4335 4336 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 4337 r[0].i[i] = result; 4338 } 4339 4340 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4341 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4342 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, 4343 TGSI_EXEC_DATA_INT); 4344 } 4345 } 4346 } 4347 4348 static void 4349 exec_resq(struct tgsi_exec_machine *mach, 4350 const struct tgsi_full_instruction *inst) 4351 { 4352 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) 4353 exec_resq_img(mach, inst); 4354 else 4355 exec_resq_buf(mach, inst); 4356 } 4357 4358 static void 4359 micro_f2u64(union tgsi_double_channel *dst, 4360 const union tgsi_exec_channel *src) 4361 { 4362 dst->u64[0] = (uint64_t)src->f[0]; 4363 dst->u64[1] = (uint64_t)src->f[1]; 4364 dst->u64[2] = (uint64_t)src->f[2]; 4365 dst->u64[3] = (uint64_t)src->f[3]; 4366 } 4367 4368 static void 4369 micro_f2i64(union tgsi_double_channel *dst, 4370 const union tgsi_exec_channel *src) 4371 { 4372 dst->i64[0] = (int64_t)src->f[0]; 4373 dst->i64[1] = (int64_t)src->f[1]; 4374 dst->i64[2] = (int64_t)src->f[2]; 4375 dst->i64[3] = (int64_t)src->f[3]; 4376 } 4377 4378 static void 4379 micro_u2i64(union tgsi_double_channel *dst, 4380 const union tgsi_exec_channel *src) 4381 { 4382 dst->u64[0] = (uint64_t)src->u[0]; 4383 dst->u64[1] = (uint64_t)src->u[1]; 4384 dst->u64[2] = (uint64_t)src->u[2]; 4385 dst->u64[3] = (uint64_t)src->u[3]; 4386 } 4387 4388 static void 4389 micro_i2i64(union tgsi_double_channel *dst, 4390 const union tgsi_exec_channel *src) 4391 { 4392 dst->i64[0] = (int64_t)src->i[0]; 4393 dst->i64[1] = (int64_t)src->i[1]; 4394 dst->i64[2] = (int64_t)src->i[2]; 4395 dst->i64[3] = (int64_t)src->i[3]; 4396 } 4397 4398 static void 4399 micro_d2u64(union tgsi_double_channel *dst, 4400 const union tgsi_double_channel *src) 4401 { 4402 dst->u64[0] = (uint64_t)src->d[0]; 4403 dst->u64[1] = (uint64_t)src->d[1]; 4404 dst->u64[2] = (uint64_t)src->d[2]; 4405 dst->u64[3] = (uint64_t)src->d[3]; 4406 } 4407 4408 static void 4409 micro_d2i64(union tgsi_double_channel *dst, 4410 const union tgsi_double_channel *src) 4411 { 4412 dst->i64[0] = (int64_t)src->d[0]; 4413 dst->i64[1] = (int64_t)src->d[1]; 4414 dst->i64[2] = (int64_t)src->d[2]; 4415 dst->i64[3] = (int64_t)src->d[3]; 4416 } 4417 4418 static void 4419 micro_u642d(union tgsi_double_channel *dst, 4420 const union tgsi_double_channel *src) 4421 { 4422 dst->d[0] = (double)src->u64[0]; 4423 dst->d[1] = (double)src->u64[1]; 4424 dst->d[2] = (double)src->u64[2]; 4425 dst->d[3] = (double)src->u64[3]; 4426 } 4427 4428 static void 4429 micro_i642d(union tgsi_double_channel *dst, 4430 const union tgsi_double_channel *src) 4431 { 4432 dst->d[0] = (double)src->i64[0]; 4433 dst->d[1] = (double)src->i64[1]; 4434 dst->d[2] = (double)src->i64[2]; 4435 dst->d[3] = (double)src->i64[3]; 4436 } 4437 4438 static void 4439 micro_u642f(union tgsi_exec_channel *dst, 4440 const union tgsi_double_channel *src) 4441 { 4442 dst->f[0] = (float)src->u64[0]; 4443 dst->f[1] = (float)src->u64[1]; 4444 dst->f[2] = (float)src->u64[2]; 4445 dst->f[3] = (float)src->u64[3]; 4446 } 4447 4448 static void 4449 micro_i642f(union tgsi_exec_channel *dst, 4450 const union tgsi_double_channel *src) 4451 { 4452 dst->f[0] = (float)src->i64[0]; 4453 dst->f[1] = (float)src->i64[1]; 4454 dst->f[2] = (float)src->i64[2]; 4455 dst->f[3] = (float)src->i64[3]; 4456 } 4457 4458 static void 4459 exec_t_2_64(struct tgsi_exec_machine *mach, 4460 const struct tgsi_full_instruction *inst, 4461 micro_dop_s op, 4462 enum tgsi_exec_datatype src_datatype) 4463 { 4464 union tgsi_exec_channel src; 4465 union tgsi_double_channel dst; 4466 4467 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) { 4468 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype); 4469 op(&dst, &src); 4470 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 4471 } 4472 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) { 4473 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, src_datatype); 4474 op(&dst, &src); 4475 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 4476 } 4477 } 4478 4479 static void 4480 exec_64_2_t(struct tgsi_exec_machine *mach, 4481 const struct tgsi_full_instruction *inst, 4482 micro_sop_d op, 4483 enum tgsi_exec_datatype dst_datatype) 4484 { 4485 union tgsi_double_channel src; 4486 union tgsi_exec_channel dst; 4487 int wm = inst->Dst[0].Register.WriteMask; 4488 int i; 4489 int bit; 4490 for (i = 0; i < 2; i++) { 4491 bit = ffs(wm); 4492 if (bit) { 4493 wm &= ~(1 << (bit - 1)); 4494 if (i == 0) 4495 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 4496 else 4497 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 4498 op(&dst, &src); 4499 store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, dst_datatype); 4500 } 4501 } 4502 } 4503 4504 static void 4505 micro_i2f(union tgsi_exec_channel *dst, 4506 const union tgsi_exec_channel *src) 4507 { 4508 dst->f[0] = (float)src->i[0]; 4509 dst->f[1] = (float)src->i[1]; 4510 dst->f[2] = (float)src->i[2]; 4511 dst->f[3] = (float)src->i[3]; 4512 } 4513 4514 static void 4515 micro_not(union tgsi_exec_channel *dst, 4516 const union tgsi_exec_channel *src) 4517 { 4518 dst->u[0] = ~src->u[0]; 4519 dst->u[1] = ~src->u[1]; 4520 dst->u[2] = ~src->u[2]; 4521 dst->u[3] = ~src->u[3]; 4522 } 4523 4524 static void 4525 micro_shl(union tgsi_exec_channel *dst, 4526 const union tgsi_exec_channel *src0, 4527 const union tgsi_exec_channel *src1) 4528 { 4529 unsigned masked_count; 4530 masked_count = src1->u[0] & 0x1f; 4531 dst->u[0] = src0->u[0] << masked_count; 4532 masked_count = src1->u[1] & 0x1f; 4533 dst->u[1] = src0->u[1] << masked_count; 4534 masked_count = src1->u[2] & 0x1f; 4535 dst->u[2] = src0->u[2] << masked_count; 4536 masked_count = src1->u[3] & 0x1f; 4537 dst->u[3] = src0->u[3] << masked_count; 4538 } 4539 4540 static void 4541 micro_and(union tgsi_exec_channel *dst, 4542 const union tgsi_exec_channel *src0, 4543 const union tgsi_exec_channel *src1) 4544 { 4545 dst->u[0] = src0->u[0] & src1->u[0]; 4546 dst->u[1] = src0->u[1] & src1->u[1]; 4547 dst->u[2] = src0->u[2] & src1->u[2]; 4548 dst->u[3] = src0->u[3] & src1->u[3]; 4549 } 4550 4551 static void 4552 micro_or(union tgsi_exec_channel *dst, 4553 const union tgsi_exec_channel *src0, 4554 const union tgsi_exec_channel *src1) 4555 { 4556 dst->u[0] = src0->u[0] | src1->u[0]; 4557 dst->u[1] = src0->u[1] | src1->u[1]; 4558 dst->u[2] = src0->u[2] | src1->u[2]; 4559 dst->u[3] = src0->u[3] | src1->u[3]; 4560 } 4561 4562 static void 4563 micro_xor(union tgsi_exec_channel *dst, 4564 const union tgsi_exec_channel *src0, 4565 const union tgsi_exec_channel *src1) 4566 { 4567 dst->u[0] = src0->u[0] ^ src1->u[0]; 4568 dst->u[1] = src0->u[1] ^ src1->u[1]; 4569 dst->u[2] = src0->u[2] ^ src1->u[2]; 4570 dst->u[3] = src0->u[3] ^ src1->u[3]; 4571 } 4572 4573 static void 4574 micro_mod(union tgsi_exec_channel *dst, 4575 const union tgsi_exec_channel *src0, 4576 const union tgsi_exec_channel *src1) 4577 { 4578 dst->i[0] = src1->i[0] ? src0->i[0] % src1->i[0] : ~0; 4579 dst->i[1] = src1->i[1] ? src0->i[1] % src1->i[1] : ~0; 4580 dst->i[2] = src1->i[2] ? src0->i[2] % src1->i[2] : ~0; 4581 dst->i[3] = src1->i[3] ? src0->i[3] % src1->i[3] : ~0; 4582 } 4583 4584 static void 4585 micro_f2i(union tgsi_exec_channel *dst, 4586 const union tgsi_exec_channel *src) 4587 { 4588 dst->i[0] = (int)src->f[0]; 4589 dst->i[1] = (int)src->f[1]; 4590 dst->i[2] = (int)src->f[2]; 4591 dst->i[3] = (int)src->f[3]; 4592 } 4593 4594 static void 4595 micro_fseq(union tgsi_exec_channel *dst, 4596 const union tgsi_exec_channel *src0, 4597 const union tgsi_exec_channel *src1) 4598 { 4599 dst->u[0] = src0->f[0] == src1->f[0] ? ~0 : 0; 4600 dst->u[1] = src0->f[1] == src1->f[1] ? ~0 : 0; 4601 dst->u[2] = src0->f[2] == src1->f[2] ? ~0 : 0; 4602 dst->u[3] = src0->f[3] == src1->f[3] ? ~0 : 0; 4603 } 4604 4605 static void 4606 micro_fsge(union tgsi_exec_channel *dst, 4607 const union tgsi_exec_channel *src0, 4608 const union tgsi_exec_channel *src1) 4609 { 4610 dst->u[0] = src0->f[0] >= src1->f[0] ? ~0 : 0; 4611 dst->u[1] = src0->f[1] >= src1->f[1] ? ~0 : 0; 4612 dst->u[2] = src0->f[2] >= src1->f[2] ? ~0 : 0; 4613 dst->u[3] = src0->f[3] >= src1->f[3] ? ~0 : 0; 4614 } 4615 4616 static void 4617 micro_fslt(union tgsi_exec_channel *dst, 4618 const union tgsi_exec_channel *src0, 4619 const union tgsi_exec_channel *src1) 4620 { 4621 dst->u[0] = src0->f[0] < src1->f[0] ? ~0 : 0; 4622 dst->u[1] = src0->f[1] < src1->f[1] ? ~0 : 0; 4623 dst->u[2] = src0->f[2] < src1->f[2] ? ~0 : 0; 4624 dst->u[3] = src0->f[3] < src1->f[3] ? ~0 : 0; 4625 } 4626 4627 static void 4628 micro_fsne(union tgsi_exec_channel *dst, 4629 const union tgsi_exec_channel *src0, 4630 const union tgsi_exec_channel *src1) 4631 { 4632 dst->u[0] = src0->f[0] != src1->f[0] ? ~0 : 0; 4633 dst->u[1] = src0->f[1] != src1->f[1] ? ~0 : 0; 4634 dst->u[2] = src0->f[2] != src1->f[2] ? ~0 : 0; 4635 dst->u[3] = src0->f[3] != src1->f[3] ? ~0 : 0; 4636 } 4637 4638 static void 4639 micro_idiv(union tgsi_exec_channel *dst, 4640 const union tgsi_exec_channel *src0, 4641 const union tgsi_exec_channel *src1) 4642 { 4643 dst->i[0] = src1->i[0] ? src0->i[0] / src1->i[0] : 0; 4644 dst->i[1] = src1->i[1] ? src0->i[1] / src1->i[1] : 0; 4645 dst->i[2] = src1->i[2] ? src0->i[2] / src1->i[2] : 0; 4646 dst->i[3] = src1->i[3] ? src0->i[3] / src1->i[3] : 0; 4647 } 4648 4649 static void 4650 micro_imax(union tgsi_exec_channel *dst, 4651 const union tgsi_exec_channel *src0, 4652 const union tgsi_exec_channel *src1) 4653 { 4654 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; 4655 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; 4656 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; 4657 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; 4658 } 4659 4660 static void 4661 micro_imin(union tgsi_exec_channel *dst, 4662 const union tgsi_exec_channel *src0, 4663 const union tgsi_exec_channel *src1) 4664 { 4665 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; 4666 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; 4667 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; 4668 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; 4669 } 4670 4671 static void 4672 micro_isge(union tgsi_exec_channel *dst, 4673 const union tgsi_exec_channel *src0, 4674 const union tgsi_exec_channel *src1) 4675 { 4676 dst->i[0] = src0->i[0] >= src1->i[0] ? -1 : 0; 4677 dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0; 4678 dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0; 4679 dst->i[3] = src0->i[3] >= src1->i[3] ? -1 : 0; 4680 } 4681 4682 static void 4683 micro_ishr(union tgsi_exec_channel *dst, 4684 const union tgsi_exec_channel *src0, 4685 const union tgsi_exec_channel *src1) 4686 { 4687 unsigned masked_count; 4688 masked_count = src1->i[0] & 0x1f; 4689 dst->i[0] = src0->i[0] >> masked_count; 4690 masked_count = src1->i[1] & 0x1f; 4691 dst->i[1] = src0->i[1] >> masked_count; 4692 masked_count = src1->i[2] & 0x1f; 4693 dst->i[2] = src0->i[2] >> masked_count; 4694 masked_count = src1->i[3] & 0x1f; 4695 dst->i[3] = src0->i[3] >> masked_count; 4696 } 4697 4698 static void 4699 micro_islt(union tgsi_exec_channel *dst, 4700 const union tgsi_exec_channel *src0, 4701 const union tgsi_exec_channel *src1) 4702 { 4703 dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0; 4704 dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0; 4705 dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0; 4706 dst->i[3] = src0->i[3] < src1->i[3] ? -1 : 0; 4707 } 4708 4709 static void 4710 micro_f2u(union tgsi_exec_channel *dst, 4711 const union tgsi_exec_channel *src) 4712 { 4713 dst->u[0] = (uint)src->f[0]; 4714 dst->u[1] = (uint)src->f[1]; 4715 dst->u[2] = (uint)src->f[2]; 4716 dst->u[3] = (uint)src->f[3]; 4717 } 4718 4719 static void 4720 micro_u2f(union tgsi_exec_channel *dst, 4721 const union tgsi_exec_channel *src) 4722 { 4723 dst->f[0] = (float)src->u[0]; 4724 dst->f[1] = (float)src->u[1]; 4725 dst->f[2] = (float)src->u[2]; 4726 dst->f[3] = (float)src->u[3]; 4727 } 4728 4729 static void 4730 micro_uadd(union tgsi_exec_channel *dst, 4731 const union tgsi_exec_channel *src0, 4732 const union tgsi_exec_channel *src1) 4733 { 4734 dst->u[0] = src0->u[0] + src1->u[0]; 4735 dst->u[1] = src0->u[1] + src1->u[1]; 4736 dst->u[2] = src0->u[2] + src1->u[2]; 4737 dst->u[3] = src0->u[3] + src1->u[3]; 4738 } 4739 4740 static void 4741 micro_udiv(union tgsi_exec_channel *dst, 4742 const union tgsi_exec_channel *src0, 4743 const union tgsi_exec_channel *src1) 4744 { 4745 dst->u[0] = src1->u[0] ? src0->u[0] / src1->u[0] : ~0u; 4746 dst->u[1] = src1->u[1] ? src0->u[1] / src1->u[1] : ~0u; 4747 dst->u[2] = src1->u[2] ? src0->u[2] / src1->u[2] : ~0u; 4748 dst->u[3] = src1->u[3] ? src0->u[3] / src1->u[3] : ~0u; 4749 } 4750 4751 static void 4752 micro_umad(union tgsi_exec_channel *dst, 4753 const union tgsi_exec_channel *src0, 4754 const union tgsi_exec_channel *src1, 4755 const union tgsi_exec_channel *src2) 4756 { 4757 dst->u[0] = src0->u[0] * src1->u[0] + src2->u[0]; 4758 dst->u[1] = src0->u[1] * src1->u[1] + src2->u[1]; 4759 dst->u[2] = src0->u[2] * src1->u[2] + src2->u[2]; 4760 dst->u[3] = src0->u[3] * src1->u[3] + src2->u[3]; 4761 } 4762 4763 static void 4764 micro_umax(union tgsi_exec_channel *dst, 4765 const union tgsi_exec_channel *src0, 4766 const union tgsi_exec_channel *src1) 4767 { 4768 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; 4769 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; 4770 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; 4771 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; 4772 } 4773 4774 static void 4775 micro_umin(union tgsi_exec_channel *dst, 4776 const union tgsi_exec_channel *src0, 4777 const union tgsi_exec_channel *src1) 4778 { 4779 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; 4780 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; 4781 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; 4782 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; 4783 } 4784 4785 static void 4786 micro_umod(union tgsi_exec_channel *dst, 4787 const union tgsi_exec_channel *src0, 4788 const union tgsi_exec_channel *src1) 4789 { 4790 dst->u[0] = src1->u[0] ? src0->u[0] % src1->u[0] : ~0u; 4791 dst->u[1] = src1->u[1] ? src0->u[1] % src1->u[1] : ~0u; 4792 dst->u[2] = src1->u[2] ? src0->u[2] % src1->u[2] : ~0u; 4793 dst->u[3] = src1->u[3] ? src0->u[3] % src1->u[3] : ~0u; 4794 } 4795 4796 static void 4797 micro_umul(union tgsi_exec_channel *dst, 4798 const union tgsi_exec_channel *src0, 4799 const union tgsi_exec_channel *src1) 4800 { 4801 dst->u[0] = src0->u[0] * src1->u[0]; 4802 dst->u[1] = src0->u[1] * src1->u[1]; 4803 dst->u[2] = src0->u[2] * src1->u[2]; 4804 dst->u[3] = src0->u[3] * src1->u[3]; 4805 } 4806 4807 static void 4808 micro_imul_hi(union tgsi_exec_channel *dst, 4809 const union tgsi_exec_channel *src0, 4810 const union tgsi_exec_channel *src1) 4811 { 4812 #define I64M(x, y) ((((int64_t)x) * ((int64_t)y)) >> 32) 4813 dst->i[0] = I64M(src0->i[0], src1->i[0]); 4814 dst->i[1] = I64M(src0->i[1], src1->i[1]); 4815 dst->i[2] = I64M(src0->i[2], src1->i[2]); 4816 dst->i[3] = I64M(src0->i[3], src1->i[3]); 4817 #undef I64M 4818 } 4819 4820 static void 4821 micro_umul_hi(union tgsi_exec_channel *dst, 4822 const union tgsi_exec_channel *src0, 4823 const union tgsi_exec_channel *src1) 4824 { 4825 #define U64M(x, y) ((((uint64_t)x) * ((uint64_t)y)) >> 32) 4826 dst->u[0] = U64M(src0->u[0], src1->u[0]); 4827 dst->u[1] = U64M(src0->u[1], src1->u[1]); 4828 dst->u[2] = U64M(src0->u[2], src1->u[2]); 4829 dst->u[3] = U64M(src0->u[3], src1->u[3]); 4830 #undef U64M 4831 } 4832 4833 static void 4834 micro_useq(union tgsi_exec_channel *dst, 4835 const union tgsi_exec_channel *src0, 4836 const union tgsi_exec_channel *src1) 4837 { 4838 dst->u[0] = src0->u[0] == src1->u[0] ? ~0 : 0; 4839 dst->u[1] = src0->u[1] == src1->u[1] ? ~0 : 0; 4840 dst->u[2] = src0->u[2] == src1->u[2] ? ~0 : 0; 4841 dst->u[3] = src0->u[3] == src1->u[3] ? ~0 : 0; 4842 } 4843 4844 static void 4845 micro_usge(union tgsi_exec_channel *dst, 4846 const union tgsi_exec_channel *src0, 4847 const union tgsi_exec_channel *src1) 4848 { 4849 dst->u[0] = src0->u[0] >= src1->u[0] ? ~0 : 0; 4850 dst->u[1] = src0->u[1] >= src1->u[1] ? ~0 : 0; 4851 dst->u[2] = src0->u[2] >= src1->u[2] ? ~0 : 0; 4852 dst->u[3] = src0->u[3] >= src1->u[3] ? ~0 : 0; 4853 } 4854 4855 static void 4856 micro_ushr(union tgsi_exec_channel *dst, 4857 const union tgsi_exec_channel *src0, 4858 const union tgsi_exec_channel *src1) 4859 { 4860 unsigned masked_count; 4861 masked_count = src1->u[0] & 0x1f; 4862 dst->u[0] = src0->u[0] >> masked_count; 4863 masked_count = src1->u[1] & 0x1f; 4864 dst->u[1] = src0->u[1] >> masked_count; 4865 masked_count = src1->u[2] & 0x1f; 4866 dst->u[2] = src0->u[2] >> masked_count; 4867 masked_count = src1->u[3] & 0x1f; 4868 dst->u[3] = src0->u[3] >> masked_count; 4869 } 4870 4871 static void 4872 micro_uslt(union tgsi_exec_channel *dst, 4873 const union tgsi_exec_channel *src0, 4874 const union tgsi_exec_channel *src1) 4875 { 4876 dst->u[0] = src0->u[0] < src1->u[0] ? ~0 : 0; 4877 dst->u[1] = src0->u[1] < src1->u[1] ? ~0 : 0; 4878 dst->u[2] = src0->u[2] < src1->u[2] ? ~0 : 0; 4879 dst->u[3] = src0->u[3] < src1->u[3] ? ~0 : 0; 4880 } 4881 4882 static void 4883 micro_usne(union tgsi_exec_channel *dst, 4884 const union tgsi_exec_channel *src0, 4885 const union tgsi_exec_channel *src1) 4886 { 4887 dst->u[0] = src0->u[0] != src1->u[0] ? ~0 : 0; 4888 dst->u[1] = src0->u[1] != src1->u[1] ? ~0 : 0; 4889 dst->u[2] = src0->u[2] != src1->u[2] ? ~0 : 0; 4890 dst->u[3] = src0->u[3] != src1->u[3] ? ~0 : 0; 4891 } 4892 4893 static void 4894 micro_uarl(union tgsi_exec_channel *dst, 4895 const union tgsi_exec_channel *src) 4896 { 4897 dst->i[0] = src->u[0]; 4898 dst->i[1] = src->u[1]; 4899 dst->i[2] = src->u[2]; 4900 dst->i[3] = src->u[3]; 4901 } 4902 4903 /** 4904 * Signed bitfield extract (i.e. sign-extend the extracted bits) 4905 */ 4906 static void 4907 micro_ibfe(union tgsi_exec_channel *dst, 4908 const union tgsi_exec_channel *src0, 4909 const union tgsi_exec_channel *src1, 4910 const union tgsi_exec_channel *src2) 4911 { 4912 int i; 4913 for (i = 0; i < 4; i++) { 4914 int width = src2->i[i] & 0x1f; 4915 int offset = src1->i[i] & 0x1f; 4916 if (width == 0) 4917 dst->i[i] = 0; 4918 else if (width + offset < 32) 4919 dst->i[i] = (src0->i[i] << (32 - width - offset)) >> (32 - width); 4920 else 4921 dst->i[i] = src0->i[i] >> offset; 4922 } 4923 } 4924 4925 /** 4926 * Unsigned bitfield extract 4927 */ 4928 static void 4929 micro_ubfe(union tgsi_exec_channel *dst, 4930 const union tgsi_exec_channel *src0, 4931 const union tgsi_exec_channel *src1, 4932 const union tgsi_exec_channel *src2) 4933 { 4934 int i; 4935 for (i = 0; i < 4; i++) { 4936 int width = src2->u[i] & 0x1f; 4937 int offset = src1->u[i] & 0x1f; 4938 if (width == 0) 4939 dst->u[i] = 0; 4940 else if (width + offset < 32) 4941 dst->u[i] = (src0->u[i] << (32 - width - offset)) >> (32 - width); 4942 else 4943 dst->u[i] = src0->u[i] >> offset; 4944 } 4945 } 4946 4947 /** 4948 * Bitfield insert: copy low bits from src1 into a region of src0. 4949 */ 4950 static void 4951 micro_bfi(union tgsi_exec_channel *dst, 4952 const union tgsi_exec_channel *src0, 4953 const union tgsi_exec_channel *src1, 4954 const union tgsi_exec_channel *src2, 4955 const union tgsi_exec_channel *src3) 4956 { 4957 int i; 4958 for (i = 0; i < 4; i++) { 4959 int width = src3->u[i] & 0x1f; 4960 int offset = src2->u[i] & 0x1f; 4961 int bitmask = ((1 << width) - 1) << offset; 4962 dst->u[i] = ((src1->u[i] << offset) & bitmask) | (src0->u[i] & ~bitmask); 4963 } 4964 } 4965 4966 static void 4967 micro_brev(union tgsi_exec_channel *dst, 4968 const union tgsi_exec_channel *src) 4969 { 4970 dst->u[0] = util_bitreverse(src->u[0]); 4971 dst->u[1] = util_bitreverse(src->u[1]); 4972 dst->u[2] = util_bitreverse(src->u[2]); 4973 dst->u[3] = util_bitreverse(src->u[3]); 4974 } 4975 4976 static void 4977 micro_popc(union tgsi_exec_channel *dst, 4978 const union tgsi_exec_channel *src) 4979 { 4980 dst->u[0] = util_bitcount(src->u[0]); 4981 dst->u[1] = util_bitcount(src->u[1]); 4982 dst->u[2] = util_bitcount(src->u[2]); 4983 dst->u[3] = util_bitcount(src->u[3]); 4984 } 4985 4986 static void 4987 micro_lsb(union tgsi_exec_channel *dst, 4988 const union tgsi_exec_channel *src) 4989 { 4990 dst->i[0] = ffs(src->u[0]) - 1; 4991 dst->i[1] = ffs(src->u[1]) - 1; 4992 dst->i[2] = ffs(src->u[2]) - 1; 4993 dst->i[3] = ffs(src->u[3]) - 1; 4994 } 4995 4996 static void 4997 micro_imsb(union tgsi_exec_channel *dst, 4998 const union tgsi_exec_channel *src) 4999 { 5000 dst->i[0] = util_last_bit_signed(src->i[0]) - 1; 5001 dst->i[1] = util_last_bit_signed(src->i[1]) - 1; 5002 dst->i[2] = util_last_bit_signed(src->i[2]) - 1; 5003 dst->i[3] = util_last_bit_signed(src->i[3]) - 1; 5004 } 5005 5006 static void 5007 micro_umsb(union tgsi_exec_channel *dst, 5008 const union tgsi_exec_channel *src) 5009 { 5010 dst->i[0] = util_last_bit(src->u[0]) - 1; 5011 dst->i[1] = util_last_bit(src->u[1]) - 1; 5012 dst->i[2] = util_last_bit(src->u[2]) - 1; 5013 dst->i[3] = util_last_bit(src->u[3]) - 1; 5014 } 5015 5016 /** 5017 * Execute a TGSI instruction. 5018 * Returns TRUE if a barrier instruction is hit, 5019 * otherwise FALSE. 5020 */ 5021 static boolean 5022 exec_instruction( 5023 struct tgsi_exec_machine *mach, 5024 const struct tgsi_full_instruction *inst, 5025 int *pc ) 5026 { 5027 union tgsi_exec_channel r[10]; 5028 5029 (*pc)++; 5030 5031 switch (inst->Instruction.Opcode) { 5032 case TGSI_OPCODE_ARL: 5033 exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); 5034 break; 5035 5036 case TGSI_OPCODE_MOV: 5037 exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 5038 break; 5039 5040 case TGSI_OPCODE_LIT: 5041 exec_lit(mach, inst); 5042 break; 5043 5044 case TGSI_OPCODE_RCP: 5045 exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5046 break; 5047 5048 case TGSI_OPCODE_RSQ: 5049 exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5050 break; 5051 5052 case TGSI_OPCODE_EXP: 5053 exec_exp(mach, inst); 5054 break; 5055 5056 case TGSI_OPCODE_LOG: 5057 exec_log(mach, inst); 5058 break; 5059 5060 case TGSI_OPCODE_MUL: 5061 exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5062 break; 5063 5064 case TGSI_OPCODE_ADD: 5065 exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5066 break; 5067 5068 case TGSI_OPCODE_DP3: 5069 exec_dp3(mach, inst); 5070 break; 5071 5072 case TGSI_OPCODE_DP4: 5073 exec_dp4(mach, inst); 5074 break; 5075 5076 case TGSI_OPCODE_DST: 5077 exec_dst(mach, inst); 5078 break; 5079 5080 case TGSI_OPCODE_MIN: 5081 exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5082 break; 5083 5084 case TGSI_OPCODE_MAX: 5085 exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5086 break; 5087 5088 case TGSI_OPCODE_SLT: 5089 exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5090 break; 5091 5092 case TGSI_OPCODE_SGE: 5093 exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5094 break; 5095 5096 case TGSI_OPCODE_MAD: 5097 exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5098 break; 5099 5100 case TGSI_OPCODE_LRP: 5101 exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5102 break; 5103 5104 case TGSI_OPCODE_SQRT: 5105 exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5106 break; 5107 5108 case TGSI_OPCODE_FRC: 5109 exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5110 break; 5111 5112 case TGSI_OPCODE_FLR: 5113 exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5114 break; 5115 5116 case TGSI_OPCODE_ROUND: 5117 exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5118 break; 5119 5120 case TGSI_OPCODE_EX2: 5121 exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5122 break; 5123 5124 case TGSI_OPCODE_LG2: 5125 exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5126 break; 5127 5128 case TGSI_OPCODE_POW: 5129 exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5130 break; 5131 5132 case TGSI_OPCODE_LDEXP: 5133 exec_vector_binary(mach, inst, micro_ldexp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5134 break; 5135 5136 case TGSI_OPCODE_COS: 5137 exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5138 break; 5139 5140 case TGSI_OPCODE_DDX: 5141 exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5142 break; 5143 5144 case TGSI_OPCODE_DDY: 5145 exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5146 break; 5147 5148 case TGSI_OPCODE_KILL: 5149 exec_kill (mach, inst); 5150 break; 5151 5152 case TGSI_OPCODE_KILL_IF: 5153 exec_kill_if (mach, inst); 5154 break; 5155 5156 case TGSI_OPCODE_PK2H: 5157 exec_pk2h(mach, inst); 5158 break; 5159 5160 case TGSI_OPCODE_PK2US: 5161 assert (0); 5162 break; 5163 5164 case TGSI_OPCODE_PK4B: 5165 assert (0); 5166 break; 5167 5168 case TGSI_OPCODE_PK4UB: 5169 assert (0); 5170 break; 5171 5172 case TGSI_OPCODE_SEQ: 5173 exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5174 break; 5175 5176 case TGSI_OPCODE_SGT: 5177 exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5178 break; 5179 5180 case TGSI_OPCODE_SIN: 5181 exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5182 break; 5183 5184 case TGSI_OPCODE_SLE: 5185 exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5186 break; 5187 5188 case TGSI_OPCODE_SNE: 5189 exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5190 break; 5191 5192 case TGSI_OPCODE_TEX: 5193 /* simple texture lookup */ 5194 /* src[0] = texcoord */ 5195 /* src[1] = sampler unit */ 5196 exec_tex(mach, inst, TEX_MODIFIER_NONE, 1); 5197 break; 5198 5199 case TGSI_OPCODE_TXB: 5200 /* Texture lookup with lod bias */ 5201 /* src[0] = texcoord (src[0].w = LOD bias) */ 5202 /* src[1] = sampler unit */ 5203 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 1); 5204 break; 5205 5206 case TGSI_OPCODE_TXD: 5207 /* Texture lookup with explict partial derivatives */ 5208 /* src[0] = texcoord */ 5209 /* src[1] = d[strq]/dx */ 5210 /* src[2] = d[strq]/dy */ 5211 /* src[3] = sampler unit */ 5212 exec_txd(mach, inst); 5213 break; 5214 5215 case TGSI_OPCODE_TXL: 5216 /* Texture lookup with explit LOD */ 5217 /* src[0] = texcoord (src[0].w = LOD) */ 5218 /* src[1] = sampler unit */ 5219 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 1); 5220 break; 5221 5222 case TGSI_OPCODE_TXP: 5223 /* Texture lookup with projection */ 5224 /* src[0] = texcoord (src[0].w = projection) */ 5225 /* src[1] = sampler unit */ 5226 exec_tex(mach, inst, TEX_MODIFIER_PROJECTED, 1); 5227 break; 5228 5229 case TGSI_OPCODE_TG4: 5230 /* src[0] = texcoord */ 5231 /* src[1] = component */ 5232 /* src[2] = sampler unit */ 5233 exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2); 5234 break; 5235 5236 case TGSI_OPCODE_LODQ: 5237 /* src[0] = texcoord */ 5238 /* src[1] = sampler unit */ 5239 exec_lodq(mach, inst); 5240 break; 5241 5242 case TGSI_OPCODE_UP2H: 5243 exec_up2h(mach, inst); 5244 break; 5245 5246 case TGSI_OPCODE_UP2US: 5247 assert (0); 5248 break; 5249 5250 case TGSI_OPCODE_UP4B: 5251 assert (0); 5252 break; 5253 5254 case TGSI_OPCODE_UP4UB: 5255 assert (0); 5256 break; 5257 5258 case TGSI_OPCODE_ARR: 5259 exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); 5260 break; 5261 5262 case TGSI_OPCODE_CAL: 5263 /* skip the call if no execution channels are enabled */ 5264 if (mach->ExecMask) { 5265 /* do the call */ 5266 5267 /* First, record the depths of the execution stacks. 5268 * This is important for deeply nested/looped return statements. 5269 * We have to unwind the stacks by the correct amount. For a 5270 * real code generator, we could determine the number of entries 5271 * to pop off each stack with simple static analysis and avoid 5272 * implementing this data structure at run time. 5273 */ 5274 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; 5275 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; 5276 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; 5277 mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop; 5278 mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop; 5279 /* note that PC was already incremented above */ 5280 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; 5281 5282 mach->CallStackTop++; 5283 5284 /* Second, push the Cond, Loop, Cont, Func stacks */ 5285 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 5286 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5287 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5288 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); 5289 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 5290 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); 5291 5292 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 5293 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 5294 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 5295 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; 5296 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 5297 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; 5298 5299 /* Finally, jump to the subroutine. The label is a pointer 5300 * (an instruction number) to the BGNSUB instruction. 5301 */ 5302 *pc = inst->Label.Label; 5303 assert(mach->Instructions[*pc].Instruction.Opcode 5304 == TGSI_OPCODE_BGNSUB); 5305 } 5306 break; 5307 5308 case TGSI_OPCODE_RET: 5309 mach->FuncMask &= ~mach->ExecMask; 5310 UPDATE_EXEC_MASK(mach); 5311 5312 if (mach->FuncMask == 0x0) { 5313 /* really return now (otherwise, keep executing */ 5314 5315 if (mach->CallStackTop == 0) { 5316 /* returning from main() */ 5317 mach->CondStackTop = 0; 5318 mach->LoopStackTop = 0; 5319 mach->ContStackTop = 0; 5320 mach->LoopLabelStackTop = 0; 5321 mach->SwitchStackTop = 0; 5322 mach->BreakStackTop = 0; 5323 *pc = -1; 5324 return FALSE; 5325 } 5326 5327 assert(mach->CallStackTop > 0); 5328 mach->CallStackTop--; 5329 5330 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 5331 mach->CondMask = mach->CondStack[mach->CondStackTop]; 5332 5333 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 5334 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 5335 5336 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 5337 mach->ContMask = mach->ContStack[mach->ContStackTop]; 5338 5339 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; 5340 mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; 5341 5342 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; 5343 mach->BreakType = mach->BreakStack[mach->BreakStackTop]; 5344 5345 assert(mach->FuncStackTop > 0); 5346 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 5347 5348 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 5349 5350 UPDATE_EXEC_MASK(mach); 5351 } 5352 break; 5353 5354 case TGSI_OPCODE_SSG: 5355 exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5356 break; 5357 5358 case TGSI_OPCODE_CMP: 5359 exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5360 break; 5361 5362 case TGSI_OPCODE_DIV: 5363 exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5364 break; 5365 5366 case TGSI_OPCODE_DP2: 5367 exec_dp2(mach, inst); 5368 break; 5369 5370 case TGSI_OPCODE_IF: 5371 /* push CondMask */ 5372 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 5373 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 5374 FETCH( &r[0], 0, TGSI_CHAN_X ); 5375 /* update CondMask */ 5376 if( ! r[0].f[0] ) { 5377 mach->CondMask &= ~0x1; 5378 } 5379 if( ! r[0].f[1] ) { 5380 mach->CondMask &= ~0x2; 5381 } 5382 if( ! r[0].f[2] ) { 5383 mach->CondMask &= ~0x4; 5384 } 5385 if( ! r[0].f[3] ) { 5386 mach->CondMask &= ~0x8; 5387 } 5388 UPDATE_EXEC_MASK(mach); 5389 /* Todo: If CondMask==0, jump to ELSE */ 5390 break; 5391 5392 case TGSI_OPCODE_UIF: 5393 /* push CondMask */ 5394 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 5395 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 5396 IFETCH( &r[0], 0, TGSI_CHAN_X ); 5397 /* update CondMask */ 5398 if( ! r[0].u[0] ) { 5399 mach->CondMask &= ~0x1; 5400 } 5401 if( ! r[0].u[1] ) { 5402 mach->CondMask &= ~0x2; 5403 } 5404 if( ! r[0].u[2] ) { 5405 mach->CondMask &= ~0x4; 5406 } 5407 if( ! r[0].u[3] ) { 5408 mach->CondMask &= ~0x8; 5409 } 5410 UPDATE_EXEC_MASK(mach); 5411 /* Todo: If CondMask==0, jump to ELSE */ 5412 break; 5413 5414 case TGSI_OPCODE_ELSE: 5415 /* invert CondMask wrt previous mask */ 5416 { 5417 uint prevMask; 5418 assert(mach->CondStackTop > 0); 5419 prevMask = mach->CondStack[mach->CondStackTop - 1]; 5420 mach->CondMask = ~mach->CondMask & prevMask; 5421 UPDATE_EXEC_MASK(mach); 5422 /* Todo: If CondMask==0, jump to ENDIF */ 5423 } 5424 break; 5425 5426 case TGSI_OPCODE_ENDIF: 5427 /* pop CondMask */ 5428 assert(mach->CondStackTop > 0); 5429 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 5430 UPDATE_EXEC_MASK(mach); 5431 break; 5432 5433 case TGSI_OPCODE_END: 5434 /* make sure we end primitives which haven't 5435 * been explicitly emitted */ 5436 conditional_emit_primitive(mach); 5437 /* halt execution */ 5438 *pc = -1; 5439 break; 5440 5441 case TGSI_OPCODE_CEIL: 5442 exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5443 break; 5444 5445 case TGSI_OPCODE_I2F: 5446 exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT); 5447 break; 5448 5449 case TGSI_OPCODE_NOT: 5450 exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5451 break; 5452 5453 case TGSI_OPCODE_TRUNC: 5454 exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5455 break; 5456 5457 case TGSI_OPCODE_SHL: 5458 exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5459 break; 5460 5461 case TGSI_OPCODE_AND: 5462 exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5463 break; 5464 5465 case TGSI_OPCODE_OR: 5466 exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5467 break; 5468 5469 case TGSI_OPCODE_MOD: 5470 exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5471 break; 5472 5473 case TGSI_OPCODE_XOR: 5474 exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5475 break; 5476 5477 case TGSI_OPCODE_TXF: 5478 exec_txf(mach, inst); 5479 break; 5480 5481 case TGSI_OPCODE_TXQ: 5482 exec_txq(mach, inst); 5483 break; 5484 5485 case TGSI_OPCODE_EMIT: 5486 emit_vertex(mach); 5487 break; 5488 5489 case TGSI_OPCODE_ENDPRIM: 5490 emit_primitive(mach); 5491 break; 5492 5493 case TGSI_OPCODE_BGNLOOP: 5494 /* push LoopMask and ContMasks */ 5495 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5496 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5497 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5498 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 5499 5500 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 5501 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 5502 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; 5503 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 5504 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP; 5505 break; 5506 5507 case TGSI_OPCODE_ENDLOOP: 5508 /* Restore ContMask, but don't pop */ 5509 assert(mach->ContStackTop > 0); 5510 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 5511 UPDATE_EXEC_MASK(mach); 5512 if (mach->ExecMask) { 5513 /* repeat loop: jump to instruction just past BGNLOOP */ 5514 assert(mach->LoopLabelStackTop > 0); 5515 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; 5516 } 5517 else { 5518 /* exit loop: pop LoopMask */ 5519 assert(mach->LoopStackTop > 0); 5520 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 5521 /* pop ContMask */ 5522 assert(mach->ContStackTop > 0); 5523 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 5524 assert(mach->LoopLabelStackTop > 0); 5525 --mach->LoopLabelStackTop; 5526 5527 mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; 5528 } 5529 UPDATE_EXEC_MASK(mach); 5530 break; 5531 5532 case TGSI_OPCODE_BRK: 5533 exec_break(mach); 5534 break; 5535 5536 case TGSI_OPCODE_CONT: 5537 /* turn off cont channels for each enabled exec channel */ 5538 mach->ContMask &= ~mach->ExecMask; 5539 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 5540 UPDATE_EXEC_MASK(mach); 5541 break; 5542 5543 case TGSI_OPCODE_BGNSUB: 5544 /* no-op */ 5545 break; 5546 5547 case TGSI_OPCODE_ENDSUB: 5548 /* 5549 * XXX: This really should be a no-op. We should never reach this opcode. 5550 */ 5551 5552 assert(mach->CallStackTop > 0); 5553 mach->CallStackTop--; 5554 5555 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 5556 mach->CondMask = mach->CondStack[mach->CondStackTop]; 5557 5558 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 5559 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 5560 5561 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 5562 mach->ContMask = mach->ContStack[mach->ContStackTop]; 5563 5564 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; 5565 mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; 5566 5567 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; 5568 mach->BreakType = mach->BreakStack[mach->BreakStackTop]; 5569 5570 assert(mach->FuncStackTop > 0); 5571 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 5572 5573 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 5574 5575 UPDATE_EXEC_MASK(mach); 5576 break; 5577 5578 case TGSI_OPCODE_NOP: 5579 break; 5580 5581 case TGSI_OPCODE_F2I: 5582 exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); 5583 break; 5584 5585 case TGSI_OPCODE_FSEQ: 5586 exec_vector_binary(mach, inst, micro_fseq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 5587 break; 5588 5589 case TGSI_OPCODE_FSGE: 5590 exec_vector_binary(mach, inst, micro_fsge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 5591 break; 5592 5593 case TGSI_OPCODE_FSLT: 5594 exec_vector_binary(mach, inst, micro_fslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 5595 break; 5596 5597 case TGSI_OPCODE_FSNE: 5598 exec_vector_binary(mach, inst, micro_fsne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 5599 break; 5600 5601 case TGSI_OPCODE_IDIV: 5602 exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5603 break; 5604 5605 case TGSI_OPCODE_IMAX: 5606 exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5607 break; 5608 5609 case TGSI_OPCODE_IMIN: 5610 exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5611 break; 5612 5613 case TGSI_OPCODE_INEG: 5614 exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5615 break; 5616 5617 case TGSI_OPCODE_ISGE: 5618 exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5619 break; 5620 5621 case TGSI_OPCODE_ISHR: 5622 exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5623 break; 5624 5625 case TGSI_OPCODE_ISLT: 5626 exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5627 break; 5628 5629 case TGSI_OPCODE_F2U: 5630 exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 5631 break; 5632 5633 case TGSI_OPCODE_U2F: 5634 exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT); 5635 break; 5636 5637 case TGSI_OPCODE_UADD: 5638 exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5639 break; 5640 5641 case TGSI_OPCODE_UDIV: 5642 exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5643 break; 5644 5645 case TGSI_OPCODE_UMAD: 5646 exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5647 break; 5648 5649 case TGSI_OPCODE_UMAX: 5650 exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5651 break; 5652 5653 case TGSI_OPCODE_UMIN: 5654 exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5655 break; 5656 5657 case TGSI_OPCODE_UMOD: 5658 exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5659 break; 5660 5661 case TGSI_OPCODE_UMUL: 5662 exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5663 break; 5664 5665 case TGSI_OPCODE_IMUL_HI: 5666 exec_vector_binary(mach, inst, micro_imul_hi, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5667 break; 5668 5669 case TGSI_OPCODE_UMUL_HI: 5670 exec_vector_binary(mach, inst, micro_umul_hi, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5671 break; 5672 5673 case TGSI_OPCODE_USEQ: 5674 exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5675 break; 5676 5677 case TGSI_OPCODE_USGE: 5678 exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5679 break; 5680 5681 case TGSI_OPCODE_USHR: 5682 exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5683 break; 5684 5685 case TGSI_OPCODE_USLT: 5686 exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5687 break; 5688 5689 case TGSI_OPCODE_USNE: 5690 exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5691 break; 5692 5693 case TGSI_OPCODE_SWITCH: 5694 exec_switch(mach, inst); 5695 break; 5696 5697 case TGSI_OPCODE_CASE: 5698 exec_case(mach, inst); 5699 break; 5700 5701 case TGSI_OPCODE_DEFAULT: 5702 exec_default(mach); 5703 break; 5704 5705 case TGSI_OPCODE_ENDSWITCH: 5706 exec_endswitch(mach); 5707 break; 5708 5709 case TGSI_OPCODE_SAMPLE_I: 5710 exec_txf(mach, inst); 5711 break; 5712 5713 case TGSI_OPCODE_SAMPLE_I_MS: 5714 exec_txf(mach, inst); 5715 break; 5716 5717 case TGSI_OPCODE_SAMPLE: 5718 exec_sample(mach, inst, TEX_MODIFIER_NONE, FALSE); 5719 break; 5720 5721 case TGSI_OPCODE_SAMPLE_B: 5722 exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS, FALSE); 5723 break; 5724 5725 case TGSI_OPCODE_SAMPLE_C: 5726 exec_sample(mach, inst, TEX_MODIFIER_NONE, TRUE); 5727 break; 5728 5729 case TGSI_OPCODE_SAMPLE_C_LZ: 5730 exec_sample(mach, inst, TEX_MODIFIER_LEVEL_ZERO, TRUE); 5731 break; 5732 5733 case TGSI_OPCODE_SAMPLE_D: 5734 exec_sample_d(mach, inst); 5735 break; 5736 5737 case TGSI_OPCODE_SAMPLE_L: 5738 exec_sample(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, FALSE); 5739 break; 5740 5741 case TGSI_OPCODE_GATHER4: 5742 exec_sample(mach, inst, TEX_MODIFIER_GATHER, FALSE); 5743 break; 5744 5745 case TGSI_OPCODE_SVIEWINFO: 5746 exec_txq(mach, inst); 5747 break; 5748 5749 case TGSI_OPCODE_SAMPLE_POS: 5750 assert(0); 5751 break; 5752 5753 case TGSI_OPCODE_SAMPLE_INFO: 5754 assert(0); 5755 break; 5756 5757 case TGSI_OPCODE_LOD: 5758 exec_lodq(mach, inst); 5759 break; 5760 5761 case TGSI_OPCODE_UARL: 5762 exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT); 5763 break; 5764 5765 case TGSI_OPCODE_UCMP: 5766 exec_ucmp(mach, inst); 5767 break; 5768 5769 case TGSI_OPCODE_IABS: 5770 exec_vector_unary(mach, inst, micro_iabs, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5771 break; 5772 5773 case TGSI_OPCODE_ISSG: 5774 exec_vector_unary(mach, inst, micro_isgn, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5775 break; 5776 5777 case TGSI_OPCODE_TEX2: 5778 /* simple texture lookup */ 5779 /* src[0] = texcoord */ 5780 /* src[1] = compare */ 5781 /* src[2] = sampler unit */ 5782 exec_tex(mach, inst, TEX_MODIFIER_NONE, 2); 5783 break; 5784 case TGSI_OPCODE_TXB2: 5785 /* simple texture lookup */ 5786 /* src[0] = texcoord */ 5787 /* src[1] = bias */ 5788 /* src[2] = sampler unit */ 5789 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 2); 5790 break; 5791 case TGSI_OPCODE_TXL2: 5792 /* simple texture lookup */ 5793 /* src[0] = texcoord */ 5794 /* src[1] = lod */ 5795 /* src[2] = sampler unit */ 5796 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 2); 5797 break; 5798 5799 case TGSI_OPCODE_IBFE: 5800 exec_vector_trinary(mach, inst, micro_ibfe, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5801 break; 5802 case TGSI_OPCODE_UBFE: 5803 exec_vector_trinary(mach, inst, micro_ubfe, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5804 break; 5805 case TGSI_OPCODE_BFI: 5806 exec_vector_quaternary(mach, inst, micro_bfi, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5807 break; 5808 case TGSI_OPCODE_BREV: 5809 exec_vector_unary(mach, inst, micro_brev, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5810 break; 5811 case TGSI_OPCODE_POPC: 5812 exec_vector_unary(mach, inst, micro_popc, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5813 break; 5814 case TGSI_OPCODE_LSB: 5815 exec_vector_unary(mach, inst, micro_lsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT); 5816 break; 5817 case TGSI_OPCODE_IMSB: 5818 exec_vector_unary(mach, inst, micro_imsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5819 break; 5820 case TGSI_OPCODE_UMSB: 5821 exec_vector_unary(mach, inst, micro_umsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT); 5822 break; 5823 5824 case TGSI_OPCODE_F2D: 5825 exec_t_2_64(mach, inst, micro_f2d, TGSI_EXEC_DATA_FLOAT); 5826 break; 5827 5828 case TGSI_OPCODE_D2F: 5829 exec_64_2_t(mach, inst, micro_d2f, TGSI_EXEC_DATA_FLOAT); 5830 break; 5831 5832 case TGSI_OPCODE_DABS: 5833 exec_double_unary(mach, inst, micro_dabs); 5834 break; 5835 5836 case TGSI_OPCODE_DNEG: 5837 exec_double_unary(mach, inst, micro_dneg); 5838 break; 5839 5840 case TGSI_OPCODE_DADD: 5841 exec_double_binary(mach, inst, micro_dadd, TGSI_EXEC_DATA_DOUBLE); 5842 break; 5843 5844 case TGSI_OPCODE_DDIV: 5845 exec_double_binary(mach, inst, micro_ddiv, TGSI_EXEC_DATA_DOUBLE); 5846 break; 5847 5848 case TGSI_OPCODE_DMUL: 5849 exec_double_binary(mach, inst, micro_dmul, TGSI_EXEC_DATA_DOUBLE); 5850 break; 5851 5852 case TGSI_OPCODE_DMAX: 5853 exec_double_binary(mach, inst, micro_dmax, TGSI_EXEC_DATA_DOUBLE); 5854 break; 5855 5856 case TGSI_OPCODE_DMIN: 5857 exec_double_binary(mach, inst, micro_dmin, TGSI_EXEC_DATA_DOUBLE); 5858 break; 5859 5860 case TGSI_OPCODE_DSLT: 5861 exec_double_binary(mach, inst, micro_dslt, TGSI_EXEC_DATA_UINT); 5862 break; 5863 5864 case TGSI_OPCODE_DSGE: 5865 exec_double_binary(mach, inst, micro_dsge, TGSI_EXEC_DATA_UINT); 5866 break; 5867 5868 case TGSI_OPCODE_DSEQ: 5869 exec_double_binary(mach, inst, micro_dseq, TGSI_EXEC_DATA_UINT); 5870 break; 5871 5872 case TGSI_OPCODE_DSNE: 5873 exec_double_binary(mach, inst, micro_dsne, TGSI_EXEC_DATA_UINT); 5874 break; 5875 5876 case TGSI_OPCODE_DRCP: 5877 exec_double_unary(mach, inst, micro_drcp); 5878 break; 5879 5880 case TGSI_OPCODE_DSQRT: 5881 exec_double_unary(mach, inst, micro_dsqrt); 5882 break; 5883 5884 case TGSI_OPCODE_DRSQ: 5885 exec_double_unary(mach, inst, micro_drsq); 5886 break; 5887 5888 case TGSI_OPCODE_DMAD: 5889 exec_double_trinary(mach, inst, micro_dmad); 5890 break; 5891 5892 case TGSI_OPCODE_DFRAC: 5893 exec_double_unary(mach, inst, micro_dfrac); 5894 break; 5895 5896 case TGSI_OPCODE_DLDEXP: 5897 exec_dldexp(mach, inst); 5898 break; 5899 5900 case TGSI_OPCODE_DFRACEXP: 5901 exec_dfracexp(mach, inst); 5902 break; 5903 5904 case TGSI_OPCODE_I2D: 5905 exec_t_2_64(mach, inst, micro_i2d, TGSI_EXEC_DATA_INT); 5906 break; 5907 5908 case TGSI_OPCODE_D2I: 5909 exec_64_2_t(mach, inst, micro_d2i, TGSI_EXEC_DATA_INT); 5910 break; 5911 5912 case TGSI_OPCODE_U2D: 5913 exec_t_2_64(mach, inst, micro_u2d, TGSI_EXEC_DATA_UINT); 5914 break; 5915 5916 case TGSI_OPCODE_D2U: 5917 exec_64_2_t(mach, inst, micro_d2u, TGSI_EXEC_DATA_INT); 5918 break; 5919 5920 case TGSI_OPCODE_LOAD: 5921 exec_load(mach, inst); 5922 break; 5923 5924 case TGSI_OPCODE_STORE: 5925 exec_store(mach, inst); 5926 break; 5927 5928 case TGSI_OPCODE_ATOMUADD: 5929 case TGSI_OPCODE_ATOMXCHG: 5930 case TGSI_OPCODE_ATOMCAS: 5931 case TGSI_OPCODE_ATOMAND: 5932 case TGSI_OPCODE_ATOMOR: 5933 case TGSI_OPCODE_ATOMXOR: 5934 case TGSI_OPCODE_ATOMUMIN: 5935 case TGSI_OPCODE_ATOMUMAX: 5936 case TGSI_OPCODE_ATOMIMIN: 5937 case TGSI_OPCODE_ATOMIMAX: 5938 exec_atomop(mach, inst); 5939 break; 5940 5941 case TGSI_OPCODE_RESQ: 5942 exec_resq(mach, inst); 5943 break; 5944 case TGSI_OPCODE_BARRIER: 5945 case TGSI_OPCODE_MEMBAR: 5946 return TRUE; 5947 break; 5948 5949 case TGSI_OPCODE_I64ABS: 5950 exec_double_unary(mach, inst, micro_i64abs); 5951 break; 5952 5953 case TGSI_OPCODE_I64SSG: 5954 exec_double_unary(mach, inst, micro_i64sgn); 5955 break; 5956 5957 case TGSI_OPCODE_I64NEG: 5958 exec_double_unary(mach, inst, micro_i64neg); 5959 break; 5960 5961 case TGSI_OPCODE_U64SEQ: 5962 exec_double_binary(mach, inst, micro_u64seq, TGSI_EXEC_DATA_UINT); 5963 break; 5964 5965 case TGSI_OPCODE_U64SNE: 5966 exec_double_binary(mach, inst, micro_u64sne, TGSI_EXEC_DATA_UINT); 5967 break; 5968 5969 case TGSI_OPCODE_I64SLT: 5970 exec_double_binary(mach, inst, micro_i64slt, TGSI_EXEC_DATA_UINT); 5971 break; 5972 case TGSI_OPCODE_U64SLT: 5973 exec_double_binary(mach, inst, micro_u64slt, TGSI_EXEC_DATA_UINT); 5974 break; 5975 5976 case TGSI_OPCODE_I64SGE: 5977 exec_double_binary(mach, inst, micro_i64sge, TGSI_EXEC_DATA_UINT); 5978 break; 5979 case TGSI_OPCODE_U64SGE: 5980 exec_double_binary(mach, inst, micro_u64sge, TGSI_EXEC_DATA_UINT); 5981 break; 5982 5983 case TGSI_OPCODE_I64MIN: 5984 exec_double_binary(mach, inst, micro_i64min, TGSI_EXEC_DATA_INT64); 5985 break; 5986 case TGSI_OPCODE_U64MIN: 5987 exec_double_binary(mach, inst, micro_u64min, TGSI_EXEC_DATA_UINT64); 5988 break; 5989 case TGSI_OPCODE_I64MAX: 5990 exec_double_binary(mach, inst, micro_i64max, TGSI_EXEC_DATA_INT64); 5991 break; 5992 case TGSI_OPCODE_U64MAX: 5993 exec_double_binary(mach, inst, micro_u64max, TGSI_EXEC_DATA_UINT64); 5994 break; 5995 case TGSI_OPCODE_U64ADD: 5996 exec_double_binary(mach, inst, micro_u64add, TGSI_EXEC_DATA_UINT64); 5997 break; 5998 case TGSI_OPCODE_U64MUL: 5999 exec_double_binary(mach, inst, micro_u64mul, TGSI_EXEC_DATA_UINT64); 6000 break; 6001 case TGSI_OPCODE_U64SHL: 6002 exec_arg0_64_arg1_32(mach, inst, micro_u64shl); 6003 break; 6004 case TGSI_OPCODE_I64SHR: 6005 exec_arg0_64_arg1_32(mach, inst, micro_i64shr); 6006 break; 6007 case TGSI_OPCODE_U64SHR: 6008 exec_arg0_64_arg1_32(mach, inst, micro_u64shr); 6009 break; 6010 case TGSI_OPCODE_U64DIV: 6011 exec_double_binary(mach, inst, micro_u64div, TGSI_EXEC_DATA_UINT64); 6012 break; 6013 case TGSI_OPCODE_I64DIV: 6014 exec_double_binary(mach, inst, micro_i64div, TGSI_EXEC_DATA_INT64); 6015 break; 6016 case TGSI_OPCODE_U64MOD: 6017 exec_double_binary(mach, inst, micro_u64mod, TGSI_EXEC_DATA_UINT64); 6018 break; 6019 case TGSI_OPCODE_I64MOD: 6020 exec_double_binary(mach, inst, micro_i64mod, TGSI_EXEC_DATA_INT64); 6021 break; 6022 6023 case TGSI_OPCODE_F2U64: 6024 exec_t_2_64(mach, inst, micro_f2u64, TGSI_EXEC_DATA_FLOAT); 6025 break; 6026 6027 case TGSI_OPCODE_F2I64: 6028 exec_t_2_64(mach, inst, micro_f2i64, TGSI_EXEC_DATA_FLOAT); 6029 break; 6030 6031 case TGSI_OPCODE_U2I64: 6032 exec_t_2_64(mach, inst, micro_u2i64, TGSI_EXEC_DATA_INT); 6033 break; 6034 case TGSI_OPCODE_I2I64: 6035 exec_t_2_64(mach, inst, micro_i2i64, TGSI_EXEC_DATA_INT); 6036 break; 6037 6038 case TGSI_OPCODE_D2U64: 6039 exec_double_unary(mach, inst, micro_d2u64); 6040 break; 6041 6042 case TGSI_OPCODE_D2I64: 6043 exec_double_unary(mach, inst, micro_d2i64); 6044 break; 6045 6046 case TGSI_OPCODE_U642F: 6047 exec_64_2_t(mach, inst, micro_u642f, TGSI_EXEC_DATA_FLOAT); 6048 break; 6049 case TGSI_OPCODE_I642F: 6050 exec_64_2_t(mach, inst, micro_i642f, TGSI_EXEC_DATA_FLOAT); 6051 break; 6052 6053 case TGSI_OPCODE_U642D: 6054 exec_double_unary(mach, inst, micro_u642d); 6055 break; 6056 case TGSI_OPCODE_I642D: 6057 exec_double_unary(mach, inst, micro_i642d); 6058 break; 6059 6060 default: 6061 assert( 0 ); 6062 } 6063 return FALSE; 6064 } 6065 6066 static void 6067 tgsi_exec_machine_setup_masks(struct tgsi_exec_machine *mach) 6068 { 6069 uint default_mask = 0xf; 6070 6071 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; 6072 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; 6073 6074 if (mach->ShaderType == PIPE_SHADER_GEOMETRY) { 6075 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; 6076 mach->Primitives[0] = 0; 6077 /* GS runs on a single primitive for now */ 6078 default_mask = 0x1; 6079 } 6080 6081 if (mach->NonHelperMask == 0) 6082 mach->NonHelperMask = default_mask; 6083 mach->CondMask = default_mask; 6084 mach->LoopMask = default_mask; 6085 mach->ContMask = default_mask; 6086 mach->FuncMask = default_mask; 6087 mach->ExecMask = default_mask; 6088 6089 mach->Switch.mask = default_mask; 6090 6091 assert(mach->CondStackTop == 0); 6092 assert(mach->LoopStackTop == 0); 6093 assert(mach->ContStackTop == 0); 6094 assert(mach->SwitchStackTop == 0); 6095 assert(mach->BreakStackTop == 0); 6096 assert(mach->CallStackTop == 0); 6097 } 6098 6099 /** 6100 * Run TGSI interpreter. 6101 * \return bitmask of "alive" quad components 6102 */ 6103 uint 6104 tgsi_exec_machine_run( struct tgsi_exec_machine *mach, int start_pc ) 6105 { 6106 uint i; 6107 6108 mach->pc = start_pc; 6109 6110 if (!start_pc) { 6111 tgsi_exec_machine_setup_masks(mach); 6112 6113 /* execute declarations (interpolants) */ 6114 for (i = 0; i < mach->NumDeclarations; i++) { 6115 exec_declaration( mach, mach->Declarations+i ); 6116 } 6117 } 6118 6119 { 6120 #if DEBUG_EXECUTION 6121 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS]; 6122 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS]; 6123 uint inst = 1; 6124 6125 if (!start_pc) { 6126 memset(mach->Temps, 0, sizeof(temps)); 6127 if (mach->Outputs) 6128 memset(mach->Outputs, 0, sizeof(outputs)); 6129 memset(temps, 0, sizeof(temps)); 6130 memset(outputs, 0, sizeof(outputs)); 6131 } 6132 #endif 6133 6134 /* execute instructions, until pc is set to -1 */ 6135 while (mach->pc != -1) { 6136 boolean barrier_hit; 6137 #if DEBUG_EXECUTION 6138 uint i; 6139 6140 tgsi_dump_instruction(&mach->Instructions[mach->pc], inst++); 6141 #endif 6142 6143 assert(mach->pc < (int) mach->NumInstructions); 6144 barrier_hit = exec_instruction(mach, mach->Instructions + mach->pc, &mach->pc); 6145 6146 /* for compute shaders if we hit a barrier return now for later rescheduling */ 6147 if (barrier_hit && mach->ShaderType == PIPE_SHADER_COMPUTE) 6148 return 0; 6149 6150 #if DEBUG_EXECUTION 6151 for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) { 6152 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) { 6153 uint j; 6154 6155 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i])); 6156 debug_printf("TEMP[%2u] = ", i); 6157 for (j = 0; j < 4; j++) { 6158 if (j > 0) { 6159 debug_printf(" "); 6160 } 6161 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 6162 temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j], 6163 temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j], 6164 temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j], 6165 temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]); 6166 } 6167 } 6168 } 6169 if (mach->Outputs) { 6170 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { 6171 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) { 6172 uint j; 6173 6174 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i])); 6175 debug_printf("OUT[%2u] = ", i); 6176 for (j = 0; j < 4; j++) { 6177 if (j > 0) { 6178 debug_printf(" "); 6179 } 6180 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 6181 outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j], 6182 outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j], 6183 outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j], 6184 outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]); 6185 } 6186 } 6187 } 6188 } 6189 #endif 6190 } 6191 } 6192 6193 #if 0 6194 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ 6195 if (mach->ShaderType == PIPE_SHADER_FRAGMENT) { 6196 /* 6197 * Scale back depth component. 6198 */ 6199 for (i = 0; i < 4; i++) 6200 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; 6201 } 6202 #endif 6203 6204 /* Strictly speaking, these assertions aren't really needed but they 6205 * can potentially catch some bugs in the control flow code. 6206 */ 6207 assert(mach->CondStackTop == 0); 6208 assert(mach->LoopStackTop == 0); 6209 assert(mach->ContStackTop == 0); 6210 assert(mach->SwitchStackTop == 0); 6211 assert(mach->BreakStackTop == 0); 6212 assert(mach->CallStackTop == 0); 6213 6214 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 6215 } 6216