1 /************************************************************************** 2 * 3 * Copyright 2007-2008 VMware, Inc. 4 * All Rights Reserved. 5 * Copyright 2009-2010 VMware, Inc. All rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29 /** 30 * TGSI interpreter/executor. 31 * 32 * Flow control information: 33 * 34 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) 35 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special 36 * care since a condition may be true for some quad components but false 37 * for other components. 38 * 39 * We basically execute all statements (even if they're in the part of 40 * an IF/ELSE clause that's "not taken") and use a special mask to 41 * control writing to destination registers. This is the ExecMask. 42 * See store_dest(). 43 * 44 * The ExecMask is computed from three other masks (CondMask, LoopMask and 45 * ContMask) which are controlled by the flow control instructions (namely: 46 * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 47 * 48 * 49 * Authors: 50 * Michal Krol 51 * Brian Paul 52 */ 53 54 #include "pipe/p_compiler.h" 55 #include "pipe/p_state.h" 56 #include "pipe/p_shader_tokens.h" 57 #include "tgsi/tgsi_dump.h" 58 #include "tgsi/tgsi_parse.h" 59 #include "tgsi/tgsi_util.h" 60 #include "tgsi_exec.h" 61 #include "util/u_half.h" 62 #include "util/u_memory.h" 63 #include "util/u_math.h" 64 #include "util/rounding.h" 65 66 67 #define DEBUG_EXECUTION 0 68 69 70 #define FAST_MATH 0 71 72 #define TILE_TOP_LEFT 0 73 #define TILE_TOP_RIGHT 1 74 #define TILE_BOTTOM_LEFT 2 75 #define TILE_BOTTOM_RIGHT 3 76 77 union tgsi_double_channel { 78 double d[TGSI_QUAD_SIZE]; 79 unsigned u[TGSI_QUAD_SIZE][2]; 80 uint64_t u64[TGSI_QUAD_SIZE]; 81 int64_t i64[TGSI_QUAD_SIZE]; 82 }; 83 84 struct tgsi_double_vector { 85 union tgsi_double_channel xy; 86 union tgsi_double_channel zw; 87 }; 88 89 static void 90 micro_abs(union tgsi_exec_channel *dst, 91 const union tgsi_exec_channel *src) 92 { 93 dst->f[0] = fabsf(src->f[0]); 94 dst->f[1] = fabsf(src->f[1]); 95 dst->f[2] = fabsf(src->f[2]); 96 dst->f[3] = fabsf(src->f[3]); 97 } 98 99 static void 100 micro_arl(union tgsi_exec_channel *dst, 101 const union tgsi_exec_channel *src) 102 { 103 dst->i[0] = (int)floorf(src->f[0]); 104 dst->i[1] = (int)floorf(src->f[1]); 105 dst->i[2] = (int)floorf(src->f[2]); 106 dst->i[3] = (int)floorf(src->f[3]); 107 } 108 109 static void 110 micro_arr(union tgsi_exec_channel *dst, 111 const union tgsi_exec_channel *src) 112 { 113 dst->i[0] = (int)floorf(src->f[0] + 0.5f); 114 dst->i[1] = (int)floorf(src->f[1] + 0.5f); 115 dst->i[2] = (int)floorf(src->f[2] + 0.5f); 116 dst->i[3] = (int)floorf(src->f[3] + 0.5f); 117 } 118 119 static void 120 micro_ceil(union tgsi_exec_channel *dst, 121 const union tgsi_exec_channel *src) 122 { 123 dst->f[0] = ceilf(src->f[0]); 124 dst->f[1] = ceilf(src->f[1]); 125 dst->f[2] = ceilf(src->f[2]); 126 dst->f[3] = ceilf(src->f[3]); 127 } 128 129 static void 130 micro_clamp(union tgsi_exec_channel *dst, 131 const union tgsi_exec_channel *src0, 132 const union tgsi_exec_channel *src1, 133 const union tgsi_exec_channel *src2) 134 { 135 dst->f[0] = src0->f[0] < src1->f[0] ? src1->f[0] : src0->f[0] > src2->f[0] ? src2->f[0] : src0->f[0]; 136 dst->f[1] = src0->f[1] < src1->f[1] ? src1->f[1] : src0->f[1] > src2->f[1] ? src2->f[1] : src0->f[1]; 137 dst->f[2] = src0->f[2] < src1->f[2] ? src1->f[2] : src0->f[2] > src2->f[2] ? src2->f[2] : src0->f[2]; 138 dst->f[3] = src0->f[3] < src1->f[3] ? src1->f[3] : src0->f[3] > src2->f[3] ? src2->f[3] : src0->f[3]; 139 } 140 141 static void 142 micro_cmp(union tgsi_exec_channel *dst, 143 const union tgsi_exec_channel *src0, 144 const union tgsi_exec_channel *src1, 145 const union tgsi_exec_channel *src2) 146 { 147 dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0]; 148 dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1]; 149 dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2]; 150 dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3]; 151 } 152 153 static void 154 micro_cos(union tgsi_exec_channel *dst, 155 const union tgsi_exec_channel *src) 156 { 157 dst->f[0] = cosf(src->f[0]); 158 dst->f[1] = cosf(src->f[1]); 159 dst->f[2] = cosf(src->f[2]); 160 dst->f[3] = cosf(src->f[3]); 161 } 162 163 static void 164 micro_d2f(union tgsi_exec_channel *dst, 165 const union tgsi_double_channel *src) 166 { 167 dst->f[0] = (float)src->d[0]; 168 dst->f[1] = (float)src->d[1]; 169 dst->f[2] = (float)src->d[2]; 170 dst->f[3] = (float)src->d[3]; 171 } 172 173 static void 174 micro_d2i(union tgsi_exec_channel *dst, 175 const union tgsi_double_channel *src) 176 { 177 dst->i[0] = (int)src->d[0]; 178 dst->i[1] = (int)src->d[1]; 179 dst->i[2] = (int)src->d[2]; 180 dst->i[3] = (int)src->d[3]; 181 } 182 183 static void 184 micro_d2u(union tgsi_exec_channel *dst, 185 const union tgsi_double_channel *src) 186 { 187 dst->u[0] = (unsigned)src->d[0]; 188 dst->u[1] = (unsigned)src->d[1]; 189 dst->u[2] = (unsigned)src->d[2]; 190 dst->u[3] = (unsigned)src->d[3]; 191 } 192 static void 193 micro_dabs(union tgsi_double_channel *dst, 194 const union tgsi_double_channel *src) 195 { 196 dst->d[0] = src->d[0] >= 0.0 ? src->d[0] : -src->d[0]; 197 dst->d[1] = src->d[1] >= 0.0 ? src->d[1] : -src->d[1]; 198 dst->d[2] = src->d[2] >= 0.0 ? src->d[2] : -src->d[2]; 199 dst->d[3] = src->d[3] >= 0.0 ? src->d[3] : -src->d[3]; 200 } 201 202 static void 203 micro_dadd(union tgsi_double_channel *dst, 204 const union tgsi_double_channel *src) 205 { 206 dst->d[0] = src[0].d[0] + src[1].d[0]; 207 dst->d[1] = src[0].d[1] + src[1].d[1]; 208 dst->d[2] = src[0].d[2] + src[1].d[2]; 209 dst->d[3] = src[0].d[3] + src[1].d[3]; 210 } 211 212 static void 213 micro_ddiv(union tgsi_double_channel *dst, 214 const union tgsi_double_channel *src) 215 { 216 dst->d[0] = src[0].d[0] / src[1].d[0]; 217 dst->d[1] = src[0].d[1] / src[1].d[1]; 218 dst->d[2] = src[0].d[2] / src[1].d[2]; 219 dst->d[3] = src[0].d[3] / src[1].d[3]; 220 } 221 222 static void 223 micro_ddx(union tgsi_exec_channel *dst, 224 const union tgsi_exec_channel *src) 225 { 226 dst->f[0] = 227 dst->f[1] = 228 dst->f[2] = 229 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 230 } 231 232 static void 233 micro_ddy(union tgsi_exec_channel *dst, 234 const union tgsi_exec_channel *src) 235 { 236 dst->f[0] = 237 dst->f[1] = 238 dst->f[2] = 239 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; 240 } 241 242 static void 243 micro_dmul(union tgsi_double_channel *dst, 244 const union tgsi_double_channel *src) 245 { 246 dst->d[0] = src[0].d[0] * src[1].d[0]; 247 dst->d[1] = src[0].d[1] * src[1].d[1]; 248 dst->d[2] = src[0].d[2] * src[1].d[2]; 249 dst->d[3] = src[0].d[3] * src[1].d[3]; 250 } 251 252 static void 253 micro_dmax(union tgsi_double_channel *dst, 254 const union tgsi_double_channel *src) 255 { 256 dst->d[0] = src[0].d[0] > src[1].d[0] ? src[0].d[0] : src[1].d[0]; 257 dst->d[1] = src[0].d[1] > src[1].d[1] ? src[0].d[1] : src[1].d[1]; 258 dst->d[2] = src[0].d[2] > src[1].d[2] ? src[0].d[2] : src[1].d[2]; 259 dst->d[3] = src[0].d[3] > src[1].d[3] ? src[0].d[3] : src[1].d[3]; 260 } 261 262 static void 263 micro_dmin(union tgsi_double_channel *dst, 264 const union tgsi_double_channel *src) 265 { 266 dst->d[0] = src[0].d[0] < src[1].d[0] ? src[0].d[0] : src[1].d[0]; 267 dst->d[1] = src[0].d[1] < src[1].d[1] ? src[0].d[1] : src[1].d[1]; 268 dst->d[2] = src[0].d[2] < src[1].d[2] ? src[0].d[2] : src[1].d[2]; 269 dst->d[3] = src[0].d[3] < src[1].d[3] ? src[0].d[3] : src[1].d[3]; 270 } 271 272 static void 273 micro_dneg(union tgsi_double_channel *dst, 274 const union tgsi_double_channel *src) 275 { 276 dst->d[0] = -src->d[0]; 277 dst->d[1] = -src->d[1]; 278 dst->d[2] = -src->d[2]; 279 dst->d[3] = -src->d[3]; 280 } 281 282 static void 283 micro_dslt(union tgsi_double_channel *dst, 284 const union tgsi_double_channel *src) 285 { 286 dst->u[0][0] = src[0].d[0] < src[1].d[0] ? ~0U : 0U; 287 dst->u[1][0] = src[0].d[1] < src[1].d[1] ? ~0U : 0U; 288 dst->u[2][0] = src[0].d[2] < src[1].d[2] ? ~0U : 0U; 289 dst->u[3][0] = src[0].d[3] < src[1].d[3] ? ~0U : 0U; 290 } 291 292 static void 293 micro_dsne(union tgsi_double_channel *dst, 294 const union tgsi_double_channel *src) 295 { 296 dst->u[0][0] = src[0].d[0] != src[1].d[0] ? ~0U : 0U; 297 dst->u[1][0] = src[0].d[1] != src[1].d[1] ? ~0U : 0U; 298 dst->u[2][0] = src[0].d[2] != src[1].d[2] ? ~0U : 0U; 299 dst->u[3][0] = src[0].d[3] != src[1].d[3] ? ~0U : 0U; 300 } 301 302 static void 303 micro_dsge(union tgsi_double_channel *dst, 304 const union tgsi_double_channel *src) 305 { 306 dst->u[0][0] = src[0].d[0] >= src[1].d[0] ? ~0U : 0U; 307 dst->u[1][0] = src[0].d[1] >= src[1].d[1] ? ~0U : 0U; 308 dst->u[2][0] = src[0].d[2] >= src[1].d[2] ? ~0U : 0U; 309 dst->u[3][0] = src[0].d[3] >= src[1].d[3] ? ~0U : 0U; 310 } 311 312 static void 313 micro_dseq(union tgsi_double_channel *dst, 314 const union tgsi_double_channel *src) 315 { 316 dst->u[0][0] = src[0].d[0] == src[1].d[0] ? ~0U : 0U; 317 dst->u[1][0] = src[0].d[1] == src[1].d[1] ? ~0U : 0U; 318 dst->u[2][0] = src[0].d[2] == src[1].d[2] ? ~0U : 0U; 319 dst->u[3][0] = src[0].d[3] == src[1].d[3] ? ~0U : 0U; 320 } 321 322 static void 323 micro_drcp(union tgsi_double_channel *dst, 324 const union tgsi_double_channel *src) 325 { 326 dst->d[0] = 1.0 / src->d[0]; 327 dst->d[1] = 1.0 / src->d[1]; 328 dst->d[2] = 1.0 / src->d[2]; 329 dst->d[3] = 1.0 / src->d[3]; 330 } 331 332 static void 333 micro_dsqrt(union tgsi_double_channel *dst, 334 const union tgsi_double_channel *src) 335 { 336 dst->d[0] = sqrt(src->d[0]); 337 dst->d[1] = sqrt(src->d[1]); 338 dst->d[2] = sqrt(src->d[2]); 339 dst->d[3] = sqrt(src->d[3]); 340 } 341 342 static void 343 micro_drsq(union tgsi_double_channel *dst, 344 const union tgsi_double_channel *src) 345 { 346 dst->d[0] = 1.0 / sqrt(src->d[0]); 347 dst->d[1] = 1.0 / sqrt(src->d[1]); 348 dst->d[2] = 1.0 / sqrt(src->d[2]); 349 dst->d[3] = 1.0 / sqrt(src->d[3]); 350 } 351 352 static void 353 micro_dmad(union tgsi_double_channel *dst, 354 const union tgsi_double_channel *src) 355 { 356 dst->d[0] = src[0].d[0] * src[1].d[0] + src[2].d[0]; 357 dst->d[1] = src[0].d[1] * src[1].d[1] + src[2].d[1]; 358 dst->d[2] = src[0].d[2] * src[1].d[2] + src[2].d[2]; 359 dst->d[3] = src[0].d[3] * src[1].d[3] + src[2].d[3]; 360 } 361 362 static void 363 micro_dfrac(union tgsi_double_channel *dst, 364 const union tgsi_double_channel *src) 365 { 366 dst->d[0] = src->d[0] - floor(src->d[0]); 367 dst->d[1] = src->d[1] - floor(src->d[1]); 368 dst->d[2] = src->d[2] - floor(src->d[2]); 369 dst->d[3] = src->d[3] - floor(src->d[3]); 370 } 371 372 static void 373 micro_dldexp(union tgsi_double_channel *dst, 374 const union tgsi_double_channel *src0, 375 union tgsi_exec_channel *src1) 376 { 377 dst->d[0] = ldexp(src0->d[0], src1->i[0]); 378 dst->d[1] = ldexp(src0->d[1], src1->i[1]); 379 dst->d[2] = ldexp(src0->d[2], src1->i[2]); 380 dst->d[3] = ldexp(src0->d[3], src1->i[3]); 381 } 382 383 static void 384 micro_dfracexp(union tgsi_double_channel *dst, 385 union tgsi_exec_channel *dst_exp, 386 const union tgsi_double_channel *src) 387 { 388 dst->d[0] = frexp(src->d[0], &dst_exp->i[0]); 389 dst->d[1] = frexp(src->d[1], &dst_exp->i[1]); 390 dst->d[2] = frexp(src->d[2], &dst_exp->i[2]); 391 dst->d[3] = frexp(src->d[3], &dst_exp->i[3]); 392 } 393 394 static void 395 micro_exp2(union tgsi_exec_channel *dst, 396 const union tgsi_exec_channel *src) 397 { 398 #if FAST_MATH 399 dst->f[0] = util_fast_exp2(src->f[0]); 400 dst->f[1] = util_fast_exp2(src->f[1]); 401 dst->f[2] = util_fast_exp2(src->f[2]); 402 dst->f[3] = util_fast_exp2(src->f[3]); 403 #else 404 #if DEBUG 405 /* Inf is okay for this instruction, so clamp it to silence assertions. */ 406 uint i; 407 union tgsi_exec_channel clamped; 408 409 for (i = 0; i < 4; i++) { 410 if (src->f[i] > 127.99999f) { 411 clamped.f[i] = 127.99999f; 412 } else if (src->f[i] < -126.99999f) { 413 clamped.f[i] = -126.99999f; 414 } else { 415 clamped.f[i] = src->f[i]; 416 } 417 } 418 src = &clamped; 419 #endif /* DEBUG */ 420 421 dst->f[0] = powf(2.0f, src->f[0]); 422 dst->f[1] = powf(2.0f, src->f[1]); 423 dst->f[2] = powf(2.0f, src->f[2]); 424 dst->f[3] = powf(2.0f, src->f[3]); 425 #endif /* FAST_MATH */ 426 } 427 428 static void 429 micro_f2d(union tgsi_double_channel *dst, 430 const union tgsi_exec_channel *src) 431 { 432 dst->d[0] = (double)src->f[0]; 433 dst->d[1] = (double)src->f[1]; 434 dst->d[2] = (double)src->f[2]; 435 dst->d[3] = (double)src->f[3]; 436 } 437 438 static void 439 micro_flr(union tgsi_exec_channel *dst, 440 const union tgsi_exec_channel *src) 441 { 442 dst->f[0] = floorf(src->f[0]); 443 dst->f[1] = floorf(src->f[1]); 444 dst->f[2] = floorf(src->f[2]); 445 dst->f[3] = floorf(src->f[3]); 446 } 447 448 static void 449 micro_frc(union tgsi_exec_channel *dst, 450 const union tgsi_exec_channel *src) 451 { 452 dst->f[0] = src->f[0] - floorf(src->f[0]); 453 dst->f[1] = src->f[1] - floorf(src->f[1]); 454 dst->f[2] = src->f[2] - floorf(src->f[2]); 455 dst->f[3] = src->f[3] - floorf(src->f[3]); 456 } 457 458 static void 459 micro_i2d(union tgsi_double_channel *dst, 460 const union tgsi_exec_channel *src) 461 { 462 dst->d[0] = (double)src->i[0]; 463 dst->d[1] = (double)src->i[1]; 464 dst->d[2] = (double)src->i[2]; 465 dst->d[3] = (double)src->i[3]; 466 } 467 468 static void 469 micro_iabs(union tgsi_exec_channel *dst, 470 const union tgsi_exec_channel *src) 471 { 472 dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0]; 473 dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1]; 474 dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2]; 475 dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3]; 476 } 477 478 static void 479 micro_ineg(union tgsi_exec_channel *dst, 480 const union tgsi_exec_channel *src) 481 { 482 dst->i[0] = -src->i[0]; 483 dst->i[1] = -src->i[1]; 484 dst->i[2] = -src->i[2]; 485 dst->i[3] = -src->i[3]; 486 } 487 488 static void 489 micro_lg2(union tgsi_exec_channel *dst, 490 const union tgsi_exec_channel *src) 491 { 492 #if FAST_MATH 493 dst->f[0] = util_fast_log2(src->f[0]); 494 dst->f[1] = util_fast_log2(src->f[1]); 495 dst->f[2] = util_fast_log2(src->f[2]); 496 dst->f[3] = util_fast_log2(src->f[3]); 497 #else 498 dst->f[0] = logf(src->f[0]) * 1.442695f; 499 dst->f[1] = logf(src->f[1]) * 1.442695f; 500 dst->f[2] = logf(src->f[2]) * 1.442695f; 501 dst->f[3] = logf(src->f[3]) * 1.442695f; 502 #endif 503 } 504 505 static void 506 micro_lrp(union tgsi_exec_channel *dst, 507 const union tgsi_exec_channel *src0, 508 const union tgsi_exec_channel *src1, 509 const union tgsi_exec_channel *src2) 510 { 511 dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0]; 512 dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1]; 513 dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2]; 514 dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3]; 515 } 516 517 static void 518 micro_mad(union tgsi_exec_channel *dst, 519 const union tgsi_exec_channel *src0, 520 const union tgsi_exec_channel *src1, 521 const union tgsi_exec_channel *src2) 522 { 523 dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0]; 524 dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1]; 525 dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2]; 526 dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3]; 527 } 528 529 static void 530 micro_mov(union tgsi_exec_channel *dst, 531 const union tgsi_exec_channel *src) 532 { 533 dst->u[0] = src->u[0]; 534 dst->u[1] = src->u[1]; 535 dst->u[2] = src->u[2]; 536 dst->u[3] = src->u[3]; 537 } 538 539 static void 540 micro_rcp(union tgsi_exec_channel *dst, 541 const union tgsi_exec_channel *src) 542 { 543 #if 0 /* for debugging */ 544 assert(src->f[0] != 0.0f); 545 assert(src->f[1] != 0.0f); 546 assert(src->f[2] != 0.0f); 547 assert(src->f[3] != 0.0f); 548 #endif 549 dst->f[0] = 1.0f / src->f[0]; 550 dst->f[1] = 1.0f / src->f[1]; 551 dst->f[2] = 1.0f / src->f[2]; 552 dst->f[3] = 1.0f / src->f[3]; 553 } 554 555 static void 556 micro_rnd(union tgsi_exec_channel *dst, 557 const union tgsi_exec_channel *src) 558 { 559 dst->f[0] = _mesa_roundevenf(src->f[0]); 560 dst->f[1] = _mesa_roundevenf(src->f[1]); 561 dst->f[2] = _mesa_roundevenf(src->f[2]); 562 dst->f[3] = _mesa_roundevenf(src->f[3]); 563 } 564 565 static void 566 micro_rsq(union tgsi_exec_channel *dst, 567 const union tgsi_exec_channel *src) 568 { 569 #if 0 /* for debugging */ 570 assert(src->f[0] != 0.0f); 571 assert(src->f[1] != 0.0f); 572 assert(src->f[2] != 0.0f); 573 assert(src->f[3] != 0.0f); 574 #endif 575 dst->f[0] = 1.0f / sqrtf(src->f[0]); 576 dst->f[1] = 1.0f / sqrtf(src->f[1]); 577 dst->f[2] = 1.0f / sqrtf(src->f[2]); 578 dst->f[3] = 1.0f / sqrtf(src->f[3]); 579 } 580 581 static void 582 micro_sqrt(union tgsi_exec_channel *dst, 583 const union tgsi_exec_channel *src) 584 { 585 dst->f[0] = sqrtf(src->f[0]); 586 dst->f[1] = sqrtf(src->f[1]); 587 dst->f[2] = sqrtf(src->f[2]); 588 dst->f[3] = sqrtf(src->f[3]); 589 } 590 591 static void 592 micro_seq(union tgsi_exec_channel *dst, 593 const union tgsi_exec_channel *src0, 594 const union tgsi_exec_channel *src1) 595 { 596 dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f; 597 dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f; 598 dst->f[2] = src0->f[2] == src1->f[2] ? 1.0f : 0.0f; 599 dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f; 600 } 601 602 static void 603 micro_sge(union tgsi_exec_channel *dst, 604 const union tgsi_exec_channel *src0, 605 const union tgsi_exec_channel *src1) 606 { 607 dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f; 608 dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f; 609 dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f; 610 dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f; 611 } 612 613 static void 614 micro_sgn(union tgsi_exec_channel *dst, 615 const union tgsi_exec_channel *src) 616 { 617 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; 618 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; 619 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; 620 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; 621 } 622 623 static void 624 micro_isgn(union tgsi_exec_channel *dst, 625 const union tgsi_exec_channel *src) 626 { 627 dst->i[0] = src->i[0] < 0 ? -1 : src->i[0] > 0 ? 1 : 0; 628 dst->i[1] = src->i[1] < 0 ? -1 : src->i[1] > 0 ? 1 : 0; 629 dst->i[2] = src->i[2] < 0 ? -1 : src->i[2] > 0 ? 1 : 0; 630 dst->i[3] = src->i[3] < 0 ? -1 : src->i[3] > 0 ? 1 : 0; 631 } 632 633 static void 634 micro_sgt(union tgsi_exec_channel *dst, 635 const union tgsi_exec_channel *src0, 636 const union tgsi_exec_channel *src1) 637 { 638 dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f; 639 dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f; 640 dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f; 641 dst->f[3] = src0->f[3] > src1->f[3] ? 1.0f : 0.0f; 642 } 643 644 static void 645 micro_sin(union tgsi_exec_channel *dst, 646 const union tgsi_exec_channel *src) 647 { 648 dst->f[0] = sinf(src->f[0]); 649 dst->f[1] = sinf(src->f[1]); 650 dst->f[2] = sinf(src->f[2]); 651 dst->f[3] = sinf(src->f[3]); 652 } 653 654 static void 655 micro_sle(union tgsi_exec_channel *dst, 656 const union tgsi_exec_channel *src0, 657 const union tgsi_exec_channel *src1) 658 { 659 dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f; 660 dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f; 661 dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f; 662 dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f; 663 } 664 665 static void 666 micro_slt(union tgsi_exec_channel *dst, 667 const union tgsi_exec_channel *src0, 668 const union tgsi_exec_channel *src1) 669 { 670 dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f; 671 dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f; 672 dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f; 673 dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f; 674 } 675 676 static void 677 micro_sne(union tgsi_exec_channel *dst, 678 const union tgsi_exec_channel *src0, 679 const union tgsi_exec_channel *src1) 680 { 681 dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f; 682 dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f; 683 dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f; 684 dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f; 685 } 686 687 static void 688 micro_trunc(union tgsi_exec_channel *dst, 689 const union tgsi_exec_channel *src) 690 { 691 dst->f[0] = truncf(src->f[0]); 692 dst->f[1] = truncf(src->f[1]); 693 dst->f[2] = truncf(src->f[2]); 694 dst->f[3] = truncf(src->f[3]); 695 } 696 697 static void 698 micro_u2d(union tgsi_double_channel *dst, 699 const union tgsi_exec_channel *src) 700 { 701 dst->d[0] = (double)src->u[0]; 702 dst->d[1] = (double)src->u[1]; 703 dst->d[2] = (double)src->u[2]; 704 dst->d[3] = (double)src->u[3]; 705 } 706 707 static void 708 micro_i64abs(union tgsi_double_channel *dst, 709 const union tgsi_double_channel *src) 710 { 711 dst->i64[0] = src->i64[0] >= 0.0 ? src->i64[0] : -src->i64[0]; 712 dst->i64[1] = src->i64[1] >= 0.0 ? src->i64[1] : -src->i64[1]; 713 dst->i64[2] = src->i64[2] >= 0.0 ? src->i64[2] : -src->i64[2]; 714 dst->i64[3] = src->i64[3] >= 0.0 ? src->i64[3] : -src->i64[3]; 715 } 716 717 static void 718 micro_i64sgn(union tgsi_double_channel *dst, 719 const union tgsi_double_channel *src) 720 { 721 dst->i64[0] = src->i64[0] < 0 ? -1 : src->i64[0] > 0 ? 1 : 0; 722 dst->i64[1] = src->i64[1] < 0 ? -1 : src->i64[1] > 0 ? 1 : 0; 723 dst->i64[2] = src->i64[2] < 0 ? -1 : src->i64[2] > 0 ? 1 : 0; 724 dst->i64[3] = src->i64[3] < 0 ? -1 : src->i64[3] > 0 ? 1 : 0; 725 } 726 727 static void 728 micro_i64neg(union tgsi_double_channel *dst, 729 const union tgsi_double_channel *src) 730 { 731 dst->i64[0] = -src->i64[0]; 732 dst->i64[1] = -src->i64[1]; 733 dst->i64[2] = -src->i64[2]; 734 dst->i64[3] = -src->i64[3]; 735 } 736 737 static void 738 micro_u64seq(union tgsi_double_channel *dst, 739 const union tgsi_double_channel *src) 740 { 741 dst->u[0][0] = src[0].u64[0] == src[1].u64[0] ? ~0U : 0U; 742 dst->u[1][0] = src[0].u64[1] == src[1].u64[1] ? ~0U : 0U; 743 dst->u[2][0] = src[0].u64[2] == src[1].u64[2] ? ~0U : 0U; 744 dst->u[3][0] = src[0].u64[3] == src[1].u64[3] ? ~0U : 0U; 745 } 746 747 static void 748 micro_u64sne(union tgsi_double_channel *dst, 749 const union tgsi_double_channel *src) 750 { 751 dst->u[0][0] = src[0].u64[0] != src[1].u64[0] ? ~0U : 0U; 752 dst->u[1][0] = src[0].u64[1] != src[1].u64[1] ? ~0U : 0U; 753 dst->u[2][0] = src[0].u64[2] != src[1].u64[2] ? ~0U : 0U; 754 dst->u[3][0] = src[0].u64[3] != src[1].u64[3] ? ~0U : 0U; 755 } 756 757 static void 758 micro_i64slt(union tgsi_double_channel *dst, 759 const union tgsi_double_channel *src) 760 { 761 dst->u[0][0] = src[0].i64[0] < src[1].i64[0] ? ~0U : 0U; 762 dst->u[1][0] = src[0].i64[1] < src[1].i64[1] ? ~0U : 0U; 763 dst->u[2][0] = src[0].i64[2] < src[1].i64[2] ? ~0U : 0U; 764 dst->u[3][0] = src[0].i64[3] < src[1].i64[3] ? ~0U : 0U; 765 } 766 767 static void 768 micro_u64slt(union tgsi_double_channel *dst, 769 const union tgsi_double_channel *src) 770 { 771 dst->u[0][0] = src[0].u64[0] < src[1].u64[0] ? ~0U : 0U; 772 dst->u[1][0] = src[0].u64[1] < src[1].u64[1] ? ~0U : 0U; 773 dst->u[2][0] = src[0].u64[2] < src[1].u64[2] ? ~0U : 0U; 774 dst->u[3][0] = src[0].u64[3] < src[1].u64[3] ? ~0U : 0U; 775 } 776 777 static void 778 micro_i64sge(union tgsi_double_channel *dst, 779 const union tgsi_double_channel *src) 780 { 781 dst->u[0][0] = src[0].i64[0] >= src[1].i64[0] ? ~0U : 0U; 782 dst->u[1][0] = src[0].i64[1] >= src[1].i64[1] ? ~0U : 0U; 783 dst->u[2][0] = src[0].i64[2] >= src[1].i64[2] ? ~0U : 0U; 784 dst->u[3][0] = src[0].i64[3] >= src[1].i64[3] ? ~0U : 0U; 785 } 786 787 static void 788 micro_u64sge(union tgsi_double_channel *dst, 789 const union tgsi_double_channel *src) 790 { 791 dst->u[0][0] = src[0].u64[0] >= src[1].u64[0] ? ~0U : 0U; 792 dst->u[1][0] = src[0].u64[1] >= src[1].u64[1] ? ~0U : 0U; 793 dst->u[2][0] = src[0].u64[2] >= src[1].u64[2] ? ~0U : 0U; 794 dst->u[3][0] = src[0].u64[3] >= src[1].u64[3] ? ~0U : 0U; 795 } 796 797 static void 798 micro_u64max(union tgsi_double_channel *dst, 799 const union tgsi_double_channel *src) 800 { 801 dst->u64[0] = src[0].u64[0] > src[1].u64[0] ? src[0].u64[0] : src[1].u64[0]; 802 dst->u64[1] = src[0].u64[1] > src[1].u64[1] ? src[0].u64[1] : src[1].u64[1]; 803 dst->u64[2] = src[0].u64[2] > src[1].u64[2] ? src[0].u64[2] : src[1].u64[2]; 804 dst->u64[3] = src[0].u64[3] > src[1].u64[3] ? src[0].u64[3] : src[1].u64[3]; 805 } 806 807 static void 808 micro_i64max(union tgsi_double_channel *dst, 809 const union tgsi_double_channel *src) 810 { 811 dst->i64[0] = src[0].i64[0] > src[1].i64[0] ? src[0].i64[0] : src[1].i64[0]; 812 dst->i64[1] = src[0].i64[1] > src[1].i64[1] ? src[0].i64[1] : src[1].i64[1]; 813 dst->i64[2] = src[0].i64[2] > src[1].i64[2] ? src[0].i64[2] : src[1].i64[2]; 814 dst->i64[3] = src[0].i64[3] > src[1].i64[3] ? src[0].i64[3] : src[1].i64[3]; 815 } 816 817 static void 818 micro_u64min(union tgsi_double_channel *dst, 819 const union tgsi_double_channel *src) 820 { 821 dst->u64[0] = src[0].u64[0] < src[1].u64[0] ? src[0].u64[0] : src[1].u64[0]; 822 dst->u64[1] = src[0].u64[1] < src[1].u64[1] ? src[0].u64[1] : src[1].u64[1]; 823 dst->u64[2] = src[0].u64[2] < src[1].u64[2] ? src[0].u64[2] : src[1].u64[2]; 824 dst->u64[3] = src[0].u64[3] < src[1].u64[3] ? src[0].u64[3] : src[1].u64[3]; 825 } 826 827 static void 828 micro_i64min(union tgsi_double_channel *dst, 829 const union tgsi_double_channel *src) 830 { 831 dst->i64[0] = src[0].i64[0] < src[1].i64[0] ? src[0].i64[0] : src[1].i64[0]; 832 dst->i64[1] = src[0].i64[1] < src[1].i64[1] ? src[0].i64[1] : src[1].i64[1]; 833 dst->i64[2] = src[0].i64[2] < src[1].i64[2] ? src[0].i64[2] : src[1].i64[2]; 834 dst->i64[3] = src[0].i64[3] < src[1].i64[3] ? src[0].i64[3] : src[1].i64[3]; 835 } 836 837 static void 838 micro_u64add(union tgsi_double_channel *dst, 839 const union tgsi_double_channel *src) 840 { 841 dst->u64[0] = src[0].u64[0] + src[1].u64[0]; 842 dst->u64[1] = src[0].u64[1] + src[1].u64[1]; 843 dst->u64[2] = src[0].u64[2] + src[1].u64[2]; 844 dst->u64[3] = src[0].u64[3] + src[1].u64[3]; 845 } 846 847 static void 848 micro_u64mul(union tgsi_double_channel *dst, 849 const union tgsi_double_channel *src) 850 { 851 dst->u64[0] = src[0].u64[0] * src[1].u64[0]; 852 dst->u64[1] = src[0].u64[1] * src[1].u64[1]; 853 dst->u64[2] = src[0].u64[2] * src[1].u64[2]; 854 dst->u64[3] = src[0].u64[3] * src[1].u64[3]; 855 } 856 857 static void 858 micro_u64div(union tgsi_double_channel *dst, 859 const union tgsi_double_channel *src) 860 { 861 dst->u64[0] = src[0].u64[0] / src[1].u64[0]; 862 dst->u64[1] = src[0].u64[1] / src[1].u64[1]; 863 dst->u64[2] = src[0].u64[2] / src[1].u64[2]; 864 dst->u64[3] = src[0].u64[3] / src[1].u64[3]; 865 } 866 867 static void 868 micro_i64div(union tgsi_double_channel *dst, 869 const union tgsi_double_channel *src) 870 { 871 dst->i64[0] = src[0].i64[0] / src[1].i64[0]; 872 dst->i64[1] = src[0].i64[1] / src[1].i64[1]; 873 dst->i64[2] = src[0].i64[2] / src[1].i64[2]; 874 dst->i64[3] = src[0].i64[3] / src[1].i64[3]; 875 } 876 877 static void 878 micro_u64mod(union tgsi_double_channel *dst, 879 const union tgsi_double_channel *src) 880 { 881 dst->u64[0] = src[0].u64[0] % src[1].u64[0]; 882 dst->u64[1] = src[0].u64[1] % src[1].u64[1]; 883 dst->u64[2] = src[0].u64[2] % src[1].u64[2]; 884 dst->u64[3] = src[0].u64[3] % src[1].u64[3]; 885 } 886 887 static void 888 micro_i64mod(union tgsi_double_channel *dst, 889 const union tgsi_double_channel *src) 890 { 891 dst->i64[0] = src[0].i64[0] % src[1].i64[0]; 892 dst->i64[1] = src[0].i64[1] % src[1].i64[1]; 893 dst->i64[2] = src[0].i64[2] % src[1].i64[2]; 894 dst->i64[3] = src[0].i64[3] % src[1].i64[3]; 895 } 896 897 static void 898 micro_u64shl(union tgsi_double_channel *dst, 899 const union tgsi_double_channel *src0, 900 union tgsi_exec_channel *src1) 901 { 902 unsigned masked_count; 903 masked_count = src1->u[0] & 0x3f; 904 dst->u64[0] = src0->u64[0] << masked_count; 905 masked_count = src1->u[1] & 0x3f; 906 dst->u64[1] = src0->u64[1] << masked_count; 907 masked_count = src1->u[2] & 0x3f; 908 dst->u64[2] = src0->u64[2] << masked_count; 909 masked_count = src1->u[3] & 0x3f; 910 dst->u64[3] = src0->u64[3] << masked_count; 911 } 912 913 static void 914 micro_i64shr(union tgsi_double_channel *dst, 915 const union tgsi_double_channel *src0, 916 union tgsi_exec_channel *src1) 917 { 918 unsigned masked_count; 919 masked_count = src1->u[0] & 0x3f; 920 dst->i64[0] = src0->i64[0] >> masked_count; 921 masked_count = src1->u[1] & 0x3f; 922 dst->i64[1] = src0->i64[1] >> masked_count; 923 masked_count = src1->u[2] & 0x3f; 924 dst->i64[2] = src0->i64[2] >> masked_count; 925 masked_count = src1->u[3] & 0x3f; 926 dst->i64[3] = src0->i64[3] >> masked_count; 927 } 928 929 static void 930 micro_u64shr(union tgsi_double_channel *dst, 931 const union tgsi_double_channel *src0, 932 union tgsi_exec_channel *src1) 933 { 934 unsigned masked_count; 935 masked_count = src1->u[0] & 0x3f; 936 dst->u64[0] = src0->u64[0] >> masked_count; 937 masked_count = src1->u[1] & 0x3f; 938 dst->u64[1] = src0->u64[1] >> masked_count; 939 masked_count = src1->u[2] & 0x3f; 940 dst->u64[2] = src0->u64[2] >> masked_count; 941 masked_count = src1->u[3] & 0x3f; 942 dst->u64[3] = src0->u64[3] >> masked_count; 943 } 944 945 enum tgsi_exec_datatype { 946 TGSI_EXEC_DATA_FLOAT, 947 TGSI_EXEC_DATA_INT, 948 TGSI_EXEC_DATA_UINT, 949 TGSI_EXEC_DATA_DOUBLE, 950 TGSI_EXEC_DATA_INT64, 951 TGSI_EXEC_DATA_UINT64, 952 }; 953 954 /* 955 * Shorthand locations of various utility registers (_I = Index, _C = Channel) 956 */ 957 #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I 958 #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C 959 #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I 960 #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C 961 #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I 962 #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C 963 964 965 /** The execution mask depends on the conditional mask and the loop mask */ 966 #define UPDATE_EXEC_MASK(MACH) \ 967 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->Switch.mask & MACH->FuncMask 968 969 970 static const union tgsi_exec_channel ZeroVec = 971 { { 0.0, 0.0, 0.0, 0.0 } }; 972 973 static const union tgsi_exec_channel OneVec = { 974 {1.0f, 1.0f, 1.0f, 1.0f} 975 }; 976 977 static const union tgsi_exec_channel P128Vec = { 978 {128.0f, 128.0f, 128.0f, 128.0f} 979 }; 980 981 static const union tgsi_exec_channel M128Vec = { 982 {-128.0f, -128.0f, -128.0f, -128.0f} 983 }; 984 985 986 /** 987 * Assert that none of the float values in 'chan' are infinite or NaN. 988 * NaN and Inf may occur normally during program execution and should 989 * not lead to crashes, etc. But when debugging, it's helpful to catch 990 * them. 991 */ 992 static inline void 993 check_inf_or_nan(const union tgsi_exec_channel *chan) 994 { 995 assert(!util_is_inf_or_nan((chan)->f[0])); 996 assert(!util_is_inf_or_nan((chan)->f[1])); 997 assert(!util_is_inf_or_nan((chan)->f[2])); 998 assert(!util_is_inf_or_nan((chan)->f[3])); 999 } 1000 1001 1002 #ifdef DEBUG 1003 static void 1004 print_chan(const char *msg, const union tgsi_exec_channel *chan) 1005 { 1006 debug_printf("%s = {%f, %f, %f, %f}\n", 1007 msg, chan->f[0], chan->f[1], chan->f[2], chan->f[3]); 1008 } 1009 #endif 1010 1011 1012 #ifdef DEBUG 1013 static void 1014 print_temp(const struct tgsi_exec_machine *mach, uint index) 1015 { 1016 const struct tgsi_exec_vector *tmp = &mach->Temps[index]; 1017 int i; 1018 debug_printf("Temp[%u] =\n", index); 1019 for (i = 0; i < 4; i++) { 1020 debug_printf(" %c: { %f, %f, %f, %f }\n", 1021 "XYZW"[i], 1022 tmp->xyzw[i].f[0], 1023 tmp->xyzw[i].f[1], 1024 tmp->xyzw[i].f[2], 1025 tmp->xyzw[i].f[3]); 1026 } 1027 } 1028 #endif 1029 1030 1031 void 1032 tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach, 1033 unsigned num_bufs, 1034 const void **bufs, 1035 const unsigned *buf_sizes) 1036 { 1037 unsigned i; 1038 1039 for (i = 0; i < num_bufs; i++) { 1040 mach->Consts[i] = bufs[i]; 1041 mach->ConstsSize[i] = buf_sizes[i]; 1042 } 1043 } 1044 1045 1046 /** 1047 * Check if there's a potential src/dst register data dependency when 1048 * using SOA execution. 1049 * Example: 1050 * MOV T, T.yxwz; 1051 * This would expand into: 1052 * MOV t0, t1; 1053 * MOV t1, t0; 1054 * MOV t2, t3; 1055 * MOV t3, t2; 1056 * The second instruction will have the wrong value for t0 if executed as-is. 1057 */ 1058 boolean 1059 tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) 1060 { 1061 uint i, chan; 1062 1063 uint writemask = inst->Dst[0].Register.WriteMask; 1064 if (writemask == TGSI_WRITEMASK_X || 1065 writemask == TGSI_WRITEMASK_Y || 1066 writemask == TGSI_WRITEMASK_Z || 1067 writemask == TGSI_WRITEMASK_W || 1068 writemask == TGSI_WRITEMASK_NONE) { 1069 /* no chance of data dependency */ 1070 return FALSE; 1071 } 1072 1073 /* loop over src regs */ 1074 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1075 if ((inst->Src[i].Register.File == 1076 inst->Dst[0].Register.File) && 1077 ((inst->Src[i].Register.Index == 1078 inst->Dst[0].Register.Index) || 1079 inst->Src[i].Register.Indirect || 1080 inst->Dst[0].Register.Indirect)) { 1081 /* loop over dest channels */ 1082 uint channelsWritten = 0x0; 1083 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 1084 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 1085 /* check if we're reading a channel that's been written */ 1086 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan); 1087 if (channelsWritten & (1 << swizzle)) { 1088 return TRUE; 1089 } 1090 1091 channelsWritten |= (1 << chan); 1092 } 1093 } 1094 } 1095 } 1096 return FALSE; 1097 } 1098 1099 1100 /** 1101 * Initialize machine state by expanding tokens to full instructions, 1102 * allocating temporary storage, setting up constants, etc. 1103 * After this, we can call tgsi_exec_machine_run() many times. 1104 */ 1105 void 1106 tgsi_exec_machine_bind_shader( 1107 struct tgsi_exec_machine *mach, 1108 const struct tgsi_token *tokens, 1109 struct tgsi_sampler *sampler, 1110 struct tgsi_image *image, 1111 struct tgsi_buffer *buffer) 1112 { 1113 uint k; 1114 struct tgsi_parse_context parse; 1115 struct tgsi_full_instruction *instructions; 1116 struct tgsi_full_declaration *declarations; 1117 uint maxInstructions = 10, numInstructions = 0; 1118 uint maxDeclarations = 10, numDeclarations = 0; 1119 1120 #if 0 1121 tgsi_dump(tokens, 0); 1122 #endif 1123 1124 util_init_math(); 1125 1126 1127 mach->Tokens = tokens; 1128 mach->Sampler = sampler; 1129 mach->Image = image; 1130 mach->Buffer = buffer; 1131 1132 if (!tokens) { 1133 /* unbind and free all */ 1134 FREE(mach->Declarations); 1135 mach->Declarations = NULL; 1136 mach->NumDeclarations = 0; 1137 1138 FREE(mach->Instructions); 1139 mach->Instructions = NULL; 1140 mach->NumInstructions = 0; 1141 1142 return; 1143 } 1144 1145 k = tgsi_parse_init (&parse, mach->Tokens); 1146 if (k != TGSI_PARSE_OK) { 1147 debug_printf( "Problem parsing!\n" ); 1148 return; 1149 } 1150 1151 mach->ImmLimit = 0; 1152 mach->NumOutputs = 0; 1153 1154 for (k = 0; k < TGSI_SEMANTIC_COUNT; k++) 1155 mach->SysSemanticToIndex[k] = -1; 1156 1157 if (mach->ShaderType == PIPE_SHADER_GEOMETRY && 1158 !mach->UsedGeometryShader) { 1159 struct tgsi_exec_vector *inputs; 1160 struct tgsi_exec_vector *outputs; 1161 1162 inputs = align_malloc(sizeof(struct tgsi_exec_vector) * 1163 TGSI_MAX_PRIM_VERTICES * PIPE_MAX_SHADER_INPUTS, 1164 16); 1165 1166 if (!inputs) 1167 return; 1168 1169 outputs = align_malloc(sizeof(struct tgsi_exec_vector) * 1170 TGSI_MAX_TOTAL_VERTICES, 16); 1171 1172 if (!outputs) { 1173 align_free(inputs); 1174 return; 1175 } 1176 1177 align_free(mach->Inputs); 1178 align_free(mach->Outputs); 1179 1180 mach->Inputs = inputs; 1181 mach->Outputs = outputs; 1182 mach->UsedGeometryShader = TRUE; 1183 } 1184 1185 declarations = (struct tgsi_full_declaration *) 1186 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); 1187 1188 if (!declarations) { 1189 return; 1190 } 1191 1192 instructions = (struct tgsi_full_instruction *) 1193 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); 1194 1195 if (!instructions) { 1196 FREE( declarations ); 1197 return; 1198 } 1199 1200 while( !tgsi_parse_end_of_tokens( &parse ) ) { 1201 uint i; 1202 1203 tgsi_parse_token( &parse ); 1204 switch( parse.FullToken.Token.Type ) { 1205 case TGSI_TOKEN_TYPE_DECLARATION: 1206 /* save expanded declaration */ 1207 if (numDeclarations == maxDeclarations) { 1208 declarations = REALLOC(declarations, 1209 maxDeclarations 1210 * sizeof(struct tgsi_full_declaration), 1211 (maxDeclarations + 10) 1212 * sizeof(struct tgsi_full_declaration)); 1213 maxDeclarations += 10; 1214 } 1215 if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) { 1216 unsigned reg; 1217 for (reg = parse.FullToken.FullDeclaration.Range.First; 1218 reg <= parse.FullToken.FullDeclaration.Range.Last; 1219 ++reg) { 1220 ++mach->NumOutputs; 1221 } 1222 } 1223 else if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_SYSTEM_VALUE) { 1224 const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; 1225 mach->SysSemanticToIndex[decl->Semantic.Name] = decl->Range.First; 1226 } 1227 1228 memcpy(declarations + numDeclarations, 1229 &parse.FullToken.FullDeclaration, 1230 sizeof(declarations[0])); 1231 numDeclarations++; 1232 break; 1233 1234 case TGSI_TOKEN_TYPE_IMMEDIATE: 1235 { 1236 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 1237 assert( size <= 4 ); 1238 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES ); 1239 1240 for( i = 0; i < size; i++ ) { 1241 mach->Imms[mach->ImmLimit][i] = 1242 parse.FullToken.FullImmediate.u[i].Float; 1243 } 1244 mach->ImmLimit += 1; 1245 } 1246 break; 1247 1248 case TGSI_TOKEN_TYPE_INSTRUCTION: 1249 1250 /* save expanded instruction */ 1251 if (numInstructions == maxInstructions) { 1252 instructions = REALLOC(instructions, 1253 maxInstructions 1254 * sizeof(struct tgsi_full_instruction), 1255 (maxInstructions + 10) 1256 * sizeof(struct tgsi_full_instruction)); 1257 maxInstructions += 10; 1258 } 1259 1260 memcpy(instructions + numInstructions, 1261 &parse.FullToken.FullInstruction, 1262 sizeof(instructions[0])); 1263 1264 numInstructions++; 1265 break; 1266 1267 case TGSI_TOKEN_TYPE_PROPERTY: 1268 if (mach->ShaderType == PIPE_SHADER_GEOMETRY) { 1269 if (parse.FullToken.FullProperty.Property.PropertyName == TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) { 1270 mach->MaxOutputVertices = parse.FullToken.FullProperty.u[0].Data; 1271 } 1272 } 1273 break; 1274 1275 default: 1276 assert( 0 ); 1277 } 1278 } 1279 tgsi_parse_free (&parse); 1280 1281 FREE(mach->Declarations); 1282 mach->Declarations = declarations; 1283 mach->NumDeclarations = numDeclarations; 1284 1285 FREE(mach->Instructions); 1286 mach->Instructions = instructions; 1287 mach->NumInstructions = numInstructions; 1288 } 1289 1290 1291 struct tgsi_exec_machine * 1292 tgsi_exec_machine_create(enum pipe_shader_type shader_type) 1293 { 1294 struct tgsi_exec_machine *mach; 1295 uint i; 1296 1297 mach = align_malloc( sizeof *mach, 16 ); 1298 if (!mach) 1299 goto fail; 1300 1301 memset(mach, 0, sizeof(*mach)); 1302 1303 mach->ShaderType = shader_type; 1304 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; 1305 mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES; 1306 mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0]; 1307 1308 if (shader_type != PIPE_SHADER_COMPUTE) { 1309 mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_INPUTS, 16); 1310 mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_OUTPUTS, 16); 1311 if (!mach->Inputs || !mach->Outputs) 1312 goto fail; 1313 } 1314 1315 /* Setup constants needed by the SSE2 executor. */ 1316 for( i = 0; i < 4; i++ ) { 1317 mach->Temps[TGSI_EXEC_TEMP_00000000_I].xyzw[TGSI_EXEC_TEMP_00000000_C].u[i] = 0x00000000; 1318 mach->Temps[TGSI_EXEC_TEMP_7FFFFFFF_I].xyzw[TGSI_EXEC_TEMP_7FFFFFFF_C].u[i] = 0x7FFFFFFF; 1319 mach->Temps[TGSI_EXEC_TEMP_80000000_I].xyzw[TGSI_EXEC_TEMP_80000000_C].u[i] = 0x80000000; 1320 mach->Temps[TGSI_EXEC_TEMP_FFFFFFFF_I].xyzw[TGSI_EXEC_TEMP_FFFFFFFF_C].u[i] = 0xFFFFFFFF; /* not used */ 1321 mach->Temps[TGSI_EXEC_TEMP_ONE_I].xyzw[TGSI_EXEC_TEMP_ONE_C].f[i] = 1.0f; 1322 mach->Temps[TGSI_EXEC_TEMP_TWO_I].xyzw[TGSI_EXEC_TEMP_TWO_C].f[i] = 2.0f; /* not used */ 1323 mach->Temps[TGSI_EXEC_TEMP_128_I].xyzw[TGSI_EXEC_TEMP_128_C].f[i] = 128.0f; 1324 mach->Temps[TGSI_EXEC_TEMP_MINUS_128_I].xyzw[TGSI_EXEC_TEMP_MINUS_128_C].f[i] = -128.0f; 1325 mach->Temps[TGSI_EXEC_TEMP_THREE_I].xyzw[TGSI_EXEC_TEMP_THREE_C].f[i] = 3.0f; 1326 mach->Temps[TGSI_EXEC_TEMP_HALF_I].xyzw[TGSI_EXEC_TEMP_HALF_C].f[i] = 0.5f; 1327 } 1328 1329 #ifdef DEBUG 1330 /* silence warnings */ 1331 (void) print_chan; 1332 (void) print_temp; 1333 #endif 1334 1335 return mach; 1336 1337 fail: 1338 if (mach) { 1339 align_free(mach->Inputs); 1340 align_free(mach->Outputs); 1341 align_free(mach); 1342 } 1343 return NULL; 1344 } 1345 1346 1347 void 1348 tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) 1349 { 1350 if (mach) { 1351 FREE(mach->Instructions); 1352 FREE(mach->Declarations); 1353 1354 align_free(mach->Inputs); 1355 align_free(mach->Outputs); 1356 1357 align_free(mach); 1358 } 1359 } 1360 1361 static void 1362 micro_add(union tgsi_exec_channel *dst, 1363 const union tgsi_exec_channel *src0, 1364 const union tgsi_exec_channel *src1) 1365 { 1366 dst->f[0] = src0->f[0] + src1->f[0]; 1367 dst->f[1] = src0->f[1] + src1->f[1]; 1368 dst->f[2] = src0->f[2] + src1->f[2]; 1369 dst->f[3] = src0->f[3] + src1->f[3]; 1370 } 1371 1372 static void 1373 micro_div( 1374 union tgsi_exec_channel *dst, 1375 const union tgsi_exec_channel *src0, 1376 const union tgsi_exec_channel *src1 ) 1377 { 1378 if (src1->f[0] != 0) { 1379 dst->f[0] = src0->f[0] / src1->f[0]; 1380 } 1381 if (src1->f[1] != 0) { 1382 dst->f[1] = src0->f[1] / src1->f[1]; 1383 } 1384 if (src1->f[2] != 0) { 1385 dst->f[2] = src0->f[2] / src1->f[2]; 1386 } 1387 if (src1->f[3] != 0) { 1388 dst->f[3] = src0->f[3] / src1->f[3]; 1389 } 1390 } 1391 1392 static void 1393 micro_lt( 1394 union tgsi_exec_channel *dst, 1395 const union tgsi_exec_channel *src0, 1396 const union tgsi_exec_channel *src1, 1397 const union tgsi_exec_channel *src2, 1398 const union tgsi_exec_channel *src3 ) 1399 { 1400 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; 1401 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; 1402 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; 1403 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; 1404 } 1405 1406 static void 1407 micro_max(union tgsi_exec_channel *dst, 1408 const union tgsi_exec_channel *src0, 1409 const union tgsi_exec_channel *src1) 1410 { 1411 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; 1412 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; 1413 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; 1414 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; 1415 } 1416 1417 static void 1418 micro_min(union tgsi_exec_channel *dst, 1419 const union tgsi_exec_channel *src0, 1420 const union tgsi_exec_channel *src1) 1421 { 1422 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; 1423 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; 1424 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; 1425 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; 1426 } 1427 1428 static void 1429 micro_mul(union tgsi_exec_channel *dst, 1430 const union tgsi_exec_channel *src0, 1431 const union tgsi_exec_channel *src1) 1432 { 1433 dst->f[0] = src0->f[0] * src1->f[0]; 1434 dst->f[1] = src0->f[1] * src1->f[1]; 1435 dst->f[2] = src0->f[2] * src1->f[2]; 1436 dst->f[3] = src0->f[3] * src1->f[3]; 1437 } 1438 1439 static void 1440 micro_neg( 1441 union tgsi_exec_channel *dst, 1442 const union tgsi_exec_channel *src ) 1443 { 1444 dst->f[0] = -src->f[0]; 1445 dst->f[1] = -src->f[1]; 1446 dst->f[2] = -src->f[2]; 1447 dst->f[3] = -src->f[3]; 1448 } 1449 1450 static void 1451 micro_pow( 1452 union tgsi_exec_channel *dst, 1453 const union tgsi_exec_channel *src0, 1454 const union tgsi_exec_channel *src1 ) 1455 { 1456 #if FAST_MATH 1457 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); 1458 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); 1459 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); 1460 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); 1461 #else 1462 dst->f[0] = powf( src0->f[0], src1->f[0] ); 1463 dst->f[1] = powf( src0->f[1], src1->f[1] ); 1464 dst->f[2] = powf( src0->f[2], src1->f[2] ); 1465 dst->f[3] = powf( src0->f[3], src1->f[3] ); 1466 #endif 1467 } 1468 1469 static void 1470 micro_sub(union tgsi_exec_channel *dst, 1471 const union tgsi_exec_channel *src0, 1472 const union tgsi_exec_channel *src1) 1473 { 1474 dst->f[0] = src0->f[0] - src1->f[0]; 1475 dst->f[1] = src0->f[1] - src1->f[1]; 1476 dst->f[2] = src0->f[2] - src1->f[2]; 1477 dst->f[3] = src0->f[3] - src1->f[3]; 1478 } 1479 1480 static void 1481 fetch_src_file_channel(const struct tgsi_exec_machine *mach, 1482 const uint chan_index, 1483 const uint file, 1484 const uint swizzle, 1485 const union tgsi_exec_channel *index, 1486 const union tgsi_exec_channel *index2D, 1487 union tgsi_exec_channel *chan) 1488 { 1489 uint i; 1490 1491 assert(swizzle < 4); 1492 1493 switch (file) { 1494 case TGSI_FILE_CONSTANT: 1495 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1496 assert(index2D->i[i] >= 0 && index2D->i[i] < PIPE_MAX_CONSTANT_BUFFERS); 1497 assert(mach->Consts[index2D->i[i]]); 1498 1499 if (index->i[i] < 0) { 1500 chan->u[i] = 0; 1501 } else { 1502 /* NOTE: copying the const value as a uint instead of float */ 1503 const uint constbuf = index2D->i[i]; 1504 const uint *buf = (const uint *)mach->Consts[constbuf]; 1505 const int pos = index->i[i] * 4 + swizzle; 1506 /* const buffer bounds check */ 1507 if (pos < 0 || pos >= (int) mach->ConstsSize[constbuf]) { 1508 if (0) { 1509 /* Debug: print warning */ 1510 static int count = 0; 1511 if (count++ < 100) 1512 debug_printf("TGSI Exec: const buffer index %d" 1513 " out of bounds\n", pos); 1514 } 1515 chan->u[i] = 0; 1516 } 1517 else 1518 chan->u[i] = buf[pos]; 1519 } 1520 } 1521 break; 1522 1523 case TGSI_FILE_INPUT: 1524 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1525 /* 1526 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) { 1527 debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n", 1528 index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i], 1529 index2D->i[i], index->i[i]); 1530 }*/ 1531 int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i]; 1532 assert(pos >= 0); 1533 assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS); 1534 chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i]; 1535 } 1536 break; 1537 1538 case TGSI_FILE_SYSTEM_VALUE: 1539 /* XXX no swizzling at this point. Will be needed if we put 1540 * gl_FragCoord, for example, in a sys value register. 1541 */ 1542 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1543 chan->u[i] = mach->SystemValue[index->i[i]].xyzw[swizzle].u[i]; 1544 } 1545 break; 1546 1547 case TGSI_FILE_TEMPORARY: 1548 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1549 assert(index->i[i] < TGSI_EXEC_NUM_TEMPS); 1550 assert(index2D->i[i] == 0); 1551 1552 chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i]; 1553 } 1554 break; 1555 1556 case TGSI_FILE_IMMEDIATE: 1557 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1558 assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit); 1559 assert(index2D->i[i] == 0); 1560 1561 chan->f[i] = mach->Imms[index->i[i]][swizzle]; 1562 } 1563 break; 1564 1565 case TGSI_FILE_ADDRESS: 1566 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1567 assert(index->i[i] >= 0); 1568 assert(index2D->i[i] == 0); 1569 1570 chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i]; 1571 } 1572 break; 1573 1574 case TGSI_FILE_PREDICATE: 1575 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1576 assert(index->i[i] >= 0 && index->i[i] < TGSI_EXEC_NUM_PREDS); 1577 assert(index2D->i[i] == 0); 1578 1579 chan->u[i] = mach->Predicates[0].xyzw[swizzle].u[i]; 1580 } 1581 break; 1582 1583 case TGSI_FILE_OUTPUT: 1584 /* vertex/fragment output vars can be read too */ 1585 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1586 assert(index->i[i] >= 0); 1587 assert(index2D->i[i] == 0); 1588 1589 chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i]; 1590 } 1591 break; 1592 1593 default: 1594 assert(0); 1595 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1596 chan->u[i] = 0; 1597 } 1598 } 1599 } 1600 1601 static void 1602 fetch_source_d(const struct tgsi_exec_machine *mach, 1603 union tgsi_exec_channel *chan, 1604 const struct tgsi_full_src_register *reg, 1605 const uint chan_index, 1606 enum tgsi_exec_datatype src_datatype) 1607 { 1608 union tgsi_exec_channel index; 1609 union tgsi_exec_channel index2D; 1610 uint swizzle; 1611 1612 /* We start with a direct index into a register file. 1613 * 1614 * file[1], 1615 * where: 1616 * file = Register.File 1617 * [1] = Register.Index 1618 */ 1619 index.i[0] = 1620 index.i[1] = 1621 index.i[2] = 1622 index.i[3] = reg->Register.Index; 1623 1624 /* There is an extra source register that indirectly subscripts 1625 * a register file. The direct index now becomes an offset 1626 * that is being added to the indirect register. 1627 * 1628 * file[ind[2].x+1], 1629 * where: 1630 * ind = Indirect.File 1631 * [2] = Indirect.Index 1632 * .x = Indirect.SwizzleX 1633 */ 1634 if (reg->Register.Indirect) { 1635 union tgsi_exec_channel index2; 1636 union tgsi_exec_channel indir_index; 1637 const uint execmask = mach->ExecMask; 1638 uint i; 1639 1640 /* which address register (always zero now) */ 1641 index2.i[0] = 1642 index2.i[1] = 1643 index2.i[2] = 1644 index2.i[3] = reg->Indirect.Index; 1645 /* get current value of address register[swizzle] */ 1646 swizzle = reg->Indirect.Swizzle; 1647 fetch_src_file_channel(mach, 1648 chan_index, 1649 reg->Indirect.File, 1650 swizzle, 1651 &index2, 1652 &ZeroVec, 1653 &indir_index); 1654 1655 /* add value of address register to the offset */ 1656 index.i[0] += indir_index.i[0]; 1657 index.i[1] += indir_index.i[1]; 1658 index.i[2] += indir_index.i[2]; 1659 index.i[3] += indir_index.i[3]; 1660 1661 /* for disabled execution channels, zero-out the index to 1662 * avoid using a potential garbage value. 1663 */ 1664 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1665 if ((execmask & (1 << i)) == 0) 1666 index.i[i] = 0; 1667 } 1668 } 1669 1670 /* There is an extra source register that is a second 1671 * subscript to a register file. Effectively it means that 1672 * the register file is actually a 2D array of registers. 1673 * 1674 * file[3][1], 1675 * where: 1676 * [3] = Dimension.Index 1677 */ 1678 if (reg->Register.Dimension) { 1679 index2D.i[0] = 1680 index2D.i[1] = 1681 index2D.i[2] = 1682 index2D.i[3] = reg->Dimension.Index; 1683 1684 /* Again, the second subscript index can be addressed indirectly 1685 * identically to the first one. 1686 * Nothing stops us from indirectly addressing the indirect register, 1687 * but there is no need for that, so we won't exercise it. 1688 * 1689 * file[ind[4].y+3][1], 1690 * where: 1691 * ind = DimIndirect.File 1692 * [4] = DimIndirect.Index 1693 * .y = DimIndirect.SwizzleX 1694 */ 1695 if (reg->Dimension.Indirect) { 1696 union tgsi_exec_channel index2; 1697 union tgsi_exec_channel indir_index; 1698 const uint execmask = mach->ExecMask; 1699 uint i; 1700 1701 index2.i[0] = 1702 index2.i[1] = 1703 index2.i[2] = 1704 index2.i[3] = reg->DimIndirect.Index; 1705 1706 swizzle = reg->DimIndirect.Swizzle; 1707 fetch_src_file_channel(mach, 1708 chan_index, 1709 reg->DimIndirect.File, 1710 swizzle, 1711 &index2, 1712 &ZeroVec, 1713 &indir_index); 1714 1715 index2D.i[0] += indir_index.i[0]; 1716 index2D.i[1] += indir_index.i[1]; 1717 index2D.i[2] += indir_index.i[2]; 1718 index2D.i[3] += indir_index.i[3]; 1719 1720 /* for disabled execution channels, zero-out the index to 1721 * avoid using a potential garbage value. 1722 */ 1723 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1724 if ((execmask & (1 << i)) == 0) { 1725 index2D.i[i] = 0; 1726 } 1727 } 1728 } 1729 1730 /* If by any chance there was a need for a 3D array of register 1731 * files, we would have to check whether Dimension is followed 1732 * by a dimension register and continue the saga. 1733 */ 1734 } else { 1735 index2D.i[0] = 1736 index2D.i[1] = 1737 index2D.i[2] = 1738 index2D.i[3] = 0; 1739 } 1740 1741 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 1742 fetch_src_file_channel(mach, 1743 chan_index, 1744 reg->Register.File, 1745 swizzle, 1746 &index, 1747 &index2D, 1748 chan); 1749 } 1750 1751 static void 1752 fetch_source(const struct tgsi_exec_machine *mach, 1753 union tgsi_exec_channel *chan, 1754 const struct tgsi_full_src_register *reg, 1755 const uint chan_index, 1756 enum tgsi_exec_datatype src_datatype) 1757 { 1758 fetch_source_d(mach, chan, reg, chan_index, src_datatype); 1759 1760 if (reg->Register.Absolute) { 1761 if (src_datatype == TGSI_EXEC_DATA_FLOAT) { 1762 micro_abs(chan, chan); 1763 } else { 1764 micro_iabs(chan, chan); 1765 } 1766 } 1767 1768 if (reg->Register.Negate) { 1769 if (src_datatype == TGSI_EXEC_DATA_FLOAT) { 1770 micro_neg(chan, chan); 1771 } else { 1772 micro_ineg(chan, chan); 1773 } 1774 } 1775 } 1776 1777 static union tgsi_exec_channel * 1778 store_dest_dstret(struct tgsi_exec_machine *mach, 1779 const union tgsi_exec_channel *chan, 1780 const struct tgsi_full_dst_register *reg, 1781 const struct tgsi_full_instruction *inst, 1782 uint chan_index, 1783 enum tgsi_exec_datatype dst_datatype) 1784 { 1785 uint i; 1786 static union tgsi_exec_channel null; 1787 union tgsi_exec_channel *dst; 1788 union tgsi_exec_channel index2D; 1789 uint execmask = mach->ExecMask; 1790 int offset = 0; /* indirection offset */ 1791 int index; 1792 1793 /* for debugging */ 1794 if (0 && dst_datatype == TGSI_EXEC_DATA_FLOAT) { 1795 check_inf_or_nan(chan); 1796 } 1797 1798 /* There is an extra source register that indirectly subscripts 1799 * a register file. The direct index now becomes an offset 1800 * that is being added to the indirect register. 1801 * 1802 * file[ind[2].x+1], 1803 * where: 1804 * ind = Indirect.File 1805 * [2] = Indirect.Index 1806 * .x = Indirect.SwizzleX 1807 */ 1808 if (reg->Register.Indirect) { 1809 union tgsi_exec_channel index; 1810 union tgsi_exec_channel indir_index; 1811 uint swizzle; 1812 1813 /* which address register (always zero for now) */ 1814 index.i[0] = 1815 index.i[1] = 1816 index.i[2] = 1817 index.i[3] = reg->Indirect.Index; 1818 1819 /* get current value of address register[swizzle] */ 1820 swizzle = reg->Indirect.Swizzle; 1821 1822 /* fetch values from the address/indirection register */ 1823 fetch_src_file_channel(mach, 1824 chan_index, 1825 reg->Indirect.File, 1826 swizzle, 1827 &index, 1828 &ZeroVec, 1829 &indir_index); 1830 1831 /* save indirection offset */ 1832 offset = indir_index.i[0]; 1833 } 1834 1835 /* There is an extra source register that is a second 1836 * subscript to a register file. Effectively it means that 1837 * the register file is actually a 2D array of registers. 1838 * 1839 * file[3][1], 1840 * where: 1841 * [3] = Dimension.Index 1842 */ 1843 if (reg->Register.Dimension) { 1844 index2D.i[0] = 1845 index2D.i[1] = 1846 index2D.i[2] = 1847 index2D.i[3] = reg->Dimension.Index; 1848 1849 /* Again, the second subscript index can be addressed indirectly 1850 * identically to the first one. 1851 * Nothing stops us from indirectly addressing the indirect register, 1852 * but there is no need for that, so we won't exercise it. 1853 * 1854 * file[ind[4].y+3][1], 1855 * where: 1856 * ind = DimIndirect.File 1857 * [4] = DimIndirect.Index 1858 * .y = DimIndirect.SwizzleX 1859 */ 1860 if (reg->Dimension.Indirect) { 1861 union tgsi_exec_channel index2; 1862 union tgsi_exec_channel indir_index; 1863 const uint execmask = mach->ExecMask; 1864 unsigned swizzle; 1865 uint i; 1866 1867 index2.i[0] = 1868 index2.i[1] = 1869 index2.i[2] = 1870 index2.i[3] = reg->DimIndirect.Index; 1871 1872 swizzle = reg->DimIndirect.Swizzle; 1873 fetch_src_file_channel(mach, 1874 chan_index, 1875 reg->DimIndirect.File, 1876 swizzle, 1877 &index2, 1878 &ZeroVec, 1879 &indir_index); 1880 1881 index2D.i[0] += indir_index.i[0]; 1882 index2D.i[1] += indir_index.i[1]; 1883 index2D.i[2] += indir_index.i[2]; 1884 index2D.i[3] += indir_index.i[3]; 1885 1886 /* for disabled execution channels, zero-out the index to 1887 * avoid using a potential garbage value. 1888 */ 1889 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1890 if ((execmask & (1 << i)) == 0) { 1891 index2D.i[i] = 0; 1892 } 1893 } 1894 } 1895 1896 /* If by any chance there was a need for a 3D array of register 1897 * files, we would have to check whether Dimension is followed 1898 * by a dimension register and continue the saga. 1899 */ 1900 } else { 1901 index2D.i[0] = 1902 index2D.i[1] = 1903 index2D.i[2] = 1904 index2D.i[3] = 0; 1905 } 1906 1907 switch (reg->Register.File) { 1908 case TGSI_FILE_NULL: 1909 dst = &null; 1910 break; 1911 1912 case TGSI_FILE_OUTPUT: 1913 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] 1914 + reg->Register.Index; 1915 dst = &mach->Outputs[offset + index].xyzw[chan_index]; 1916 #if 0 1917 debug_printf("NumOutputs = %d, TEMP_O_C/I = %d, redindex = %d\n", 1918 mach->NumOutputs, mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0], 1919 reg->Register.Index); 1920 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) { 1921 debug_printf("STORING OUT[%d] mask(%d), = (", offset + index, execmask); 1922 for (i = 0; i < TGSI_QUAD_SIZE; i++) 1923 if (execmask & (1 << i)) 1924 debug_printf("%f, ", chan->f[i]); 1925 debug_printf(")\n"); 1926 } 1927 #endif 1928 break; 1929 1930 case TGSI_FILE_TEMPORARY: 1931 index = reg->Register.Index; 1932 assert( index < TGSI_EXEC_NUM_TEMPS ); 1933 dst = &mach->Temps[offset + index].xyzw[chan_index]; 1934 break; 1935 1936 case TGSI_FILE_ADDRESS: 1937 index = reg->Register.Index; 1938 dst = &mach->Addrs[index].xyzw[chan_index]; 1939 break; 1940 1941 case TGSI_FILE_PREDICATE: 1942 index = reg->Register.Index; 1943 assert(index < TGSI_EXEC_NUM_PREDS); 1944 dst = &mach->Predicates[index].xyzw[chan_index]; 1945 break; 1946 1947 default: 1948 assert( 0 ); 1949 return NULL; 1950 } 1951 1952 if (inst->Instruction.Predicate) { 1953 uint swizzle; 1954 union tgsi_exec_channel *pred; 1955 1956 switch (chan_index) { 1957 case TGSI_CHAN_X: 1958 swizzle = inst->Predicate.SwizzleX; 1959 break; 1960 case TGSI_CHAN_Y: 1961 swizzle = inst->Predicate.SwizzleY; 1962 break; 1963 case TGSI_CHAN_Z: 1964 swizzle = inst->Predicate.SwizzleZ; 1965 break; 1966 case TGSI_CHAN_W: 1967 swizzle = inst->Predicate.SwizzleW; 1968 break; 1969 default: 1970 assert(0); 1971 return NULL; 1972 } 1973 1974 assert(inst->Predicate.Index == 0); 1975 1976 pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle]; 1977 1978 if (inst->Predicate.Negate) { 1979 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1980 if (pred->u[i]) { 1981 execmask &= ~(1 << i); 1982 } 1983 } 1984 } else { 1985 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1986 if (!pred->u[i]) { 1987 execmask &= ~(1 << i); 1988 } 1989 } 1990 } 1991 } 1992 1993 return dst; 1994 } 1995 1996 static void 1997 store_dest_double(struct tgsi_exec_machine *mach, 1998 const union tgsi_exec_channel *chan, 1999 const struct tgsi_full_dst_register *reg, 2000 const struct tgsi_full_instruction *inst, 2001 uint chan_index, 2002 enum tgsi_exec_datatype dst_datatype) 2003 { 2004 union tgsi_exec_channel *dst; 2005 const uint execmask = mach->ExecMask; 2006 int i; 2007 2008 dst = store_dest_dstret(mach, chan, reg, inst, chan_index, 2009 dst_datatype); 2010 if (!dst) 2011 return; 2012 2013 /* doubles path */ 2014 for (i = 0; i < TGSI_QUAD_SIZE; i++) 2015 if (execmask & (1 << i)) 2016 dst->i[i] = chan->i[i]; 2017 } 2018 2019 static void 2020 store_dest(struct tgsi_exec_machine *mach, 2021 const union tgsi_exec_channel *chan, 2022 const struct tgsi_full_dst_register *reg, 2023 const struct tgsi_full_instruction *inst, 2024 uint chan_index, 2025 enum tgsi_exec_datatype dst_datatype) 2026 { 2027 union tgsi_exec_channel *dst; 2028 const uint execmask = mach->ExecMask; 2029 int i; 2030 2031 dst = store_dest_dstret(mach, chan, reg, inst, chan_index, 2032 dst_datatype); 2033 if (!dst) 2034 return; 2035 2036 if (!inst->Instruction.Saturate) { 2037 for (i = 0; i < TGSI_QUAD_SIZE; i++) 2038 if (execmask & (1 << i)) 2039 dst->i[i] = chan->i[i]; 2040 } 2041 else { 2042 for (i = 0; i < TGSI_QUAD_SIZE; i++) 2043 if (execmask & (1 << i)) { 2044 if (chan->f[i] < 0.0f) 2045 dst->f[i] = 0.0f; 2046 else if (chan->f[i] > 1.0f) 2047 dst->f[i] = 1.0f; 2048 else 2049 dst->i[i] = chan->i[i]; 2050 } 2051 } 2052 } 2053 2054 #define FETCH(VAL,INDEX,CHAN)\ 2055 fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT) 2056 2057 #define IFETCH(VAL,INDEX,CHAN)\ 2058 fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_INT) 2059 2060 2061 /** 2062 * Execute ARB-style KIL which is predicated by a src register. 2063 * Kill fragment if any of the four values is less than zero. 2064 */ 2065 static void 2066 exec_kill_if(struct tgsi_exec_machine *mach, 2067 const struct tgsi_full_instruction *inst) 2068 { 2069 uint uniquemask; 2070 uint chan_index; 2071 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 2072 union tgsi_exec_channel r[1]; 2073 2074 /* This mask stores component bits that were already tested. */ 2075 uniquemask = 0; 2076 2077 for (chan_index = 0; chan_index < 4; chan_index++) 2078 { 2079 uint swizzle; 2080 uint i; 2081 2082 /* unswizzle channel */ 2083 swizzle = tgsi_util_get_full_src_register_swizzle ( 2084 &inst->Src[0], 2085 chan_index); 2086 2087 /* check if the component has not been already tested */ 2088 if (uniquemask & (1 << swizzle)) 2089 continue; 2090 uniquemask |= 1 << swizzle; 2091 2092 FETCH(&r[0], 0, chan_index); 2093 for (i = 0; i < 4; i++) 2094 if (r[0].f[i] < 0.0f) 2095 kilmask |= 1 << i; 2096 } 2097 2098 /* restrict to fragments currently executing */ 2099 kilmask &= mach->ExecMask; 2100 2101 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 2102 } 2103 2104 /** 2105 * Unconditional fragment kill/discard. 2106 */ 2107 static void 2108 exec_kill(struct tgsi_exec_machine *mach, 2109 const struct tgsi_full_instruction *inst) 2110 { 2111 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 2112 2113 /* kill fragment for all fragments currently executing */ 2114 kilmask = mach->ExecMask; 2115 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 2116 } 2117 2118 static void 2119 emit_vertex(struct tgsi_exec_machine *mach) 2120 { 2121 /* FIXME: check for exec mask correctly 2122 unsigned i; 2123 for (i = 0; i < TGSI_QUAD_SIZE; ++i) { 2124 if ((mach->ExecMask & (1 << i))) 2125 */ 2126 if (mach->ExecMask) { 2127 if (mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] >= mach->MaxOutputVertices) 2128 return; 2129 2130 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs; 2131 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; 2132 } 2133 } 2134 2135 static void 2136 emit_primitive(struct tgsi_exec_machine *mach) 2137 { 2138 unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]; 2139 /* FIXME: check for exec mask correctly 2140 unsigned i; 2141 for (i = 0; i < TGSI_QUAD_SIZE; ++i) { 2142 if ((mach->ExecMask & (1 << i))) 2143 */ 2144 if (mach->ExecMask) { 2145 ++(*prim_count); 2146 debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs); 2147 mach->Primitives[*prim_count] = 0; 2148 } 2149 } 2150 2151 static void 2152 conditional_emit_primitive(struct tgsi_exec_machine *mach) 2153 { 2154 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) { 2155 int emitted_verts = 2156 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]; 2157 if (emitted_verts) { 2158 emit_primitive(mach); 2159 } 2160 } 2161 } 2162 2163 2164 /* 2165 * Fetch four texture samples using STR texture coordinates. 2166 */ 2167 static void 2168 fetch_texel( struct tgsi_sampler *sampler, 2169 const unsigned sview_idx, 2170 const unsigned sampler_idx, 2171 const union tgsi_exec_channel *s, 2172 const union tgsi_exec_channel *t, 2173 const union tgsi_exec_channel *p, 2174 const union tgsi_exec_channel *c0, 2175 const union tgsi_exec_channel *c1, 2176 float derivs[3][2][TGSI_QUAD_SIZE], 2177 const int8_t offset[3], 2178 enum tgsi_sampler_control control, 2179 union tgsi_exec_channel *r, 2180 union tgsi_exec_channel *g, 2181 union tgsi_exec_channel *b, 2182 union tgsi_exec_channel *a ) 2183 { 2184 uint j; 2185 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 2186 2187 /* FIXME: handle explicit derivs, offsets */ 2188 sampler->get_samples(sampler, sview_idx, sampler_idx, 2189 s->f, t->f, p->f, c0->f, c1->f, derivs, offset, control, rgba); 2190 2191 for (j = 0; j < 4; j++) { 2192 r->f[j] = rgba[0][j]; 2193 g->f[j] = rgba[1][j]; 2194 b->f[j] = rgba[2][j]; 2195 a->f[j] = rgba[3][j]; 2196 } 2197 } 2198 2199 2200 #define TEX_MODIFIER_NONE 0 2201 #define TEX_MODIFIER_PROJECTED 1 2202 #define TEX_MODIFIER_LOD_BIAS 2 2203 #define TEX_MODIFIER_EXPLICIT_LOD 3 2204 #define TEX_MODIFIER_LEVEL_ZERO 4 2205 #define TEX_MODIFIER_GATHER 5 2206 2207 /* 2208 * Fetch all 3 (for s,t,r coords) texel offsets, put them into int array. 2209 */ 2210 static void 2211 fetch_texel_offsets(struct tgsi_exec_machine *mach, 2212 const struct tgsi_full_instruction *inst, 2213 int8_t offsets[3]) 2214 { 2215 if (inst->Texture.NumOffsets == 1) { 2216 union tgsi_exec_channel index; 2217 union tgsi_exec_channel offset[3]; 2218 index.i[0] = index.i[1] = index.i[2] = index.i[3] = inst->TexOffsets[0].Index; 2219 fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File, 2220 inst->TexOffsets[0].SwizzleX, &index, &ZeroVec, &offset[0]); 2221 fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File, 2222 inst->TexOffsets[0].SwizzleY, &index, &ZeroVec, &offset[1]); 2223 fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File, 2224 inst->TexOffsets[0].SwizzleZ, &index, &ZeroVec, &offset[2]); 2225 offsets[0] = offset[0].i[0]; 2226 offsets[1] = offset[1].i[0]; 2227 offsets[2] = offset[2].i[0]; 2228 } else { 2229 assert(inst->Texture.NumOffsets == 0); 2230 offsets[0] = offsets[1] = offsets[2] = 0; 2231 } 2232 } 2233 2234 2235 /* 2236 * Fetch dx and dy values for one channel (s, t or r). 2237 * Put dx values into one float array, dy values into another. 2238 */ 2239 static void 2240 fetch_assign_deriv_channel(struct tgsi_exec_machine *mach, 2241 const struct tgsi_full_instruction *inst, 2242 unsigned regdsrcx, 2243 unsigned chan, 2244 float derivs[2][TGSI_QUAD_SIZE]) 2245 { 2246 union tgsi_exec_channel d; 2247 FETCH(&d, regdsrcx, chan); 2248 derivs[0][0] = d.f[0]; 2249 derivs[0][1] = d.f[1]; 2250 derivs[0][2] = d.f[2]; 2251 derivs[0][3] = d.f[3]; 2252 FETCH(&d, regdsrcx + 1, chan); 2253 derivs[1][0] = d.f[0]; 2254 derivs[1][1] = d.f[1]; 2255 derivs[1][2] = d.f[2]; 2256 derivs[1][3] = d.f[3]; 2257 } 2258 2259 static uint 2260 fetch_sampler_unit(struct tgsi_exec_machine *mach, 2261 const struct tgsi_full_instruction *inst, 2262 uint sampler) 2263 { 2264 uint unit = 0; 2265 int i; 2266 if (inst->Src[sampler].Register.Indirect) { 2267 const struct tgsi_full_src_register *reg = &inst->Src[sampler]; 2268 union tgsi_exec_channel indir_index, index2; 2269 const uint execmask = mach->ExecMask; 2270 index2.i[0] = 2271 index2.i[1] = 2272 index2.i[2] = 2273 index2.i[3] = reg->Indirect.Index; 2274 2275 fetch_src_file_channel(mach, 2276 0, 2277 reg->Indirect.File, 2278 reg->Indirect.Swizzle, 2279 &index2, 2280 &ZeroVec, 2281 &indir_index); 2282 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 2283 if (execmask & (1 << i)) { 2284 unit = inst->Src[sampler].Register.Index + indir_index.i[i]; 2285 break; 2286 } 2287 } 2288 2289 } else { 2290 unit = inst->Src[sampler].Register.Index; 2291 } 2292 return unit; 2293 } 2294 2295 /* 2296 * execute a texture instruction. 2297 * 2298 * modifier is used to control the channel routing for the 2299 * instruction variants like proj, lod, and texture with lod bias. 2300 * sampler indicates which src register the sampler is contained in. 2301 */ 2302 static void 2303 exec_tex(struct tgsi_exec_machine *mach, 2304 const struct tgsi_full_instruction *inst, 2305 uint modifier, uint sampler) 2306 { 2307 const union tgsi_exec_channel *args[5], *proj = NULL; 2308 union tgsi_exec_channel r[5]; 2309 enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE; 2310 uint chan; 2311 uint unit; 2312 int8_t offsets[3]; 2313 int dim, shadow_ref, i; 2314 2315 unit = fetch_sampler_unit(mach, inst, sampler); 2316 /* always fetch all 3 offsets, overkill but keeps code simple */ 2317 fetch_texel_offsets(mach, inst, offsets); 2318 2319 assert(modifier != TEX_MODIFIER_LEVEL_ZERO); 2320 assert(inst->Texture.Texture != TGSI_TEXTURE_BUFFER); 2321 2322 dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture); 2323 shadow_ref = tgsi_util_get_shadow_ref_src_index(inst->Texture.Texture); 2324 2325 assert(dim <= 4); 2326 if (shadow_ref >= 0) 2327 assert(shadow_ref >= dim && shadow_ref < ARRAY_SIZE(args)); 2328 2329 /* fetch modifier to the last argument */ 2330 if (modifier != TEX_MODIFIER_NONE) { 2331 const int last = ARRAY_SIZE(args) - 1; 2332 2333 /* fetch modifier from src0.w or src1.x */ 2334 if (sampler == 1) { 2335 assert(dim <= TGSI_CHAN_W && shadow_ref != TGSI_CHAN_W); 2336 FETCH(&r[last], 0, TGSI_CHAN_W); 2337 } 2338 else { 2339 assert(shadow_ref != 4); 2340 FETCH(&r[last], 1, TGSI_CHAN_X); 2341 } 2342 2343 if (modifier != TEX_MODIFIER_PROJECTED) { 2344 args[last] = &r[last]; 2345 } 2346 else { 2347 proj = &r[last]; 2348 args[last] = &ZeroVec; 2349 } 2350 2351 /* point unused arguments to zero vector */ 2352 for (i = dim; i < last; i++) 2353 args[i] = &ZeroVec; 2354 2355 if (modifier == TEX_MODIFIER_EXPLICIT_LOD) 2356 control = TGSI_SAMPLER_LOD_EXPLICIT; 2357 else if (modifier == TEX_MODIFIER_LOD_BIAS) 2358 control = TGSI_SAMPLER_LOD_BIAS; 2359 else if (modifier == TEX_MODIFIER_GATHER) 2360 control = TGSI_SAMPLER_GATHER; 2361 } 2362 else { 2363 for (i = dim; i < ARRAY_SIZE(args); i++) 2364 args[i] = &ZeroVec; 2365 } 2366 2367 /* fetch coordinates */ 2368 for (i = 0; i < dim; i++) { 2369 FETCH(&r[i], 0, TGSI_CHAN_X + i); 2370 2371 if (proj) 2372 micro_div(&r[i], &r[i], proj); 2373 2374 args[i] = &r[i]; 2375 } 2376 2377 /* fetch reference value */ 2378 if (shadow_ref >= 0) { 2379 FETCH(&r[shadow_ref], shadow_ref / 4, TGSI_CHAN_X + (shadow_ref % 4)); 2380 2381 if (proj) 2382 micro_div(&r[shadow_ref], &r[shadow_ref], proj); 2383 2384 args[shadow_ref] = &r[shadow_ref]; 2385 } 2386 2387 fetch_texel(mach->Sampler, unit, unit, 2388 args[0], args[1], args[2], args[3], args[4], 2389 NULL, offsets, control, 2390 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2391 2392 #if 0 2393 debug_printf("fetch r: %g %g %g %g\n", 2394 r[0].f[0], r[0].f[1], r[0].f[2], r[0].f[3]); 2395 debug_printf("fetch g: %g %g %g %g\n", 2396 r[1].f[0], r[1].f[1], r[1].f[2], r[1].f[3]); 2397 debug_printf("fetch b: %g %g %g %g\n", 2398 r[2].f[0], r[2].f[1], r[2].f[2], r[2].f[3]); 2399 debug_printf("fetch a: %g %g %g %g\n", 2400 r[3].f[0], r[3].f[1], r[3].f[2], r[3].f[3]); 2401 #endif 2402 2403 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2404 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2405 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2406 } 2407 } 2408 } 2409 2410 static void 2411 exec_lodq(struct tgsi_exec_machine *mach, 2412 const struct tgsi_full_instruction *inst) 2413 { 2414 uint unit; 2415 int dim; 2416 int i; 2417 union tgsi_exec_channel coords[4]; 2418 const union tgsi_exec_channel *args[ARRAY_SIZE(coords)]; 2419 union tgsi_exec_channel r[2]; 2420 2421 unit = fetch_sampler_unit(mach, inst, 1); 2422 dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture); 2423 assert(dim <= ARRAY_SIZE(coords)); 2424 /* fetch coordinates */ 2425 for (i = 0; i < dim; i++) { 2426 FETCH(&coords[i], 0, TGSI_CHAN_X + i); 2427 args[i] = &coords[i]; 2428 } 2429 for (i = dim; i < ARRAY_SIZE(coords); i++) { 2430 args[i] = &ZeroVec; 2431 } 2432 mach->Sampler->query_lod(mach->Sampler, unit, unit, 2433 args[0]->f, 2434 args[1]->f, 2435 args[2]->f, 2436 args[3]->f, 2437 TGSI_SAMPLER_LOD_NONE, 2438 r[0].f, 2439 r[1].f); 2440 2441 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 2442 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, 2443 TGSI_EXEC_DATA_FLOAT); 2444 } 2445 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 2446 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y, 2447 TGSI_EXEC_DATA_FLOAT); 2448 } 2449 } 2450 2451 static void 2452 exec_txd(struct tgsi_exec_machine *mach, 2453 const struct tgsi_full_instruction *inst) 2454 { 2455 union tgsi_exec_channel r[4]; 2456 float derivs[3][2][TGSI_QUAD_SIZE]; 2457 uint chan; 2458 uint unit; 2459 int8_t offsets[3]; 2460 2461 unit = fetch_sampler_unit(mach, inst, 3); 2462 /* always fetch all 3 offsets, overkill but keeps code simple */ 2463 fetch_texel_offsets(mach, inst, offsets); 2464 2465 switch (inst->Texture.Texture) { 2466 case TGSI_TEXTURE_1D: 2467 FETCH(&r[0], 0, TGSI_CHAN_X); 2468 2469 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2470 2471 fetch_texel(mach->Sampler, unit, unit, 2472 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 2473 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2474 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2475 break; 2476 2477 case TGSI_TEXTURE_SHADOW1D: 2478 case TGSI_TEXTURE_1D_ARRAY: 2479 case TGSI_TEXTURE_SHADOW1D_ARRAY: 2480 /* SHADOW1D/1D_ARRAY would not need Y/Z respectively, but don't bother */ 2481 FETCH(&r[0], 0, TGSI_CHAN_X); 2482 FETCH(&r[1], 0, TGSI_CHAN_Y); 2483 FETCH(&r[2], 0, TGSI_CHAN_Z); 2484 2485 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2486 2487 fetch_texel(mach->Sampler, unit, unit, 2488 &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 2489 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2490 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2491 break; 2492 2493 case TGSI_TEXTURE_2D: 2494 case TGSI_TEXTURE_RECT: 2495 FETCH(&r[0], 0, TGSI_CHAN_X); 2496 FETCH(&r[1], 0, TGSI_CHAN_Y); 2497 2498 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2499 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]); 2500 2501 fetch_texel(mach->Sampler, unit, unit, 2502 &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 2503 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2504 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2505 break; 2506 2507 2508 case TGSI_TEXTURE_SHADOW2D: 2509 case TGSI_TEXTURE_SHADOWRECT: 2510 case TGSI_TEXTURE_2D_ARRAY: 2511 case TGSI_TEXTURE_SHADOW2D_ARRAY: 2512 /* only SHADOW2D_ARRAY actually needs W */ 2513 FETCH(&r[0], 0, TGSI_CHAN_X); 2514 FETCH(&r[1], 0, TGSI_CHAN_Y); 2515 FETCH(&r[2], 0, TGSI_CHAN_Z); 2516 FETCH(&r[3], 0, TGSI_CHAN_W); 2517 2518 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2519 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]); 2520 2521 fetch_texel(mach->Sampler, unit, unit, 2522 &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */ 2523 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2524 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2525 break; 2526 2527 case TGSI_TEXTURE_3D: 2528 case TGSI_TEXTURE_CUBE: 2529 case TGSI_TEXTURE_CUBE_ARRAY: 2530 case TGSI_TEXTURE_SHADOWCUBE: 2531 /* only TEXTURE_CUBE_ARRAY and TEXTURE_SHADOWCUBE actually need W */ 2532 FETCH(&r[0], 0, TGSI_CHAN_X); 2533 FETCH(&r[1], 0, TGSI_CHAN_Y); 2534 FETCH(&r[2], 0, TGSI_CHAN_Z); 2535 FETCH(&r[3], 0, TGSI_CHAN_W); 2536 2537 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2538 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]); 2539 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Z, derivs[2]); 2540 2541 fetch_texel(mach->Sampler, unit, unit, 2542 &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */ 2543 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2544 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2545 break; 2546 2547 default: 2548 assert(0); 2549 } 2550 2551 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2552 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2553 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2554 } 2555 } 2556 } 2557 2558 2559 static void 2560 exec_txf(struct tgsi_exec_machine *mach, 2561 const struct tgsi_full_instruction *inst) 2562 { 2563 union tgsi_exec_channel r[4]; 2564 uint chan; 2565 uint unit; 2566 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 2567 int j; 2568 int8_t offsets[3]; 2569 unsigned target; 2570 2571 unit = fetch_sampler_unit(mach, inst, 1); 2572 /* always fetch all 3 offsets, overkill but keeps code simple */ 2573 fetch_texel_offsets(mach, inst, offsets); 2574 2575 IFETCH(&r[3], 0, TGSI_CHAN_W); 2576 2577 if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I || 2578 inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) { 2579 target = mach->SamplerViews[unit].Resource; 2580 } 2581 else { 2582 target = inst->Texture.Texture; 2583 } 2584 switch(target) { 2585 case TGSI_TEXTURE_3D: 2586 case TGSI_TEXTURE_2D_ARRAY: 2587 case TGSI_TEXTURE_SHADOW2D_ARRAY: 2588 case TGSI_TEXTURE_2D_ARRAY_MSAA: 2589 IFETCH(&r[2], 0, TGSI_CHAN_Z); 2590 /* fallthrough */ 2591 case TGSI_TEXTURE_2D: 2592 case TGSI_TEXTURE_RECT: 2593 case TGSI_TEXTURE_SHADOW1D_ARRAY: 2594 case TGSI_TEXTURE_SHADOW2D: 2595 case TGSI_TEXTURE_SHADOWRECT: 2596 case TGSI_TEXTURE_1D_ARRAY: 2597 case TGSI_TEXTURE_2D_MSAA: 2598 IFETCH(&r[1], 0, TGSI_CHAN_Y); 2599 /* fallthrough */ 2600 case TGSI_TEXTURE_BUFFER: 2601 case TGSI_TEXTURE_1D: 2602 case TGSI_TEXTURE_SHADOW1D: 2603 IFETCH(&r[0], 0, TGSI_CHAN_X); 2604 break; 2605 default: 2606 assert(0); 2607 break; 2608 } 2609 2610 mach->Sampler->get_texel(mach->Sampler, unit, r[0].i, r[1].i, r[2].i, r[3].i, 2611 offsets, rgba); 2612 2613 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 2614 r[0].f[j] = rgba[0][j]; 2615 r[1].f[j] = rgba[1][j]; 2616 r[2].f[j] = rgba[2][j]; 2617 r[3].f[j] = rgba[3][j]; 2618 } 2619 2620 if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I || 2621 inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) { 2622 unsigned char swizzles[4]; 2623 swizzles[0] = inst->Src[1].Register.SwizzleX; 2624 swizzles[1] = inst->Src[1].Register.SwizzleY; 2625 swizzles[2] = inst->Src[1].Register.SwizzleZ; 2626 swizzles[3] = inst->Src[1].Register.SwizzleW; 2627 2628 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2629 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2630 store_dest(mach, &r[swizzles[chan]], 2631 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2632 } 2633 } 2634 } 2635 else { 2636 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2637 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2638 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2639 } 2640 } 2641 } 2642 } 2643 2644 static void 2645 exec_txq(struct tgsi_exec_machine *mach, 2646 const struct tgsi_full_instruction *inst) 2647 { 2648 int result[4]; 2649 union tgsi_exec_channel r[4], src; 2650 uint chan; 2651 uint unit; 2652 int i,j; 2653 2654 unit = fetch_sampler_unit(mach, inst, 1); 2655 2656 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); 2657 2658 /* XXX: This interface can't return per-pixel values */ 2659 mach->Sampler->get_dims(mach->Sampler, unit, src.i[0], result); 2660 2661 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 2662 for (j = 0; j < 4; j++) { 2663 r[j].i[i] = result[j]; 2664 } 2665 } 2666 2667 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2668 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2669 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, 2670 TGSI_EXEC_DATA_INT); 2671 } 2672 } 2673 } 2674 2675 static void 2676 exec_sample(struct tgsi_exec_machine *mach, 2677 const struct tgsi_full_instruction *inst, 2678 uint modifier, boolean compare) 2679 { 2680 const uint resource_unit = inst->Src[1].Register.Index; 2681 const uint sampler_unit = inst->Src[2].Register.Index; 2682 union tgsi_exec_channel r[5], c1; 2683 const union tgsi_exec_channel *lod = &ZeroVec; 2684 enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE; 2685 uint chan; 2686 unsigned char swizzles[4]; 2687 int8_t offsets[3]; 2688 2689 /* always fetch all 3 offsets, overkill but keeps code simple */ 2690 fetch_texel_offsets(mach, inst, offsets); 2691 2692 assert(modifier != TEX_MODIFIER_PROJECTED); 2693 2694 if (modifier != TEX_MODIFIER_NONE) { 2695 if (modifier == TEX_MODIFIER_LOD_BIAS) { 2696 FETCH(&c1, 3, TGSI_CHAN_X); 2697 lod = &c1; 2698 control = TGSI_SAMPLER_LOD_BIAS; 2699 } 2700 else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) { 2701 FETCH(&c1, 3, TGSI_CHAN_X); 2702 lod = &c1; 2703 control = TGSI_SAMPLER_LOD_EXPLICIT; 2704 } 2705 else { 2706 assert(modifier == TEX_MODIFIER_LEVEL_ZERO); 2707 control = TGSI_SAMPLER_LOD_ZERO; 2708 } 2709 } 2710 2711 FETCH(&r[0], 0, TGSI_CHAN_X); 2712 2713 switch (mach->SamplerViews[resource_unit].Resource) { 2714 case TGSI_TEXTURE_1D: 2715 if (compare) { 2716 FETCH(&r[2], 3, TGSI_CHAN_X); 2717 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2718 &r[0], &ZeroVec, &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */ 2719 NULL, offsets, control, 2720 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2721 } 2722 else { 2723 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2724 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */ 2725 NULL, offsets, control, 2726 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2727 } 2728 break; 2729 2730 case TGSI_TEXTURE_1D_ARRAY: 2731 case TGSI_TEXTURE_2D: 2732 case TGSI_TEXTURE_RECT: 2733 FETCH(&r[1], 0, TGSI_CHAN_Y); 2734 if (compare) { 2735 FETCH(&r[2], 3, TGSI_CHAN_X); 2736 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2737 &r[0], &r[1], &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */ 2738 NULL, offsets, control, 2739 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2740 } 2741 else { 2742 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2743 &r[0], &r[1], &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */ 2744 NULL, offsets, control, 2745 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2746 } 2747 break; 2748 2749 case TGSI_TEXTURE_2D_ARRAY: 2750 case TGSI_TEXTURE_3D: 2751 case TGSI_TEXTURE_CUBE: 2752 FETCH(&r[1], 0, TGSI_CHAN_Y); 2753 FETCH(&r[2], 0, TGSI_CHAN_Z); 2754 if(compare) { 2755 FETCH(&r[3], 3, TGSI_CHAN_X); 2756 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2757 &r[0], &r[1], &r[2], &r[3], lod, 2758 NULL, offsets, control, 2759 &r[0], &r[1], &r[2], &r[3]); 2760 } 2761 else { 2762 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2763 &r[0], &r[1], &r[2], &ZeroVec, lod, 2764 NULL, offsets, control, 2765 &r[0], &r[1], &r[2], &r[3]); 2766 } 2767 break; 2768 2769 case TGSI_TEXTURE_CUBE_ARRAY: 2770 FETCH(&r[1], 0, TGSI_CHAN_Y); 2771 FETCH(&r[2], 0, TGSI_CHAN_Z); 2772 FETCH(&r[3], 0, TGSI_CHAN_W); 2773 if(compare) { 2774 FETCH(&r[4], 3, TGSI_CHAN_X); 2775 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2776 &r[0], &r[1], &r[2], &r[3], &r[4], 2777 NULL, offsets, control, 2778 &r[0], &r[1], &r[2], &r[3]); 2779 } 2780 else { 2781 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2782 &r[0], &r[1], &r[2], &r[3], lod, 2783 NULL, offsets, control, 2784 &r[0], &r[1], &r[2], &r[3]); 2785 } 2786 break; 2787 2788 2789 default: 2790 assert(0); 2791 } 2792 2793 swizzles[0] = inst->Src[1].Register.SwizzleX; 2794 swizzles[1] = inst->Src[1].Register.SwizzleY; 2795 swizzles[2] = inst->Src[1].Register.SwizzleZ; 2796 swizzles[3] = inst->Src[1].Register.SwizzleW; 2797 2798 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2799 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2800 store_dest(mach, &r[swizzles[chan]], 2801 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2802 } 2803 } 2804 } 2805 2806 static void 2807 exec_sample_d(struct tgsi_exec_machine *mach, 2808 const struct tgsi_full_instruction *inst) 2809 { 2810 const uint resource_unit = inst->Src[1].Register.Index; 2811 const uint sampler_unit = inst->Src[2].Register.Index; 2812 union tgsi_exec_channel r[4]; 2813 float derivs[3][2][TGSI_QUAD_SIZE]; 2814 uint chan; 2815 unsigned char swizzles[4]; 2816 int8_t offsets[3]; 2817 2818 /* always fetch all 3 offsets, overkill but keeps code simple */ 2819 fetch_texel_offsets(mach, inst, offsets); 2820 2821 FETCH(&r[0], 0, TGSI_CHAN_X); 2822 2823 switch (mach->SamplerViews[resource_unit].Resource) { 2824 case TGSI_TEXTURE_1D: 2825 case TGSI_TEXTURE_1D_ARRAY: 2826 /* only 1D array actually needs Y */ 2827 FETCH(&r[1], 0, TGSI_CHAN_Y); 2828 2829 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]); 2830 2831 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2832 &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 2833 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2834 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2835 break; 2836 2837 case TGSI_TEXTURE_2D: 2838 case TGSI_TEXTURE_RECT: 2839 case TGSI_TEXTURE_2D_ARRAY: 2840 /* only 2D array actually needs Z */ 2841 FETCH(&r[1], 0, TGSI_CHAN_Y); 2842 FETCH(&r[2], 0, TGSI_CHAN_Z); 2843 2844 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]); 2845 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]); 2846 2847 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2848 &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* inputs */ 2849 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2850 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2851 break; 2852 2853 case TGSI_TEXTURE_3D: 2854 case TGSI_TEXTURE_CUBE: 2855 case TGSI_TEXTURE_CUBE_ARRAY: 2856 /* only cube array actually needs W */ 2857 FETCH(&r[1], 0, TGSI_CHAN_Y); 2858 FETCH(&r[2], 0, TGSI_CHAN_Z); 2859 FETCH(&r[3], 0, TGSI_CHAN_W); 2860 2861 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]); 2862 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]); 2863 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Z, derivs[2]); 2864 2865 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2866 &r[0], &r[1], &r[2], &r[3], &ZeroVec, 2867 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2868 &r[0], &r[1], &r[2], &r[3]); 2869 break; 2870 2871 default: 2872 assert(0); 2873 } 2874 2875 swizzles[0] = inst->Src[1].Register.SwizzleX; 2876 swizzles[1] = inst->Src[1].Register.SwizzleY; 2877 swizzles[2] = inst->Src[1].Register.SwizzleZ; 2878 swizzles[3] = inst->Src[1].Register.SwizzleW; 2879 2880 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2881 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2882 store_dest(mach, &r[swizzles[chan]], 2883 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2884 } 2885 } 2886 } 2887 2888 2889 /** 2890 * Evaluate a constant-valued coefficient at the position of the 2891 * current quad. 2892 */ 2893 static void 2894 eval_constant_coef( 2895 struct tgsi_exec_machine *mach, 2896 unsigned attrib, 2897 unsigned chan ) 2898 { 2899 unsigned i; 2900 2901 for( i = 0; i < TGSI_QUAD_SIZE; i++ ) { 2902 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; 2903 } 2904 } 2905 2906 /** 2907 * Evaluate a linear-valued coefficient at the position of the 2908 * current quad. 2909 */ 2910 static void 2911 eval_linear_coef( 2912 struct tgsi_exec_machine *mach, 2913 unsigned attrib, 2914 unsigned chan ) 2915 { 2916 const float x = mach->QuadPos.xyzw[0].f[0]; 2917 const float y = mach->QuadPos.xyzw[1].f[0]; 2918 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 2919 const float dady = mach->InterpCoefs[attrib].dady[chan]; 2920 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 2921 mach->Inputs[attrib].xyzw[chan].f[0] = a0; 2922 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; 2923 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; 2924 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; 2925 } 2926 2927 /** 2928 * Evaluate a perspective-valued coefficient at the position of the 2929 * current quad. 2930 */ 2931 static void 2932 eval_perspective_coef( 2933 struct tgsi_exec_machine *mach, 2934 unsigned attrib, 2935 unsigned chan ) 2936 { 2937 const float x = mach->QuadPos.xyzw[0].f[0]; 2938 const float y = mach->QuadPos.xyzw[1].f[0]; 2939 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 2940 const float dady = mach->InterpCoefs[attrib].dady[chan]; 2941 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 2942 const float *w = mach->QuadPos.xyzw[3].f; 2943 /* divide by W here */ 2944 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; 2945 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; 2946 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; 2947 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; 2948 } 2949 2950 2951 typedef void (* eval_coef_func)( 2952 struct tgsi_exec_machine *mach, 2953 unsigned attrib, 2954 unsigned chan ); 2955 2956 static void 2957 exec_declaration(struct tgsi_exec_machine *mach, 2958 const struct tgsi_full_declaration *decl) 2959 { 2960 if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) { 2961 mach->SamplerViews[decl->Range.First] = decl->SamplerView; 2962 return; 2963 } 2964 2965 if (mach->ShaderType == PIPE_SHADER_FRAGMENT) { 2966 if (decl->Declaration.File == TGSI_FILE_INPUT) { 2967 uint first, last, mask; 2968 2969 first = decl->Range.First; 2970 last = decl->Range.Last; 2971 mask = decl->Declaration.UsageMask; 2972 2973 /* XXX we could remove this special-case code since 2974 * mach->InterpCoefs[first].a0 should already have the 2975 * front/back-face value. But we should first update the 2976 * ureg code to emit the right UsageMask value (WRITEMASK_X). 2977 * Then, we could remove the tgsi_exec_machine::Face field. 2978 */ 2979 /* XXX make FACE a system value */ 2980 if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { 2981 uint i; 2982 2983 assert(decl->Semantic.Index == 0); 2984 assert(first == last); 2985 2986 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 2987 mach->Inputs[first].xyzw[0].f[i] = mach->Face; 2988 } 2989 } else { 2990 eval_coef_func eval; 2991 uint i, j; 2992 2993 switch (decl->Interp.Interpolate) { 2994 case TGSI_INTERPOLATE_CONSTANT: 2995 eval = eval_constant_coef; 2996 break; 2997 2998 case TGSI_INTERPOLATE_LINEAR: 2999 eval = eval_linear_coef; 3000 break; 3001 3002 case TGSI_INTERPOLATE_PERSPECTIVE: 3003 eval = eval_perspective_coef; 3004 break; 3005 3006 case TGSI_INTERPOLATE_COLOR: 3007 eval = mach->flatshade_color ? eval_constant_coef : eval_perspective_coef; 3008 break; 3009 3010 default: 3011 assert(0); 3012 return; 3013 } 3014 3015 for (j = 0; j < TGSI_NUM_CHANNELS; j++) { 3016 if (mask & (1 << j)) { 3017 for (i = first; i <= last; i++) { 3018 eval(mach, i, j); 3019 } 3020 } 3021 } 3022 } 3023 3024 if (DEBUG_EXECUTION) { 3025 uint i, j; 3026 for (i = first; i <= last; ++i) { 3027 debug_printf("IN[%2u] = ", i); 3028 for (j = 0; j < TGSI_NUM_CHANNELS; j++) { 3029 if (j > 0) { 3030 debug_printf(" "); 3031 } 3032 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 3033 mach->Inputs[i].xyzw[0].f[j], mach->Inputs[i].xyzw[0].u[j], 3034 mach->Inputs[i].xyzw[1].f[j], mach->Inputs[i].xyzw[1].u[j], 3035 mach->Inputs[i].xyzw[2].f[j], mach->Inputs[i].xyzw[2].u[j], 3036 mach->Inputs[i].xyzw[3].f[j], mach->Inputs[i].xyzw[3].u[j]); 3037 } 3038 } 3039 } 3040 } 3041 } 3042 3043 } 3044 3045 typedef void (* micro_unary_op)(union tgsi_exec_channel *dst, 3046 const union tgsi_exec_channel *src); 3047 3048 static void 3049 exec_scalar_unary(struct tgsi_exec_machine *mach, 3050 const struct tgsi_full_instruction *inst, 3051 micro_unary_op op, 3052 enum tgsi_exec_datatype dst_datatype, 3053 enum tgsi_exec_datatype src_datatype) 3054 { 3055 unsigned int chan; 3056 union tgsi_exec_channel src; 3057 union tgsi_exec_channel dst; 3058 3059 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype); 3060 op(&dst, &src); 3061 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3062 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3063 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); 3064 } 3065 } 3066 } 3067 3068 static void 3069 exec_vector_unary(struct tgsi_exec_machine *mach, 3070 const struct tgsi_full_instruction *inst, 3071 micro_unary_op op, 3072 enum tgsi_exec_datatype dst_datatype, 3073 enum tgsi_exec_datatype src_datatype) 3074 { 3075 unsigned int chan; 3076 struct tgsi_exec_vector dst; 3077 3078 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3079 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3080 union tgsi_exec_channel src; 3081 3082 fetch_source(mach, &src, &inst->Src[0], chan, src_datatype); 3083 op(&dst.xyzw[chan], &src); 3084 } 3085 } 3086 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3087 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3088 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); 3089 } 3090 } 3091 } 3092 3093 typedef void (* micro_binary_op)(union tgsi_exec_channel *dst, 3094 const union tgsi_exec_channel *src0, 3095 const union tgsi_exec_channel *src1); 3096 3097 static void 3098 exec_scalar_binary(struct tgsi_exec_machine *mach, 3099 const struct tgsi_full_instruction *inst, 3100 micro_binary_op op, 3101 enum tgsi_exec_datatype dst_datatype, 3102 enum tgsi_exec_datatype src_datatype) 3103 { 3104 unsigned int chan; 3105 union tgsi_exec_channel src[2]; 3106 union tgsi_exec_channel dst; 3107 3108 fetch_source(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, src_datatype); 3109 fetch_source(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, src_datatype); 3110 op(&dst, &src[0], &src[1]); 3111 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3112 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3113 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); 3114 } 3115 } 3116 } 3117 3118 static void 3119 exec_vector_binary(struct tgsi_exec_machine *mach, 3120 const struct tgsi_full_instruction *inst, 3121 micro_binary_op op, 3122 enum tgsi_exec_datatype dst_datatype, 3123 enum tgsi_exec_datatype src_datatype) 3124 { 3125 unsigned int chan; 3126 struct tgsi_exec_vector dst; 3127 3128 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3129 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3130 union tgsi_exec_channel src[2]; 3131 3132 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 3133 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 3134 op(&dst.xyzw[chan], &src[0], &src[1]); 3135 } 3136 } 3137 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3138 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3139 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); 3140 } 3141 } 3142 } 3143 3144 typedef void (* micro_trinary_op)(union tgsi_exec_channel *dst, 3145 const union tgsi_exec_channel *src0, 3146 const union tgsi_exec_channel *src1, 3147 const union tgsi_exec_channel *src2); 3148 3149 static void 3150 exec_vector_trinary(struct tgsi_exec_machine *mach, 3151 const struct tgsi_full_instruction *inst, 3152 micro_trinary_op op, 3153 enum tgsi_exec_datatype dst_datatype, 3154 enum tgsi_exec_datatype src_datatype) 3155 { 3156 unsigned int chan; 3157 struct tgsi_exec_vector dst; 3158 3159 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3160 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3161 union tgsi_exec_channel src[3]; 3162 3163 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 3164 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 3165 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype); 3166 op(&dst.xyzw[chan], &src[0], &src[1], &src[2]); 3167 } 3168 } 3169 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3170 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3171 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); 3172 } 3173 } 3174 } 3175 3176 typedef void (* micro_quaternary_op)(union tgsi_exec_channel *dst, 3177 const union tgsi_exec_channel *src0, 3178 const union tgsi_exec_channel *src1, 3179 const union tgsi_exec_channel *src2, 3180 const union tgsi_exec_channel *src3); 3181 3182 static void 3183 exec_vector_quaternary(struct tgsi_exec_machine *mach, 3184 const struct tgsi_full_instruction *inst, 3185 micro_quaternary_op op, 3186 enum tgsi_exec_datatype dst_datatype, 3187 enum tgsi_exec_datatype src_datatype) 3188 { 3189 unsigned int chan; 3190 struct tgsi_exec_vector dst; 3191 3192 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3193 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3194 union tgsi_exec_channel src[4]; 3195 3196 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 3197 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 3198 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype); 3199 fetch_source(mach, &src[3], &inst->Src[3], chan, src_datatype); 3200 op(&dst.xyzw[chan], &src[0], &src[1], &src[2], &src[3]); 3201 } 3202 } 3203 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3204 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3205 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); 3206 } 3207 } 3208 } 3209 3210 static void 3211 exec_dp3(struct tgsi_exec_machine *mach, 3212 const struct tgsi_full_instruction *inst) 3213 { 3214 unsigned int chan; 3215 union tgsi_exec_channel arg[3]; 3216 3217 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3218 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3219 micro_mul(&arg[2], &arg[0], &arg[1]); 3220 3221 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) { 3222 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 3223 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); 3224 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 3225 } 3226 3227 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3228 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3229 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 3230 } 3231 } 3232 } 3233 3234 static void 3235 exec_dp4(struct tgsi_exec_machine *mach, 3236 const struct tgsi_full_instruction *inst) 3237 { 3238 unsigned int chan; 3239 union tgsi_exec_channel arg[3]; 3240 3241 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3242 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3243 micro_mul(&arg[2], &arg[0], &arg[1]); 3244 3245 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) { 3246 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 3247 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); 3248 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 3249 } 3250 3251 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3252 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3253 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 3254 } 3255 } 3256 } 3257 3258 static void 3259 exec_dp2a(struct tgsi_exec_machine *mach, 3260 const struct tgsi_full_instruction *inst) 3261 { 3262 unsigned int chan; 3263 union tgsi_exec_channel arg[3]; 3264 3265 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3266 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3267 micro_mul(&arg[2], &arg[0], &arg[1]); 3268 3269 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3270 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3271 micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]); 3272 3273 fetch_source(mach, &arg[1], &inst->Src[2], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3274 micro_add(&arg[0], &arg[0], &arg[1]); 3275 3276 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3277 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3278 store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 3279 } 3280 } 3281 } 3282 3283 static void 3284 exec_dph(struct tgsi_exec_machine *mach, 3285 const struct tgsi_full_instruction *inst) 3286 { 3287 unsigned int chan; 3288 union tgsi_exec_channel arg[3]; 3289 3290 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3291 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3292 micro_mul(&arg[2], &arg[0], &arg[1]); 3293 3294 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3295 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3296 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 3297 3298 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3299 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3300 micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]); 3301 3302 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3303 micro_add(&arg[0], &arg[0], &arg[1]); 3304 3305 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3306 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3307 store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 3308 } 3309 } 3310 } 3311 3312 static void 3313 exec_dp2(struct tgsi_exec_machine *mach, 3314 const struct tgsi_full_instruction *inst) 3315 { 3316 unsigned int chan; 3317 union tgsi_exec_channel arg[3]; 3318 3319 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3320 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3321 micro_mul(&arg[2], &arg[0], &arg[1]); 3322 3323 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3324 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3325 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 3326 3327 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3328 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3329 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 3330 } 3331 } 3332 } 3333 3334 static void 3335 exec_pk2h(struct tgsi_exec_machine *mach, 3336 const struct tgsi_full_instruction *inst) 3337 { 3338 unsigned chan; 3339 union tgsi_exec_channel arg[2], dst; 3340 3341 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3342 fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3343 for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) { 3344 dst.u[chan] = util_float_to_half(arg[0].f[chan]) | 3345 (util_float_to_half(arg[1].f[chan]) << 16); 3346 } 3347 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3348 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3349 store_dest(mach, &dst, &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_UINT); 3350 } 3351 } 3352 } 3353 3354 static void 3355 exec_up2h(struct tgsi_exec_machine *mach, 3356 const struct tgsi_full_instruction *inst) 3357 { 3358 unsigned chan; 3359 union tgsi_exec_channel arg, dst[2]; 3360 3361 fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); 3362 for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) { 3363 dst[0].f[chan] = util_half_to_float(arg.u[chan] & 0xffff); 3364 dst[1].f[chan] = util_half_to_float(arg.u[chan] >> 16); 3365 } 3366 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3367 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3368 store_dest(mach, &dst[chan & 1], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 3369 } 3370 } 3371 } 3372 3373 static void 3374 exec_scs(struct tgsi_exec_machine *mach, 3375 const struct tgsi_full_instruction *inst) 3376 { 3377 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) { 3378 union tgsi_exec_channel arg; 3379 union tgsi_exec_channel result; 3380 3381 fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3382 3383 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3384 micro_cos(&result, &arg); 3385 store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3386 } 3387 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3388 micro_sin(&result, &arg); 3389 store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3390 } 3391 } 3392 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3393 store_dest(mach, &ZeroVec, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3394 } 3395 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3396 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3397 } 3398 } 3399 3400 static void 3401 exec_xpd(struct tgsi_exec_machine *mach, 3402 const struct tgsi_full_instruction *inst) 3403 { 3404 union tgsi_exec_channel r[6]; 3405 union tgsi_exec_channel d[3]; 3406 3407 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3408 fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3409 3410 micro_mul(&r[2], &r[0], &r[1]); 3411 3412 fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3413 fetch_source(mach, &r[4], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3414 3415 micro_mul(&r[5], &r[3], &r[4] ); 3416 micro_sub(&d[TGSI_CHAN_X], &r[2], &r[5]); 3417 3418 fetch_source(mach, &r[2], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3419 3420 micro_mul(&r[3], &r[3], &r[2]); 3421 3422 fetch_source(mach, &r[5], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3423 3424 micro_mul(&r[1], &r[1], &r[5]); 3425 micro_sub(&d[TGSI_CHAN_Y], &r[3], &r[1]); 3426 3427 micro_mul(&r[5], &r[5], &r[4]); 3428 micro_mul(&r[0], &r[0], &r[2]); 3429 micro_sub(&d[TGSI_CHAN_Z], &r[5], &r[0]); 3430 3431 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3432 store_dest(mach, &d[TGSI_CHAN_X], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3433 } 3434 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3435 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3436 } 3437 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3438 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3439 } 3440 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3441 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3442 } 3443 } 3444 3445 static void 3446 exec_dst(struct tgsi_exec_machine *mach, 3447 const struct tgsi_full_instruction *inst) 3448 { 3449 union tgsi_exec_channel r[2]; 3450 union tgsi_exec_channel d[4]; 3451 3452 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3453 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3454 fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3455 micro_mul(&d[TGSI_CHAN_Y], &r[0], &r[1]); 3456 } 3457 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3458 fetch_source(mach, &d[TGSI_CHAN_Z], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3459 } 3460 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3461 fetch_source(mach, &d[TGSI_CHAN_W], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3462 } 3463 3464 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3465 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3466 } 3467 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3468 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3469 } 3470 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3471 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3472 } 3473 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3474 store_dest(mach, &d[TGSI_CHAN_W], &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3475 } 3476 } 3477 3478 static void 3479 exec_log(struct tgsi_exec_machine *mach, 3480 const struct tgsi_full_instruction *inst) 3481 { 3482 union tgsi_exec_channel r[3]; 3483 3484 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3485 micro_abs(&r[2], &r[0]); /* r2 = abs(r0) */ 3486 micro_lg2(&r[1], &r[2]); /* r1 = lg2(r2) */ 3487 micro_flr(&r[0], &r[1]); /* r0 = floor(r1) */ 3488 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3489 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3490 } 3491 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3492 micro_exp2(&r[0], &r[0]); /* r0 = 2 ^ r0 */ 3493 micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */ 3494 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3495 } 3496 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3497 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3498 } 3499 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3500 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3501 } 3502 } 3503 3504 static void 3505 exec_exp(struct tgsi_exec_machine *mach, 3506 const struct tgsi_full_instruction *inst) 3507 { 3508 union tgsi_exec_channel r[3]; 3509 3510 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3511 micro_flr(&r[1], &r[0]); /* r1 = floor(r0) */ 3512 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3513 micro_exp2(&r[2], &r[1]); /* r2 = 2 ^ r1 */ 3514 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3515 } 3516 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3517 micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */ 3518 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3519 } 3520 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3521 micro_exp2(&r[2], &r[0]); /* r2 = 2 ^ r0 */ 3522 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3523 } 3524 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3525 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3526 } 3527 } 3528 3529 static void 3530 exec_lit(struct tgsi_exec_machine *mach, 3531 const struct tgsi_full_instruction *inst) 3532 { 3533 union tgsi_exec_channel r[3]; 3534 union tgsi_exec_channel d[3]; 3535 3536 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) { 3537 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3538 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3539 fetch_source(mach, &r[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3540 micro_max(&r[1], &r[1], &ZeroVec); 3541 3542 fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3543 micro_min(&r[2], &r[2], &P128Vec); 3544 micro_max(&r[2], &r[2], &M128Vec); 3545 micro_pow(&r[1], &r[1], &r[2]); 3546 micro_lt(&d[TGSI_CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec); 3547 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3548 } 3549 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3550 micro_max(&d[TGSI_CHAN_Y], &r[0], &ZeroVec); 3551 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3552 } 3553 } 3554 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3555 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3556 } 3557 3558 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3559 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3560 } 3561 } 3562 3563 static void 3564 exec_break(struct tgsi_exec_machine *mach) 3565 { 3566 if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) { 3567 /* turn off loop channels for each enabled exec channel */ 3568 mach->LoopMask &= ~mach->ExecMask; 3569 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3570 UPDATE_EXEC_MASK(mach); 3571 } else { 3572 assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH); 3573 3574 mach->Switch.mask = 0x0; 3575 3576 UPDATE_EXEC_MASK(mach); 3577 } 3578 } 3579 3580 static void 3581 exec_switch(struct tgsi_exec_machine *mach, 3582 const struct tgsi_full_instruction *inst) 3583 { 3584 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); 3585 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 3586 3587 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; 3588 fetch_source(mach, &mach->Switch.selector, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); 3589 mach->Switch.mask = 0x0; 3590 mach->Switch.defaultMask = 0x0; 3591 3592 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 3593 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH; 3594 3595 UPDATE_EXEC_MASK(mach); 3596 } 3597 3598 static void 3599 exec_case(struct tgsi_exec_machine *mach, 3600 const struct tgsi_full_instruction *inst) 3601 { 3602 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; 3603 union tgsi_exec_channel src; 3604 uint mask = 0; 3605 3606 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); 3607 3608 if (mach->Switch.selector.u[0] == src.u[0]) { 3609 mask |= 0x1; 3610 } 3611 if (mach->Switch.selector.u[1] == src.u[1]) { 3612 mask |= 0x2; 3613 } 3614 if (mach->Switch.selector.u[2] == src.u[2]) { 3615 mask |= 0x4; 3616 } 3617 if (mach->Switch.selector.u[3] == src.u[3]) { 3618 mask |= 0x8; 3619 } 3620 3621 mach->Switch.defaultMask |= mask; 3622 3623 mach->Switch.mask |= mask & prevMask; 3624 3625 UPDATE_EXEC_MASK(mach); 3626 } 3627 3628 /* FIXME: this will only work if default is last */ 3629 static void 3630 exec_default(struct tgsi_exec_machine *mach) 3631 { 3632 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; 3633 3634 mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask; 3635 3636 UPDATE_EXEC_MASK(mach); 3637 } 3638 3639 static void 3640 exec_endswitch(struct tgsi_exec_machine *mach) 3641 { 3642 mach->Switch = mach->SwitchStack[--mach->SwitchStackTop]; 3643 mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; 3644 3645 UPDATE_EXEC_MASK(mach); 3646 } 3647 3648 typedef void (* micro_dop)(union tgsi_double_channel *dst, 3649 const union tgsi_double_channel *src); 3650 3651 typedef void (* micro_dop_sop)(union tgsi_double_channel *dst, 3652 const union tgsi_double_channel *src0, 3653 union tgsi_exec_channel *src1); 3654 3655 typedef void (* micro_dop_s)(union tgsi_double_channel *dst, 3656 const union tgsi_exec_channel *src); 3657 3658 typedef void (* micro_sop_d)(union tgsi_exec_channel *dst, 3659 const union tgsi_double_channel *src); 3660 3661 static void 3662 fetch_double_channel(struct tgsi_exec_machine *mach, 3663 union tgsi_double_channel *chan, 3664 const struct tgsi_full_src_register *reg, 3665 uint chan_0, 3666 uint chan_1) 3667 { 3668 union tgsi_exec_channel src[2]; 3669 uint i; 3670 3671 fetch_source_d(mach, &src[0], reg, chan_0, TGSI_EXEC_DATA_UINT); 3672 fetch_source_d(mach, &src[1], reg, chan_1, TGSI_EXEC_DATA_UINT); 3673 3674 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 3675 chan->u[i][0] = src[0].u[i]; 3676 chan->u[i][1] = src[1].u[i]; 3677 } 3678 if (reg->Register.Absolute) { 3679 micro_dabs(chan, chan); 3680 } 3681 if (reg->Register.Negate) { 3682 micro_dneg(chan, chan); 3683 } 3684 } 3685 3686 static void 3687 store_double_channel(struct tgsi_exec_machine *mach, 3688 const union tgsi_double_channel *chan, 3689 const struct tgsi_full_dst_register *reg, 3690 const struct tgsi_full_instruction *inst, 3691 uint chan_0, 3692 uint chan_1) 3693 { 3694 union tgsi_exec_channel dst[2]; 3695 uint i; 3696 union tgsi_double_channel temp; 3697 const uint execmask = mach->ExecMask; 3698 3699 if (!inst->Instruction.Saturate) { 3700 for (i = 0; i < TGSI_QUAD_SIZE; i++) 3701 if (execmask & (1 << i)) { 3702 dst[0].u[i] = chan->u[i][0]; 3703 dst[1].u[i] = chan->u[i][1]; 3704 } 3705 } 3706 else { 3707 for (i = 0; i < TGSI_QUAD_SIZE; i++) 3708 if (execmask & (1 << i)) { 3709 if (chan->d[i] < 0.0) 3710 temp.d[i] = 0.0; 3711 else if (chan->d[i] > 1.0) 3712 temp.d[i] = 1.0; 3713 else 3714 temp.d[i] = chan->d[i]; 3715 3716 dst[0].u[i] = temp.u[i][0]; 3717 dst[1].u[i] = temp.u[i][1]; 3718 } 3719 } 3720 3721 store_dest_double(mach, &dst[0], reg, inst, chan_0, TGSI_EXEC_DATA_UINT); 3722 if (chan_1 != -1) 3723 store_dest_double(mach, &dst[1], reg, inst, chan_1, TGSI_EXEC_DATA_UINT); 3724 } 3725 3726 static void 3727 exec_double_unary(struct tgsi_exec_machine *mach, 3728 const struct tgsi_full_instruction *inst, 3729 micro_dop op) 3730 { 3731 union tgsi_double_channel src; 3732 union tgsi_double_channel dst; 3733 3734 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) { 3735 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3736 op(&dst, &src); 3737 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 3738 } 3739 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) { 3740 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 3741 op(&dst, &src); 3742 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 3743 } 3744 } 3745 3746 static void 3747 exec_double_binary(struct tgsi_exec_machine *mach, 3748 const struct tgsi_full_instruction *inst, 3749 micro_dop op, 3750 enum tgsi_exec_datatype dst_datatype) 3751 { 3752 union tgsi_double_channel src[2]; 3753 union tgsi_double_channel dst; 3754 int first_dest_chan, second_dest_chan; 3755 int wmask; 3756 3757 wmask = inst->Dst[0].Register.WriteMask; 3758 /* these are & because of the way DSLT etc store their destinations */ 3759 if (wmask & TGSI_WRITEMASK_XY) { 3760 first_dest_chan = TGSI_CHAN_X; 3761 second_dest_chan = TGSI_CHAN_Y; 3762 if (dst_datatype == TGSI_EXEC_DATA_UINT) { 3763 first_dest_chan = (wmask & TGSI_WRITEMASK_X) ? TGSI_CHAN_X : TGSI_CHAN_Y; 3764 second_dest_chan = -1; 3765 } 3766 3767 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3768 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y); 3769 op(&dst, src); 3770 store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan); 3771 } 3772 3773 if (wmask & TGSI_WRITEMASK_ZW) { 3774 first_dest_chan = TGSI_CHAN_Z; 3775 second_dest_chan = TGSI_CHAN_W; 3776 if (dst_datatype == TGSI_EXEC_DATA_UINT) { 3777 first_dest_chan = (wmask & TGSI_WRITEMASK_Z) ? TGSI_CHAN_Z : TGSI_CHAN_W; 3778 second_dest_chan = -1; 3779 } 3780 3781 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 3782 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W); 3783 op(&dst, src); 3784 store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan); 3785 } 3786 } 3787 3788 static void 3789 exec_double_trinary(struct tgsi_exec_machine *mach, 3790 const struct tgsi_full_instruction *inst, 3791 micro_dop op) 3792 { 3793 union tgsi_double_channel src[3]; 3794 union tgsi_double_channel dst; 3795 3796 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) { 3797 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3798 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y); 3799 fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_X, TGSI_CHAN_Y); 3800 op(&dst, src); 3801 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 3802 } 3803 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) { 3804 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 3805 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W); 3806 fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_Z, TGSI_CHAN_W); 3807 op(&dst, src); 3808 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 3809 } 3810 } 3811 3812 static void 3813 exec_dldexp(struct tgsi_exec_machine *mach, 3814 const struct tgsi_full_instruction *inst) 3815 { 3816 union tgsi_double_channel src0; 3817 union tgsi_exec_channel src1; 3818 union tgsi_double_channel dst; 3819 int wmask; 3820 3821 wmask = inst->Dst[0].Register.WriteMask; 3822 if (wmask & TGSI_WRITEMASK_XY) { 3823 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3824 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); 3825 micro_dldexp(&dst, &src0, &src1); 3826 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 3827 } 3828 3829 if (wmask & TGSI_WRITEMASK_ZW) { 3830 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 3831 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT); 3832 micro_dldexp(&dst, &src0, &src1); 3833 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 3834 } 3835 } 3836 3837 static void 3838 exec_dfracexp(struct tgsi_exec_machine *mach, 3839 const struct tgsi_full_instruction *inst) 3840 { 3841 union tgsi_double_channel src; 3842 union tgsi_double_channel dst; 3843 union tgsi_exec_channel dst_exp; 3844 3845 if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY)) { 3846 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3847 micro_dfracexp(&dst, &dst_exp, &src); 3848 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 3849 store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT); 3850 } 3851 if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW)) { 3852 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 3853 micro_dfracexp(&dst, &dst_exp, &src); 3854 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 3855 store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT); 3856 } 3857 } 3858 3859 static void 3860 exec_arg0_64_arg1_32(struct tgsi_exec_machine *mach, 3861 const struct tgsi_full_instruction *inst, 3862 micro_dop_sop op) 3863 { 3864 union tgsi_double_channel src0; 3865 union tgsi_exec_channel src1; 3866 union tgsi_double_channel dst; 3867 int wmask; 3868 3869 wmask = inst->Dst[0].Register.WriteMask; 3870 if (wmask & TGSI_WRITEMASK_XY) { 3871 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3872 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); 3873 op(&dst, &src0, &src1); 3874 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 3875 } 3876 3877 if (wmask & TGSI_WRITEMASK_ZW) { 3878 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 3879 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT); 3880 op(&dst, &src0, &src1); 3881 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 3882 } 3883 } 3884 3885 static int 3886 get_image_coord_dim(unsigned tgsi_tex) 3887 { 3888 int dim; 3889 switch (tgsi_tex) { 3890 case TGSI_TEXTURE_BUFFER: 3891 case TGSI_TEXTURE_1D: 3892 dim = 1; 3893 break; 3894 case TGSI_TEXTURE_2D: 3895 case TGSI_TEXTURE_RECT: 3896 case TGSI_TEXTURE_1D_ARRAY: 3897 case TGSI_TEXTURE_2D_MSAA: 3898 dim = 2; 3899 break; 3900 case TGSI_TEXTURE_3D: 3901 case TGSI_TEXTURE_CUBE: 3902 case TGSI_TEXTURE_2D_ARRAY: 3903 case TGSI_TEXTURE_2D_ARRAY_MSAA: 3904 case TGSI_TEXTURE_CUBE_ARRAY: 3905 dim = 3; 3906 break; 3907 default: 3908 assert(!"unknown texture target"); 3909 dim = 0; 3910 break; 3911 } 3912 3913 return dim; 3914 } 3915 3916 static int 3917 get_image_coord_sample(unsigned tgsi_tex) 3918 { 3919 int sample = 0; 3920 switch (tgsi_tex) { 3921 case TGSI_TEXTURE_2D_MSAA: 3922 sample = 3; 3923 break; 3924 case TGSI_TEXTURE_2D_ARRAY_MSAA: 3925 sample = 4; 3926 break; 3927 default: 3928 break; 3929 } 3930 return sample; 3931 } 3932 3933 static void 3934 exec_load_img(struct tgsi_exec_machine *mach, 3935 const struct tgsi_full_instruction *inst) 3936 { 3937 union tgsi_exec_channel r[4], sample_r; 3938 uint unit; 3939 int sample; 3940 int i, j; 3941 int dim; 3942 uint chan; 3943 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 3944 struct tgsi_image_params params; 3945 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 3946 3947 unit = fetch_sampler_unit(mach, inst, 0); 3948 dim = get_image_coord_dim(inst->Memory.Texture); 3949 sample = get_image_coord_sample(inst->Memory.Texture); 3950 assert(dim <= 3); 3951 3952 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 3953 params.unit = unit; 3954 params.tgsi_tex_instr = inst->Memory.Texture; 3955 params.format = inst->Memory.Format; 3956 3957 for (i = 0; i < dim; i++) { 3958 IFETCH(&r[i], 1, TGSI_CHAN_X + i); 3959 } 3960 3961 if (sample) 3962 IFETCH(&sample_r, 1, TGSI_CHAN_X + sample); 3963 3964 mach->Image->load(mach->Image, ¶ms, 3965 r[0].i, r[1].i, r[2].i, sample_r.i, 3966 rgba); 3967 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 3968 r[0].f[j] = rgba[0][j]; 3969 r[1].f[j] = rgba[1][j]; 3970 r[2].f[j] = rgba[2][j]; 3971 r[3].f[j] = rgba[3][j]; 3972 } 3973 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3974 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3975 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 3976 } 3977 } 3978 } 3979 3980 static void 3981 exec_load_buf(struct tgsi_exec_machine *mach, 3982 const struct tgsi_full_instruction *inst) 3983 { 3984 union tgsi_exec_channel r[4]; 3985 uint unit; 3986 int j; 3987 uint chan; 3988 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 3989 struct tgsi_buffer_params params; 3990 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 3991 3992 unit = fetch_sampler_unit(mach, inst, 0); 3993 3994 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 3995 params.unit = unit; 3996 IFETCH(&r[0], 1, TGSI_CHAN_X); 3997 3998 mach->Buffer->load(mach->Buffer, ¶ms, 3999 r[0].i, rgba); 4000 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4001 r[0].f[j] = rgba[0][j]; 4002 r[1].f[j] = rgba[1][j]; 4003 r[2].f[j] = rgba[2][j]; 4004 r[3].f[j] = rgba[3][j]; 4005 } 4006 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4007 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4008 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 4009 } 4010 } 4011 } 4012 4013 static void 4014 exec_load_mem(struct tgsi_exec_machine *mach, 4015 const struct tgsi_full_instruction *inst) 4016 { 4017 union tgsi_exec_channel r[4]; 4018 uint chan; 4019 char *ptr = mach->LocalMem; 4020 uint32_t offset; 4021 int j; 4022 4023 IFETCH(&r[0], 1, TGSI_CHAN_X); 4024 if (r[0].u[0] >= mach->LocalMemSize) 4025 return; 4026 4027 offset = r[0].u[0]; 4028 ptr += offset; 4029 4030 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4031 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4032 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4033 memcpy(&r[chan].u[j], ptr + (4 * chan), 4); 4034 } 4035 } 4036 } 4037 4038 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4039 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4040 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 4041 } 4042 } 4043 } 4044 4045 static void 4046 exec_load(struct tgsi_exec_machine *mach, 4047 const struct tgsi_full_instruction *inst) 4048 { 4049 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) 4050 exec_load_img(mach, inst); 4051 else if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) 4052 exec_load_buf(mach, inst); 4053 else if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) 4054 exec_load_mem(mach, inst); 4055 } 4056 4057 static void 4058 exec_store_img(struct tgsi_exec_machine *mach, 4059 const struct tgsi_full_instruction *inst) 4060 { 4061 union tgsi_exec_channel r[3], sample_r; 4062 union tgsi_exec_channel value[4]; 4063 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 4064 struct tgsi_image_params params; 4065 int dim; 4066 int sample; 4067 int i, j; 4068 uint unit; 4069 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4070 unit = inst->Dst[0].Register.Index; 4071 dim = get_image_coord_dim(inst->Memory.Texture); 4072 sample = get_image_coord_sample(inst->Memory.Texture); 4073 assert(dim <= 3); 4074 4075 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 4076 params.unit = unit; 4077 params.tgsi_tex_instr = inst->Memory.Texture; 4078 params.format = inst->Memory.Format; 4079 4080 for (i = 0; i < dim; i++) { 4081 IFETCH(&r[i], 0, TGSI_CHAN_X + i); 4082 } 4083 4084 for (i = 0; i < 4; i++) { 4085 FETCH(&value[i], 1, TGSI_CHAN_X + i); 4086 } 4087 if (sample) 4088 IFETCH(&sample_r, 0, TGSI_CHAN_X + sample); 4089 4090 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4091 rgba[0][j] = value[0].f[j]; 4092 rgba[1][j] = value[1].f[j]; 4093 rgba[2][j] = value[2].f[j]; 4094 rgba[3][j] = value[3].f[j]; 4095 } 4096 4097 mach->Image->store(mach->Image, ¶ms, 4098 r[0].i, r[1].i, r[2].i, sample_r.i, 4099 rgba); 4100 } 4101 4102 static void 4103 exec_store_buf(struct tgsi_exec_machine *mach, 4104 const struct tgsi_full_instruction *inst) 4105 { 4106 union tgsi_exec_channel r[3]; 4107 union tgsi_exec_channel value[4]; 4108 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 4109 struct tgsi_buffer_params params; 4110 int i, j; 4111 uint unit; 4112 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4113 4114 unit = inst->Dst[0].Register.Index; 4115 4116 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 4117 params.unit = unit; 4118 params.writemask = inst->Dst[0].Register.WriteMask; 4119 4120 IFETCH(&r[0], 0, TGSI_CHAN_X); 4121 for (i = 0; i < 4; i++) { 4122 FETCH(&value[i], 1, TGSI_CHAN_X + i); 4123 } 4124 4125 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4126 rgba[0][j] = value[0].f[j]; 4127 rgba[1][j] = value[1].f[j]; 4128 rgba[2][j] = value[2].f[j]; 4129 rgba[3][j] = value[3].f[j]; 4130 } 4131 4132 mach->Buffer->store(mach->Buffer, ¶ms, 4133 r[0].i, 4134 rgba); 4135 } 4136 4137 static void 4138 exec_store_mem(struct tgsi_exec_machine *mach, 4139 const struct tgsi_full_instruction *inst) 4140 { 4141 union tgsi_exec_channel r[3]; 4142 union tgsi_exec_channel value[4]; 4143 uint i, chan; 4144 char *ptr = mach->LocalMem; 4145 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4146 int execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 4147 4148 IFETCH(&r[0], 0, TGSI_CHAN_X); 4149 4150 for (i = 0; i < 4; i++) { 4151 FETCH(&value[i], 1, TGSI_CHAN_X + i); 4152 } 4153 4154 if (r[0].u[0] >= mach->LocalMemSize) 4155 return; 4156 ptr += r[0].u[0]; 4157 4158 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 4159 if (execmask & (1 << i)) { 4160 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4161 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4162 memcpy(ptr + (chan * 4), &value[chan].u[0], 4); 4163 } 4164 } 4165 } 4166 } 4167 } 4168 4169 static void 4170 exec_store(struct tgsi_exec_machine *mach, 4171 const struct tgsi_full_instruction *inst) 4172 { 4173 if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE) 4174 exec_store_img(mach, inst); 4175 else if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) 4176 exec_store_buf(mach, inst); 4177 else if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) 4178 exec_store_mem(mach, inst); 4179 } 4180 4181 static void 4182 exec_atomop_img(struct tgsi_exec_machine *mach, 4183 const struct tgsi_full_instruction *inst) 4184 { 4185 union tgsi_exec_channel r[4], sample_r; 4186 union tgsi_exec_channel value[4], value2[4]; 4187 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 4188 float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 4189 struct tgsi_image_params params; 4190 int dim; 4191 int sample; 4192 int i, j; 4193 uint unit, chan; 4194 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4195 unit = fetch_sampler_unit(mach, inst, 0); 4196 dim = get_image_coord_dim(inst->Memory.Texture); 4197 sample = get_image_coord_sample(inst->Memory.Texture); 4198 assert(dim <= 3); 4199 4200 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 4201 params.unit = unit; 4202 params.tgsi_tex_instr = inst->Memory.Texture; 4203 params.format = inst->Memory.Format; 4204 4205 for (i = 0; i < dim; i++) { 4206 IFETCH(&r[i], 1, TGSI_CHAN_X + i); 4207 } 4208 4209 for (i = 0; i < 4; i++) { 4210 FETCH(&value[i], 2, TGSI_CHAN_X + i); 4211 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) 4212 FETCH(&value2[i], 3, TGSI_CHAN_X + i); 4213 } 4214 if (sample) 4215 IFETCH(&sample_r, 1, TGSI_CHAN_X + sample); 4216 4217 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4218 rgba[0][j] = value[0].f[j]; 4219 rgba[1][j] = value[1].f[j]; 4220 rgba[2][j] = value[2].f[j]; 4221 rgba[3][j] = value[3].f[j]; 4222 } 4223 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) { 4224 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4225 rgba2[0][j] = value2[0].f[j]; 4226 rgba2[1][j] = value2[1].f[j]; 4227 rgba2[2][j] = value2[2].f[j]; 4228 rgba2[3][j] = value2[3].f[j]; 4229 } 4230 } 4231 4232 mach->Image->op(mach->Image, ¶ms, inst->Instruction.Opcode, 4233 r[0].i, r[1].i, r[2].i, sample_r.i, 4234 rgba, rgba2); 4235 4236 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4237 r[0].f[j] = rgba[0][j]; 4238 r[1].f[j] = rgba[1][j]; 4239 r[2].f[j] = rgba[2][j]; 4240 r[3].f[j] = rgba[3][j]; 4241 } 4242 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4243 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4244 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 4245 } 4246 } 4247 } 4248 4249 static void 4250 exec_atomop_buf(struct tgsi_exec_machine *mach, 4251 const struct tgsi_full_instruction *inst) 4252 { 4253 union tgsi_exec_channel r[4]; 4254 union tgsi_exec_channel value[4], value2[4]; 4255 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 4256 float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 4257 struct tgsi_buffer_params params; 4258 int i, j; 4259 uint unit, chan; 4260 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4261 4262 unit = fetch_sampler_unit(mach, inst, 0); 4263 4264 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 4265 params.unit = unit; 4266 params.writemask = inst->Dst[0].Register.WriteMask; 4267 4268 IFETCH(&r[0], 1, TGSI_CHAN_X); 4269 4270 for (i = 0; i < 4; i++) { 4271 FETCH(&value[i], 2, TGSI_CHAN_X + i); 4272 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) 4273 FETCH(&value2[i], 3, TGSI_CHAN_X + i); 4274 } 4275 4276 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4277 rgba[0][j] = value[0].f[j]; 4278 rgba[1][j] = value[1].f[j]; 4279 rgba[2][j] = value[2].f[j]; 4280 rgba[3][j] = value[3].f[j]; 4281 } 4282 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) { 4283 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4284 rgba2[0][j] = value2[0].f[j]; 4285 rgba2[1][j] = value2[1].f[j]; 4286 rgba2[2][j] = value2[2].f[j]; 4287 rgba2[3][j] = value2[3].f[j]; 4288 } 4289 } 4290 4291 mach->Buffer->op(mach->Buffer, ¶ms, inst->Instruction.Opcode, 4292 r[0].i, 4293 rgba, rgba2); 4294 4295 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4296 r[0].f[j] = rgba[0][j]; 4297 r[1].f[j] = rgba[1][j]; 4298 r[2].f[j] = rgba[2][j]; 4299 r[3].f[j] = rgba[3][j]; 4300 } 4301 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4302 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4303 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 4304 } 4305 } 4306 } 4307 4308 static void 4309 exec_atomop_mem(struct tgsi_exec_machine *mach, 4310 const struct tgsi_full_instruction *inst) 4311 { 4312 union tgsi_exec_channel r[4]; 4313 union tgsi_exec_channel value[4], value2[4]; 4314 char *ptr = mach->LocalMem; 4315 uint32_t val; 4316 uint chan, i; 4317 uint32_t offset; 4318 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4319 int execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 4320 IFETCH(&r[0], 1, TGSI_CHAN_X); 4321 4322 if (r[0].u[0] >= mach->LocalMemSize) 4323 return; 4324 4325 offset = r[0].u[0]; 4326 ptr += offset; 4327 for (i = 0; i < 4; i++) { 4328 FETCH(&value[i], 2, TGSI_CHAN_X + i); 4329 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) 4330 FETCH(&value2[i], 3, TGSI_CHAN_X + i); 4331 } 4332 4333 memcpy(&r[0].u[0], ptr, 4); 4334 val = r[0].u[0]; 4335 switch (inst->Instruction.Opcode) { 4336 case TGSI_OPCODE_ATOMUADD: 4337 val += value[0].u[0]; 4338 break; 4339 case TGSI_OPCODE_ATOMXOR: 4340 val ^= value[0].u[0]; 4341 break; 4342 case TGSI_OPCODE_ATOMOR: 4343 val |= value[0].u[0]; 4344 break; 4345 case TGSI_OPCODE_ATOMAND: 4346 val &= value[0].u[0]; 4347 break; 4348 case TGSI_OPCODE_ATOMUMIN: 4349 val = MIN2(val, value[0].u[0]); 4350 break; 4351 case TGSI_OPCODE_ATOMUMAX: 4352 val = MAX2(val, value[0].u[0]); 4353 break; 4354 case TGSI_OPCODE_ATOMIMIN: 4355 val = MIN2(r[0].i[0], value[0].i[0]); 4356 break; 4357 case TGSI_OPCODE_ATOMIMAX: 4358 val = MAX2(r[0].i[0], value[0].i[0]); 4359 break; 4360 case TGSI_OPCODE_ATOMXCHG: 4361 val = value[0].i[0]; 4362 break; 4363 case TGSI_OPCODE_ATOMCAS: 4364 if (val == value[0].u[0]) 4365 val = value2[0].u[0]; 4366 break; 4367 default: 4368 break; 4369 } 4370 for (i = 0; i < TGSI_QUAD_SIZE; i++) 4371 if (execmask & (1 << i)) 4372 memcpy(ptr, &val, 4); 4373 4374 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4375 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4376 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 4377 } 4378 } 4379 } 4380 4381 static void 4382 exec_atomop(struct tgsi_exec_machine *mach, 4383 const struct tgsi_full_instruction *inst) 4384 { 4385 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) 4386 exec_atomop_img(mach, inst); 4387 else if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) 4388 exec_atomop_buf(mach, inst); 4389 else if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) 4390 exec_atomop_mem(mach, inst); 4391 } 4392 4393 static void 4394 exec_resq_img(struct tgsi_exec_machine *mach, 4395 const struct tgsi_full_instruction *inst) 4396 { 4397 int result[4]; 4398 union tgsi_exec_channel r[4]; 4399 uint unit; 4400 int i, chan, j; 4401 struct tgsi_image_params params; 4402 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4403 4404 unit = fetch_sampler_unit(mach, inst, 0); 4405 4406 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 4407 params.unit = unit; 4408 params.tgsi_tex_instr = inst->Memory.Texture; 4409 params.format = inst->Memory.Format; 4410 4411 mach->Image->get_dims(mach->Image, ¶ms, result); 4412 4413 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 4414 for (j = 0; j < 4; j++) { 4415 r[j].i[i] = result[j]; 4416 } 4417 } 4418 4419 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4420 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4421 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, 4422 TGSI_EXEC_DATA_INT); 4423 } 4424 } 4425 } 4426 4427 static void 4428 exec_resq_buf(struct tgsi_exec_machine *mach, 4429 const struct tgsi_full_instruction *inst) 4430 { 4431 int result; 4432 union tgsi_exec_channel r[4]; 4433 uint unit; 4434 int i, chan; 4435 struct tgsi_buffer_params params; 4436 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4437 4438 unit = fetch_sampler_unit(mach, inst, 0); 4439 4440 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 4441 params.unit = unit; 4442 4443 mach->Buffer->get_dims(mach->Buffer, ¶ms, &result); 4444 4445 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 4446 r[0].i[i] = result; 4447 } 4448 4449 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4450 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4451 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, 4452 TGSI_EXEC_DATA_INT); 4453 } 4454 } 4455 } 4456 4457 static void 4458 exec_resq(struct tgsi_exec_machine *mach, 4459 const struct tgsi_full_instruction *inst) 4460 { 4461 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) 4462 exec_resq_img(mach, inst); 4463 else 4464 exec_resq_buf(mach, inst); 4465 } 4466 4467 static void 4468 micro_f2u64(union tgsi_double_channel *dst, 4469 const union tgsi_exec_channel *src) 4470 { 4471 dst->u64[0] = (uint64_t)src->f[0]; 4472 dst->u64[1] = (uint64_t)src->f[1]; 4473 dst->u64[2] = (uint64_t)src->f[2]; 4474 dst->u64[3] = (uint64_t)src->f[3]; 4475 } 4476 4477 static void 4478 micro_f2i64(union tgsi_double_channel *dst, 4479 const union tgsi_exec_channel *src) 4480 { 4481 dst->i64[0] = (int64_t)src->f[0]; 4482 dst->i64[1] = (int64_t)src->f[1]; 4483 dst->i64[2] = (int64_t)src->f[2]; 4484 dst->i64[3] = (int64_t)src->f[3]; 4485 } 4486 4487 static void 4488 micro_u2i64(union tgsi_double_channel *dst, 4489 const union tgsi_exec_channel *src) 4490 { 4491 dst->u64[0] = (uint64_t)src->u[0]; 4492 dst->u64[1] = (uint64_t)src->u[1]; 4493 dst->u64[2] = (uint64_t)src->u[2]; 4494 dst->u64[3] = (uint64_t)src->u[3]; 4495 } 4496 4497 static void 4498 micro_i2i64(union tgsi_double_channel *dst, 4499 const union tgsi_exec_channel *src) 4500 { 4501 dst->i64[0] = (int64_t)src->i[0]; 4502 dst->i64[1] = (int64_t)src->i[1]; 4503 dst->i64[2] = (int64_t)src->i[2]; 4504 dst->i64[3] = (int64_t)src->i[3]; 4505 } 4506 4507 static void 4508 micro_d2u64(union tgsi_double_channel *dst, 4509 const union tgsi_double_channel *src) 4510 { 4511 dst->u64[0] = (uint64_t)src->d[0]; 4512 dst->u64[1] = (uint64_t)src->d[1]; 4513 dst->u64[2] = (uint64_t)src->d[2]; 4514 dst->u64[3] = (uint64_t)src->d[3]; 4515 } 4516 4517 static void 4518 micro_d2i64(union tgsi_double_channel *dst, 4519 const union tgsi_double_channel *src) 4520 { 4521 dst->i64[0] = (int64_t)src->d[0]; 4522 dst->i64[1] = (int64_t)src->d[1]; 4523 dst->i64[2] = (int64_t)src->d[2]; 4524 dst->i64[3] = (int64_t)src->d[3]; 4525 } 4526 4527 static void 4528 micro_u642d(union tgsi_double_channel *dst, 4529 const union tgsi_double_channel *src) 4530 { 4531 dst->d[0] = (double)src->u64[0]; 4532 dst->d[1] = (double)src->u64[1]; 4533 dst->d[2] = (double)src->u64[2]; 4534 dst->d[3] = (double)src->u64[3]; 4535 } 4536 4537 static void 4538 micro_i642d(union tgsi_double_channel *dst, 4539 const union tgsi_double_channel *src) 4540 { 4541 dst->d[0] = (double)src->i64[0]; 4542 dst->d[1] = (double)src->i64[1]; 4543 dst->d[2] = (double)src->i64[2]; 4544 dst->d[3] = (double)src->i64[3]; 4545 } 4546 4547 static void 4548 micro_u642f(union tgsi_exec_channel *dst, 4549 const union tgsi_double_channel *src) 4550 { 4551 dst->f[0] = (float)src->u64[0]; 4552 dst->f[1] = (float)src->u64[1]; 4553 dst->f[2] = (float)src->u64[2]; 4554 dst->f[3] = (float)src->u64[3]; 4555 } 4556 4557 static void 4558 micro_i642f(union tgsi_exec_channel *dst, 4559 const union tgsi_double_channel *src) 4560 { 4561 dst->f[0] = (float)src->i64[0]; 4562 dst->f[1] = (float)src->i64[1]; 4563 dst->f[2] = (float)src->i64[2]; 4564 dst->f[3] = (float)src->i64[3]; 4565 } 4566 4567 static void 4568 exec_t_2_64(struct tgsi_exec_machine *mach, 4569 const struct tgsi_full_instruction *inst, 4570 micro_dop_s op, 4571 enum tgsi_exec_datatype src_datatype) 4572 { 4573 union tgsi_exec_channel src; 4574 union tgsi_double_channel dst; 4575 4576 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) { 4577 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype); 4578 op(&dst, &src); 4579 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 4580 } 4581 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) { 4582 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, src_datatype); 4583 op(&dst, &src); 4584 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 4585 } 4586 } 4587 4588 static void 4589 exec_64_2_t(struct tgsi_exec_machine *mach, 4590 const struct tgsi_full_instruction *inst, 4591 micro_sop_d op, 4592 enum tgsi_exec_datatype dst_datatype) 4593 { 4594 union tgsi_double_channel src; 4595 union tgsi_exec_channel dst; 4596 int wm = inst->Dst[0].Register.WriteMask; 4597 int i; 4598 int bit; 4599 for (i = 0; i < 2; i++) { 4600 bit = ffs(wm); 4601 if (bit) { 4602 wm &= ~(1 << (bit - 1)); 4603 if (i == 0) 4604 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 4605 else 4606 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 4607 op(&dst, &src); 4608 store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, dst_datatype); 4609 } 4610 } 4611 } 4612 4613 static void 4614 micro_i2f(union tgsi_exec_channel *dst, 4615 const union tgsi_exec_channel *src) 4616 { 4617 dst->f[0] = (float)src->i[0]; 4618 dst->f[1] = (float)src->i[1]; 4619 dst->f[2] = (float)src->i[2]; 4620 dst->f[3] = (float)src->i[3]; 4621 } 4622 4623 static void 4624 micro_not(union tgsi_exec_channel *dst, 4625 const union tgsi_exec_channel *src) 4626 { 4627 dst->u[0] = ~src->u[0]; 4628 dst->u[1] = ~src->u[1]; 4629 dst->u[2] = ~src->u[2]; 4630 dst->u[3] = ~src->u[3]; 4631 } 4632 4633 static void 4634 micro_shl(union tgsi_exec_channel *dst, 4635 const union tgsi_exec_channel *src0, 4636 const union tgsi_exec_channel *src1) 4637 { 4638 unsigned masked_count; 4639 masked_count = src1->u[0] & 0x1f; 4640 dst->u[0] = src0->u[0] << masked_count; 4641 masked_count = src1->u[1] & 0x1f; 4642 dst->u[1] = src0->u[1] << masked_count; 4643 masked_count = src1->u[2] & 0x1f; 4644 dst->u[2] = src0->u[2] << masked_count; 4645 masked_count = src1->u[3] & 0x1f; 4646 dst->u[3] = src0->u[3] << masked_count; 4647 } 4648 4649 static void 4650 micro_and(union tgsi_exec_channel *dst, 4651 const union tgsi_exec_channel *src0, 4652 const union tgsi_exec_channel *src1) 4653 { 4654 dst->u[0] = src0->u[0] & src1->u[0]; 4655 dst->u[1] = src0->u[1] & src1->u[1]; 4656 dst->u[2] = src0->u[2] & src1->u[2]; 4657 dst->u[3] = src0->u[3] & src1->u[3]; 4658 } 4659 4660 static void 4661 micro_or(union tgsi_exec_channel *dst, 4662 const union tgsi_exec_channel *src0, 4663 const union tgsi_exec_channel *src1) 4664 { 4665 dst->u[0] = src0->u[0] | src1->u[0]; 4666 dst->u[1] = src0->u[1] | src1->u[1]; 4667 dst->u[2] = src0->u[2] | src1->u[2]; 4668 dst->u[3] = src0->u[3] | src1->u[3]; 4669 } 4670 4671 static void 4672 micro_xor(union tgsi_exec_channel *dst, 4673 const union tgsi_exec_channel *src0, 4674 const union tgsi_exec_channel *src1) 4675 { 4676 dst->u[0] = src0->u[0] ^ src1->u[0]; 4677 dst->u[1] = src0->u[1] ^ src1->u[1]; 4678 dst->u[2] = src0->u[2] ^ src1->u[2]; 4679 dst->u[3] = src0->u[3] ^ src1->u[3]; 4680 } 4681 4682 static void 4683 micro_mod(union tgsi_exec_channel *dst, 4684 const union tgsi_exec_channel *src0, 4685 const union tgsi_exec_channel *src1) 4686 { 4687 dst->i[0] = src0->i[0] % src1->i[0]; 4688 dst->i[1] = src0->i[1] % src1->i[1]; 4689 dst->i[2] = src0->i[2] % src1->i[2]; 4690 dst->i[3] = src0->i[3] % src1->i[3]; 4691 } 4692 4693 static void 4694 micro_f2i(union tgsi_exec_channel *dst, 4695 const union tgsi_exec_channel *src) 4696 { 4697 dst->i[0] = (int)src->f[0]; 4698 dst->i[1] = (int)src->f[1]; 4699 dst->i[2] = (int)src->f[2]; 4700 dst->i[3] = (int)src->f[3]; 4701 } 4702 4703 static void 4704 micro_fseq(union tgsi_exec_channel *dst, 4705 const union tgsi_exec_channel *src0, 4706 const union tgsi_exec_channel *src1) 4707 { 4708 dst->u[0] = src0->f[0] == src1->f[0] ? ~0 : 0; 4709 dst->u[1] = src0->f[1] == src1->f[1] ? ~0 : 0; 4710 dst->u[2] = src0->f[2] == src1->f[2] ? ~0 : 0; 4711 dst->u[3] = src0->f[3] == src1->f[3] ? ~0 : 0; 4712 } 4713 4714 static void 4715 micro_fsge(union tgsi_exec_channel *dst, 4716 const union tgsi_exec_channel *src0, 4717 const union tgsi_exec_channel *src1) 4718 { 4719 dst->u[0] = src0->f[0] >= src1->f[0] ? ~0 : 0; 4720 dst->u[1] = src0->f[1] >= src1->f[1] ? ~0 : 0; 4721 dst->u[2] = src0->f[2] >= src1->f[2] ? ~0 : 0; 4722 dst->u[3] = src0->f[3] >= src1->f[3] ? ~0 : 0; 4723 } 4724 4725 static void 4726 micro_fslt(union tgsi_exec_channel *dst, 4727 const union tgsi_exec_channel *src0, 4728 const union tgsi_exec_channel *src1) 4729 { 4730 dst->u[0] = src0->f[0] < src1->f[0] ? ~0 : 0; 4731 dst->u[1] = src0->f[1] < src1->f[1] ? ~0 : 0; 4732 dst->u[2] = src0->f[2] < src1->f[2] ? ~0 : 0; 4733 dst->u[3] = src0->f[3] < src1->f[3] ? ~0 : 0; 4734 } 4735 4736 static void 4737 micro_fsne(union tgsi_exec_channel *dst, 4738 const union tgsi_exec_channel *src0, 4739 const union tgsi_exec_channel *src1) 4740 { 4741 dst->u[0] = src0->f[0] != src1->f[0] ? ~0 : 0; 4742 dst->u[1] = src0->f[1] != src1->f[1] ? ~0 : 0; 4743 dst->u[2] = src0->f[2] != src1->f[2] ? ~0 : 0; 4744 dst->u[3] = src0->f[3] != src1->f[3] ? ~0 : 0; 4745 } 4746 4747 static void 4748 micro_idiv(union tgsi_exec_channel *dst, 4749 const union tgsi_exec_channel *src0, 4750 const union tgsi_exec_channel *src1) 4751 { 4752 dst->i[0] = src1->i[0] ? src0->i[0] / src1->i[0] : 0; 4753 dst->i[1] = src1->i[1] ? src0->i[1] / src1->i[1] : 0; 4754 dst->i[2] = src1->i[2] ? src0->i[2] / src1->i[2] : 0; 4755 dst->i[3] = src1->i[3] ? src0->i[3] / src1->i[3] : 0; 4756 } 4757 4758 static void 4759 micro_imax(union tgsi_exec_channel *dst, 4760 const union tgsi_exec_channel *src0, 4761 const union tgsi_exec_channel *src1) 4762 { 4763 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; 4764 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; 4765 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; 4766 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; 4767 } 4768 4769 static void 4770 micro_imin(union tgsi_exec_channel *dst, 4771 const union tgsi_exec_channel *src0, 4772 const union tgsi_exec_channel *src1) 4773 { 4774 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; 4775 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; 4776 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; 4777 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; 4778 } 4779 4780 static void 4781 micro_isge(union tgsi_exec_channel *dst, 4782 const union tgsi_exec_channel *src0, 4783 const union tgsi_exec_channel *src1) 4784 { 4785 dst->i[0] = src0->i[0] >= src1->i[0] ? -1 : 0; 4786 dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0; 4787 dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0; 4788 dst->i[3] = src0->i[3] >= src1->i[3] ? -1 : 0; 4789 } 4790 4791 static void 4792 micro_ishr(union tgsi_exec_channel *dst, 4793 const union tgsi_exec_channel *src0, 4794 const union tgsi_exec_channel *src1) 4795 { 4796 unsigned masked_count; 4797 masked_count = src1->i[0] & 0x1f; 4798 dst->i[0] = src0->i[0] >> masked_count; 4799 masked_count = src1->i[1] & 0x1f; 4800 dst->i[1] = src0->i[1] >> masked_count; 4801 masked_count = src1->i[2] & 0x1f; 4802 dst->i[2] = src0->i[2] >> masked_count; 4803 masked_count = src1->i[3] & 0x1f; 4804 dst->i[3] = src0->i[3] >> masked_count; 4805 } 4806 4807 static void 4808 micro_islt(union tgsi_exec_channel *dst, 4809 const union tgsi_exec_channel *src0, 4810 const union tgsi_exec_channel *src1) 4811 { 4812 dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0; 4813 dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0; 4814 dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0; 4815 dst->i[3] = src0->i[3] < src1->i[3] ? -1 : 0; 4816 } 4817 4818 static void 4819 micro_f2u(union tgsi_exec_channel *dst, 4820 const union tgsi_exec_channel *src) 4821 { 4822 dst->u[0] = (uint)src->f[0]; 4823 dst->u[1] = (uint)src->f[1]; 4824 dst->u[2] = (uint)src->f[2]; 4825 dst->u[3] = (uint)src->f[3]; 4826 } 4827 4828 static void 4829 micro_u2f(union tgsi_exec_channel *dst, 4830 const union tgsi_exec_channel *src) 4831 { 4832 dst->f[0] = (float)src->u[0]; 4833 dst->f[1] = (float)src->u[1]; 4834 dst->f[2] = (float)src->u[2]; 4835 dst->f[3] = (float)src->u[3]; 4836 } 4837 4838 static void 4839 micro_uadd(union tgsi_exec_channel *dst, 4840 const union tgsi_exec_channel *src0, 4841 const union tgsi_exec_channel *src1) 4842 { 4843 dst->u[0] = src0->u[0] + src1->u[0]; 4844 dst->u[1] = src0->u[1] + src1->u[1]; 4845 dst->u[2] = src0->u[2] + src1->u[2]; 4846 dst->u[3] = src0->u[3] + src1->u[3]; 4847 } 4848 4849 static void 4850 micro_udiv(union tgsi_exec_channel *dst, 4851 const union tgsi_exec_channel *src0, 4852 const union tgsi_exec_channel *src1) 4853 { 4854 dst->u[0] = src1->u[0] ? src0->u[0] / src1->u[0] : ~0u; 4855 dst->u[1] = src1->u[1] ? src0->u[1] / src1->u[1] : ~0u; 4856 dst->u[2] = src1->u[2] ? src0->u[2] / src1->u[2] : ~0u; 4857 dst->u[3] = src1->u[3] ? src0->u[3] / src1->u[3] : ~0u; 4858 } 4859 4860 static void 4861 micro_umad(union tgsi_exec_channel *dst, 4862 const union tgsi_exec_channel *src0, 4863 const union tgsi_exec_channel *src1, 4864 const union tgsi_exec_channel *src2) 4865 { 4866 dst->u[0] = src0->u[0] * src1->u[0] + src2->u[0]; 4867 dst->u[1] = src0->u[1] * src1->u[1] + src2->u[1]; 4868 dst->u[2] = src0->u[2] * src1->u[2] + src2->u[2]; 4869 dst->u[3] = src0->u[3] * src1->u[3] + src2->u[3]; 4870 } 4871 4872 static void 4873 micro_umax(union tgsi_exec_channel *dst, 4874 const union tgsi_exec_channel *src0, 4875 const union tgsi_exec_channel *src1) 4876 { 4877 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; 4878 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; 4879 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; 4880 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; 4881 } 4882 4883 static void 4884 micro_umin(union tgsi_exec_channel *dst, 4885 const union tgsi_exec_channel *src0, 4886 const union tgsi_exec_channel *src1) 4887 { 4888 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; 4889 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; 4890 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; 4891 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; 4892 } 4893 4894 static void 4895 micro_umod(union tgsi_exec_channel *dst, 4896 const union tgsi_exec_channel *src0, 4897 const union tgsi_exec_channel *src1) 4898 { 4899 dst->u[0] = src1->u[0] ? src0->u[0] % src1->u[0] : ~0u; 4900 dst->u[1] = src1->u[1] ? src0->u[1] % src1->u[1] : ~0u; 4901 dst->u[2] = src1->u[2] ? src0->u[2] % src1->u[2] : ~0u; 4902 dst->u[3] = src1->u[3] ? src0->u[3] % src1->u[3] : ~0u; 4903 } 4904 4905 static void 4906 micro_umul(union tgsi_exec_channel *dst, 4907 const union tgsi_exec_channel *src0, 4908 const union tgsi_exec_channel *src1) 4909 { 4910 dst->u[0] = src0->u[0] * src1->u[0]; 4911 dst->u[1] = src0->u[1] * src1->u[1]; 4912 dst->u[2] = src0->u[2] * src1->u[2]; 4913 dst->u[3] = src0->u[3] * src1->u[3]; 4914 } 4915 4916 static void 4917 micro_imul_hi(union tgsi_exec_channel *dst, 4918 const union tgsi_exec_channel *src0, 4919 const union tgsi_exec_channel *src1) 4920 { 4921 #define I64M(x, y) ((((int64_t)x) * ((int64_t)y)) >> 32) 4922 dst->i[0] = I64M(src0->i[0], src1->i[0]); 4923 dst->i[1] = I64M(src0->i[1], src1->i[1]); 4924 dst->i[2] = I64M(src0->i[2], src1->i[2]); 4925 dst->i[3] = I64M(src0->i[3], src1->i[3]); 4926 #undef I64M 4927 } 4928 4929 static void 4930 micro_umul_hi(union tgsi_exec_channel *dst, 4931 const union tgsi_exec_channel *src0, 4932 const union tgsi_exec_channel *src1) 4933 { 4934 #define U64M(x, y) ((((uint64_t)x) * ((uint64_t)y)) >> 32) 4935 dst->u[0] = U64M(src0->u[0], src1->u[0]); 4936 dst->u[1] = U64M(src0->u[1], src1->u[1]); 4937 dst->u[2] = U64M(src0->u[2], src1->u[2]); 4938 dst->u[3] = U64M(src0->u[3], src1->u[3]); 4939 #undef U64M 4940 } 4941 4942 static void 4943 micro_useq(union tgsi_exec_channel *dst, 4944 const union tgsi_exec_channel *src0, 4945 const union tgsi_exec_channel *src1) 4946 { 4947 dst->u[0] = src0->u[0] == src1->u[0] ? ~0 : 0; 4948 dst->u[1] = src0->u[1] == src1->u[1] ? ~0 : 0; 4949 dst->u[2] = src0->u[2] == src1->u[2] ? ~0 : 0; 4950 dst->u[3] = src0->u[3] == src1->u[3] ? ~0 : 0; 4951 } 4952 4953 static void 4954 micro_usge(union tgsi_exec_channel *dst, 4955 const union tgsi_exec_channel *src0, 4956 const union tgsi_exec_channel *src1) 4957 { 4958 dst->u[0] = src0->u[0] >= src1->u[0] ? ~0 : 0; 4959 dst->u[1] = src0->u[1] >= src1->u[1] ? ~0 : 0; 4960 dst->u[2] = src0->u[2] >= src1->u[2] ? ~0 : 0; 4961 dst->u[3] = src0->u[3] >= src1->u[3] ? ~0 : 0; 4962 } 4963 4964 static void 4965 micro_ushr(union tgsi_exec_channel *dst, 4966 const union tgsi_exec_channel *src0, 4967 const union tgsi_exec_channel *src1) 4968 { 4969 unsigned masked_count; 4970 masked_count = src1->u[0] & 0x1f; 4971 dst->u[0] = src0->u[0] >> masked_count; 4972 masked_count = src1->u[1] & 0x1f; 4973 dst->u[1] = src0->u[1] >> masked_count; 4974 masked_count = src1->u[2] & 0x1f; 4975 dst->u[2] = src0->u[2] >> masked_count; 4976 masked_count = src1->u[3] & 0x1f; 4977 dst->u[3] = src0->u[3] >> masked_count; 4978 } 4979 4980 static void 4981 micro_uslt(union tgsi_exec_channel *dst, 4982 const union tgsi_exec_channel *src0, 4983 const union tgsi_exec_channel *src1) 4984 { 4985 dst->u[0] = src0->u[0] < src1->u[0] ? ~0 : 0; 4986 dst->u[1] = src0->u[1] < src1->u[1] ? ~0 : 0; 4987 dst->u[2] = src0->u[2] < src1->u[2] ? ~0 : 0; 4988 dst->u[3] = src0->u[3] < src1->u[3] ? ~0 : 0; 4989 } 4990 4991 static void 4992 micro_usne(union tgsi_exec_channel *dst, 4993 const union tgsi_exec_channel *src0, 4994 const union tgsi_exec_channel *src1) 4995 { 4996 dst->u[0] = src0->u[0] != src1->u[0] ? ~0 : 0; 4997 dst->u[1] = src0->u[1] != src1->u[1] ? ~0 : 0; 4998 dst->u[2] = src0->u[2] != src1->u[2] ? ~0 : 0; 4999 dst->u[3] = src0->u[3] != src1->u[3] ? ~0 : 0; 5000 } 5001 5002 static void 5003 micro_uarl(union tgsi_exec_channel *dst, 5004 const union tgsi_exec_channel *src) 5005 { 5006 dst->i[0] = src->u[0]; 5007 dst->i[1] = src->u[1]; 5008 dst->i[2] = src->u[2]; 5009 dst->i[3] = src->u[3]; 5010 } 5011 5012 static void 5013 micro_ucmp(union tgsi_exec_channel *dst, 5014 const union tgsi_exec_channel *src0, 5015 const union tgsi_exec_channel *src1, 5016 const union tgsi_exec_channel *src2) 5017 { 5018 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; 5019 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; 5020 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; 5021 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; 5022 } 5023 5024 /** 5025 * Signed bitfield extract (i.e. sign-extend the extracted bits) 5026 */ 5027 static void 5028 micro_ibfe(union tgsi_exec_channel *dst, 5029 const union tgsi_exec_channel *src0, 5030 const union tgsi_exec_channel *src1, 5031 const union tgsi_exec_channel *src2) 5032 { 5033 int i; 5034 for (i = 0; i < 4; i++) { 5035 int width = src2->i[i] & 0x1f; 5036 int offset = src1->i[i] & 0x1f; 5037 if (width == 0) 5038 dst->i[i] = 0; 5039 else if (width + offset < 32) 5040 dst->i[i] = (src0->i[i] << (32 - width - offset)) >> (32 - width); 5041 else 5042 dst->i[i] = src0->i[i] >> offset; 5043 } 5044 } 5045 5046 /** 5047 * Unsigned bitfield extract 5048 */ 5049 static void 5050 micro_ubfe(union tgsi_exec_channel *dst, 5051 const union tgsi_exec_channel *src0, 5052 const union tgsi_exec_channel *src1, 5053 const union tgsi_exec_channel *src2) 5054 { 5055 int i; 5056 for (i = 0; i < 4; i++) { 5057 int width = src2->u[i] & 0x1f; 5058 int offset = src1->u[i] & 0x1f; 5059 if (width == 0) 5060 dst->u[i] = 0; 5061 else if (width + offset < 32) 5062 dst->u[i] = (src0->u[i] << (32 - width - offset)) >> (32 - width); 5063 else 5064 dst->u[i] = src0->u[i] >> offset; 5065 } 5066 } 5067 5068 /** 5069 * Bitfield insert: copy low bits from src1 into a region of src0. 5070 */ 5071 static void 5072 micro_bfi(union tgsi_exec_channel *dst, 5073 const union tgsi_exec_channel *src0, 5074 const union tgsi_exec_channel *src1, 5075 const union tgsi_exec_channel *src2, 5076 const union tgsi_exec_channel *src3) 5077 { 5078 int i; 5079 for (i = 0; i < 4; i++) { 5080 int width = src3->u[i] & 0x1f; 5081 int offset = src2->u[i] & 0x1f; 5082 int bitmask = ((1 << width) - 1) << offset; 5083 dst->u[i] = ((src1->u[i] << offset) & bitmask) | (src0->u[i] & ~bitmask); 5084 } 5085 } 5086 5087 static void 5088 micro_brev(union tgsi_exec_channel *dst, 5089 const union tgsi_exec_channel *src) 5090 { 5091 dst->u[0] = util_bitreverse(src->u[0]); 5092 dst->u[1] = util_bitreverse(src->u[1]); 5093 dst->u[2] = util_bitreverse(src->u[2]); 5094 dst->u[3] = util_bitreverse(src->u[3]); 5095 } 5096 5097 static void 5098 micro_popc(union tgsi_exec_channel *dst, 5099 const union tgsi_exec_channel *src) 5100 { 5101 dst->u[0] = util_bitcount(src->u[0]); 5102 dst->u[1] = util_bitcount(src->u[1]); 5103 dst->u[2] = util_bitcount(src->u[2]); 5104 dst->u[3] = util_bitcount(src->u[3]); 5105 } 5106 5107 static void 5108 micro_lsb(union tgsi_exec_channel *dst, 5109 const union tgsi_exec_channel *src) 5110 { 5111 dst->i[0] = ffs(src->u[0]) - 1; 5112 dst->i[1] = ffs(src->u[1]) - 1; 5113 dst->i[2] = ffs(src->u[2]) - 1; 5114 dst->i[3] = ffs(src->u[3]) - 1; 5115 } 5116 5117 static void 5118 micro_imsb(union tgsi_exec_channel *dst, 5119 const union tgsi_exec_channel *src) 5120 { 5121 dst->i[0] = util_last_bit_signed(src->i[0]) - 1; 5122 dst->i[1] = util_last_bit_signed(src->i[1]) - 1; 5123 dst->i[2] = util_last_bit_signed(src->i[2]) - 1; 5124 dst->i[3] = util_last_bit_signed(src->i[3]) - 1; 5125 } 5126 5127 static void 5128 micro_umsb(union tgsi_exec_channel *dst, 5129 const union tgsi_exec_channel *src) 5130 { 5131 dst->i[0] = util_last_bit(src->u[0]) - 1; 5132 dst->i[1] = util_last_bit(src->u[1]) - 1; 5133 dst->i[2] = util_last_bit(src->u[2]) - 1; 5134 dst->i[3] = util_last_bit(src->u[3]) - 1; 5135 } 5136 5137 /** 5138 * Execute a TGSI instruction. 5139 * Returns TRUE if a barrier instruction is hit, 5140 * otherwise FALSE. 5141 */ 5142 static boolean 5143 exec_instruction( 5144 struct tgsi_exec_machine *mach, 5145 const struct tgsi_full_instruction *inst, 5146 int *pc ) 5147 { 5148 union tgsi_exec_channel r[10]; 5149 5150 (*pc)++; 5151 5152 switch (inst->Instruction.Opcode) { 5153 case TGSI_OPCODE_ARL: 5154 exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); 5155 break; 5156 5157 case TGSI_OPCODE_MOV: 5158 exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 5159 break; 5160 5161 case TGSI_OPCODE_LIT: 5162 exec_lit(mach, inst); 5163 break; 5164 5165 case TGSI_OPCODE_RCP: 5166 exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5167 break; 5168 5169 case TGSI_OPCODE_RSQ: 5170 exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5171 break; 5172 5173 case TGSI_OPCODE_EXP: 5174 exec_exp(mach, inst); 5175 break; 5176 5177 case TGSI_OPCODE_LOG: 5178 exec_log(mach, inst); 5179 break; 5180 5181 case TGSI_OPCODE_MUL: 5182 exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5183 break; 5184 5185 case TGSI_OPCODE_ADD: 5186 exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5187 break; 5188 5189 case TGSI_OPCODE_DP3: 5190 exec_dp3(mach, inst); 5191 break; 5192 5193 case TGSI_OPCODE_DP4: 5194 exec_dp4(mach, inst); 5195 break; 5196 5197 case TGSI_OPCODE_DST: 5198 exec_dst(mach, inst); 5199 break; 5200 5201 case TGSI_OPCODE_MIN: 5202 exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5203 break; 5204 5205 case TGSI_OPCODE_MAX: 5206 exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5207 break; 5208 5209 case TGSI_OPCODE_SLT: 5210 exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5211 break; 5212 5213 case TGSI_OPCODE_SGE: 5214 exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5215 break; 5216 5217 case TGSI_OPCODE_MAD: 5218 exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5219 break; 5220 5221 case TGSI_OPCODE_LRP: 5222 exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5223 break; 5224 5225 case TGSI_OPCODE_SQRT: 5226 exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5227 break; 5228 5229 case TGSI_OPCODE_DP2A: 5230 exec_dp2a(mach, inst); 5231 break; 5232 5233 case TGSI_OPCODE_FRC: 5234 exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5235 break; 5236 5237 case TGSI_OPCODE_CLAMP: 5238 exec_vector_trinary(mach, inst, micro_clamp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5239 break; 5240 5241 case TGSI_OPCODE_FLR: 5242 exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5243 break; 5244 5245 case TGSI_OPCODE_ROUND: 5246 exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5247 break; 5248 5249 case TGSI_OPCODE_EX2: 5250 exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5251 break; 5252 5253 case TGSI_OPCODE_LG2: 5254 exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5255 break; 5256 5257 case TGSI_OPCODE_POW: 5258 exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5259 break; 5260 5261 case TGSI_OPCODE_XPD: 5262 exec_xpd(mach, inst); 5263 break; 5264 5265 case TGSI_OPCODE_DPH: 5266 exec_dph(mach, inst); 5267 break; 5268 5269 case TGSI_OPCODE_COS: 5270 exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5271 break; 5272 5273 case TGSI_OPCODE_DDX: 5274 exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5275 break; 5276 5277 case TGSI_OPCODE_DDY: 5278 exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5279 break; 5280 5281 case TGSI_OPCODE_KILL: 5282 exec_kill (mach, inst); 5283 break; 5284 5285 case TGSI_OPCODE_KILL_IF: 5286 exec_kill_if (mach, inst); 5287 break; 5288 5289 case TGSI_OPCODE_PK2H: 5290 exec_pk2h(mach, inst); 5291 break; 5292 5293 case TGSI_OPCODE_PK2US: 5294 assert (0); 5295 break; 5296 5297 case TGSI_OPCODE_PK4B: 5298 assert (0); 5299 break; 5300 5301 case TGSI_OPCODE_PK4UB: 5302 assert (0); 5303 break; 5304 5305 case TGSI_OPCODE_SEQ: 5306 exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5307 break; 5308 5309 case TGSI_OPCODE_SGT: 5310 exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5311 break; 5312 5313 case TGSI_OPCODE_SIN: 5314 exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5315 break; 5316 5317 case TGSI_OPCODE_SLE: 5318 exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5319 break; 5320 5321 case TGSI_OPCODE_SNE: 5322 exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5323 break; 5324 5325 case TGSI_OPCODE_TEX: 5326 /* simple texture lookup */ 5327 /* src[0] = texcoord */ 5328 /* src[1] = sampler unit */ 5329 exec_tex(mach, inst, TEX_MODIFIER_NONE, 1); 5330 break; 5331 5332 case TGSI_OPCODE_TXB: 5333 /* Texture lookup with lod bias */ 5334 /* src[0] = texcoord (src[0].w = LOD bias) */ 5335 /* src[1] = sampler unit */ 5336 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 1); 5337 break; 5338 5339 case TGSI_OPCODE_TXD: 5340 /* Texture lookup with explict partial derivatives */ 5341 /* src[0] = texcoord */ 5342 /* src[1] = d[strq]/dx */ 5343 /* src[2] = d[strq]/dy */ 5344 /* src[3] = sampler unit */ 5345 exec_txd(mach, inst); 5346 break; 5347 5348 case TGSI_OPCODE_TXL: 5349 /* Texture lookup with explit LOD */ 5350 /* src[0] = texcoord (src[0].w = LOD) */ 5351 /* src[1] = sampler unit */ 5352 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 1); 5353 break; 5354 5355 case TGSI_OPCODE_TXP: 5356 /* Texture lookup with projection */ 5357 /* src[0] = texcoord (src[0].w = projection) */ 5358 /* src[1] = sampler unit */ 5359 exec_tex(mach, inst, TEX_MODIFIER_PROJECTED, 1); 5360 break; 5361 5362 case TGSI_OPCODE_TG4: 5363 /* src[0] = texcoord */ 5364 /* src[1] = component */ 5365 /* src[2] = sampler unit */ 5366 exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2); 5367 break; 5368 5369 case TGSI_OPCODE_LODQ: 5370 /* src[0] = texcoord */ 5371 /* src[1] = sampler unit */ 5372 exec_lodq(mach, inst); 5373 break; 5374 5375 case TGSI_OPCODE_UP2H: 5376 exec_up2h(mach, inst); 5377 break; 5378 5379 case TGSI_OPCODE_UP2US: 5380 assert (0); 5381 break; 5382 5383 case TGSI_OPCODE_UP4B: 5384 assert (0); 5385 break; 5386 5387 case TGSI_OPCODE_UP4UB: 5388 assert (0); 5389 break; 5390 5391 case TGSI_OPCODE_ARR: 5392 exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); 5393 break; 5394 5395 case TGSI_OPCODE_CAL: 5396 /* skip the call if no execution channels are enabled */ 5397 if (mach->ExecMask) { 5398 /* do the call */ 5399 5400 /* First, record the depths of the execution stacks. 5401 * This is important for deeply nested/looped return statements. 5402 * We have to unwind the stacks by the correct amount. For a 5403 * real code generator, we could determine the number of entries 5404 * to pop off each stack with simple static analysis and avoid 5405 * implementing this data structure at run time. 5406 */ 5407 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; 5408 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; 5409 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; 5410 mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop; 5411 mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop; 5412 /* note that PC was already incremented above */ 5413 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; 5414 5415 mach->CallStackTop++; 5416 5417 /* Second, push the Cond, Loop, Cont, Func stacks */ 5418 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 5419 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5420 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5421 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); 5422 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 5423 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); 5424 5425 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 5426 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 5427 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 5428 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; 5429 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 5430 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; 5431 5432 /* Finally, jump to the subroutine. The label is a pointer 5433 * (an instruction number) to the BGNSUB instruction. 5434 */ 5435 *pc = inst->Label.Label; 5436 assert(mach->Instructions[*pc].Instruction.Opcode 5437 == TGSI_OPCODE_BGNSUB); 5438 } 5439 break; 5440 5441 case TGSI_OPCODE_RET: 5442 mach->FuncMask &= ~mach->ExecMask; 5443 UPDATE_EXEC_MASK(mach); 5444 5445 if (mach->FuncMask == 0x0) { 5446 /* really return now (otherwise, keep executing */ 5447 5448 if (mach->CallStackTop == 0) { 5449 /* returning from main() */ 5450 mach->CondStackTop = 0; 5451 mach->LoopStackTop = 0; 5452 mach->ContStackTop = 0; 5453 mach->LoopLabelStackTop = 0; 5454 mach->SwitchStackTop = 0; 5455 mach->BreakStackTop = 0; 5456 *pc = -1; 5457 return FALSE; 5458 } 5459 5460 assert(mach->CallStackTop > 0); 5461 mach->CallStackTop--; 5462 5463 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 5464 mach->CondMask = mach->CondStack[mach->CondStackTop]; 5465 5466 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 5467 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 5468 5469 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 5470 mach->ContMask = mach->ContStack[mach->ContStackTop]; 5471 5472 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; 5473 mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; 5474 5475 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; 5476 mach->BreakType = mach->BreakStack[mach->BreakStackTop]; 5477 5478 assert(mach->FuncStackTop > 0); 5479 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 5480 5481 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 5482 5483 UPDATE_EXEC_MASK(mach); 5484 } 5485 break; 5486 5487 case TGSI_OPCODE_SSG: 5488 exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5489 break; 5490 5491 case TGSI_OPCODE_CMP: 5492 exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5493 break; 5494 5495 case TGSI_OPCODE_SCS: 5496 exec_scs(mach, inst); 5497 break; 5498 5499 case TGSI_OPCODE_DIV: 5500 exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5501 break; 5502 5503 case TGSI_OPCODE_DP2: 5504 exec_dp2(mach, inst); 5505 break; 5506 5507 case TGSI_OPCODE_IF: 5508 /* push CondMask */ 5509 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 5510 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 5511 FETCH( &r[0], 0, TGSI_CHAN_X ); 5512 /* update CondMask */ 5513 if( ! r[0].f[0] ) { 5514 mach->CondMask &= ~0x1; 5515 } 5516 if( ! r[0].f[1] ) { 5517 mach->CondMask &= ~0x2; 5518 } 5519 if( ! r[0].f[2] ) { 5520 mach->CondMask &= ~0x4; 5521 } 5522 if( ! r[0].f[3] ) { 5523 mach->CondMask &= ~0x8; 5524 } 5525 UPDATE_EXEC_MASK(mach); 5526 /* Todo: If CondMask==0, jump to ELSE */ 5527 break; 5528 5529 case TGSI_OPCODE_UIF: 5530 /* push CondMask */ 5531 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 5532 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 5533 IFETCH( &r[0], 0, TGSI_CHAN_X ); 5534 /* update CondMask */ 5535 if( ! r[0].u[0] ) { 5536 mach->CondMask &= ~0x1; 5537 } 5538 if( ! r[0].u[1] ) { 5539 mach->CondMask &= ~0x2; 5540 } 5541 if( ! r[0].u[2] ) { 5542 mach->CondMask &= ~0x4; 5543 } 5544 if( ! r[0].u[3] ) { 5545 mach->CondMask &= ~0x8; 5546 } 5547 UPDATE_EXEC_MASK(mach); 5548 /* Todo: If CondMask==0, jump to ELSE */ 5549 break; 5550 5551 case TGSI_OPCODE_ELSE: 5552 /* invert CondMask wrt previous mask */ 5553 { 5554 uint prevMask; 5555 assert(mach->CondStackTop > 0); 5556 prevMask = mach->CondStack[mach->CondStackTop - 1]; 5557 mach->CondMask = ~mach->CondMask & prevMask; 5558 UPDATE_EXEC_MASK(mach); 5559 /* Todo: If CondMask==0, jump to ENDIF */ 5560 } 5561 break; 5562 5563 case TGSI_OPCODE_ENDIF: 5564 /* pop CondMask */ 5565 assert(mach->CondStackTop > 0); 5566 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 5567 UPDATE_EXEC_MASK(mach); 5568 break; 5569 5570 case TGSI_OPCODE_END: 5571 /* make sure we end primitives which haven't 5572 * been explicitly emitted */ 5573 conditional_emit_primitive(mach); 5574 /* halt execution */ 5575 *pc = -1; 5576 break; 5577 5578 case TGSI_OPCODE_PUSHA: 5579 assert (0); 5580 break; 5581 5582 case TGSI_OPCODE_POPA: 5583 assert (0); 5584 break; 5585 5586 case TGSI_OPCODE_CEIL: 5587 exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5588 break; 5589 5590 case TGSI_OPCODE_I2F: 5591 exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT); 5592 break; 5593 5594 case TGSI_OPCODE_NOT: 5595 exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5596 break; 5597 5598 case TGSI_OPCODE_TRUNC: 5599 exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5600 break; 5601 5602 case TGSI_OPCODE_SHL: 5603 exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5604 break; 5605 5606 case TGSI_OPCODE_AND: 5607 exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5608 break; 5609 5610 case TGSI_OPCODE_OR: 5611 exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5612 break; 5613 5614 case TGSI_OPCODE_MOD: 5615 exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5616 break; 5617 5618 case TGSI_OPCODE_XOR: 5619 exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5620 break; 5621 5622 case TGSI_OPCODE_SAD: 5623 assert (0); 5624 break; 5625 5626 case TGSI_OPCODE_TXF: 5627 exec_txf(mach, inst); 5628 break; 5629 5630 case TGSI_OPCODE_TXQ: 5631 exec_txq(mach, inst); 5632 break; 5633 5634 case TGSI_OPCODE_EMIT: 5635 emit_vertex(mach); 5636 break; 5637 5638 case TGSI_OPCODE_ENDPRIM: 5639 emit_primitive(mach); 5640 break; 5641 5642 case TGSI_OPCODE_BGNLOOP: 5643 /* push LoopMask and ContMasks */ 5644 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5645 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5646 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5647 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 5648 5649 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 5650 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 5651 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; 5652 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 5653 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP; 5654 break; 5655 5656 case TGSI_OPCODE_ENDLOOP: 5657 /* Restore ContMask, but don't pop */ 5658 assert(mach->ContStackTop > 0); 5659 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 5660 UPDATE_EXEC_MASK(mach); 5661 if (mach->ExecMask) { 5662 /* repeat loop: jump to instruction just past BGNLOOP */ 5663 assert(mach->LoopLabelStackTop > 0); 5664 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; 5665 } 5666 else { 5667 /* exit loop: pop LoopMask */ 5668 assert(mach->LoopStackTop > 0); 5669 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 5670 /* pop ContMask */ 5671 assert(mach->ContStackTop > 0); 5672 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 5673 assert(mach->LoopLabelStackTop > 0); 5674 --mach->LoopLabelStackTop; 5675 5676 mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; 5677 } 5678 UPDATE_EXEC_MASK(mach); 5679 break; 5680 5681 case TGSI_OPCODE_BRK: 5682 exec_break(mach); 5683 break; 5684 5685 case TGSI_OPCODE_CONT: 5686 /* turn off cont channels for each enabled exec channel */ 5687 mach->ContMask &= ~mach->ExecMask; 5688 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 5689 UPDATE_EXEC_MASK(mach); 5690 break; 5691 5692 case TGSI_OPCODE_BGNSUB: 5693 /* no-op */ 5694 break; 5695 5696 case TGSI_OPCODE_ENDSUB: 5697 /* 5698 * XXX: This really should be a no-op. We should never reach this opcode. 5699 */ 5700 5701 assert(mach->CallStackTop > 0); 5702 mach->CallStackTop--; 5703 5704 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 5705 mach->CondMask = mach->CondStack[mach->CondStackTop]; 5706 5707 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 5708 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 5709 5710 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 5711 mach->ContMask = mach->ContStack[mach->ContStackTop]; 5712 5713 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; 5714 mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; 5715 5716 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; 5717 mach->BreakType = mach->BreakStack[mach->BreakStackTop]; 5718 5719 assert(mach->FuncStackTop > 0); 5720 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 5721 5722 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 5723 5724 UPDATE_EXEC_MASK(mach); 5725 break; 5726 5727 case TGSI_OPCODE_NOP: 5728 break; 5729 5730 case TGSI_OPCODE_BREAKC: 5731 IFETCH(&r[0], 0, TGSI_CHAN_X); 5732 /* update CondMask */ 5733 if (r[0].u[0] && (mach->ExecMask & 0x1)) { 5734 mach->LoopMask &= ~0x1; 5735 } 5736 if (r[0].u[1] && (mach->ExecMask & 0x2)) { 5737 mach->LoopMask &= ~0x2; 5738 } 5739 if (r[0].u[2] && (mach->ExecMask & 0x4)) { 5740 mach->LoopMask &= ~0x4; 5741 } 5742 if (r[0].u[3] && (mach->ExecMask & 0x8)) { 5743 mach->LoopMask &= ~0x8; 5744 } 5745 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 5746 UPDATE_EXEC_MASK(mach); 5747 break; 5748 5749 case TGSI_OPCODE_F2I: 5750 exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); 5751 break; 5752 5753 case TGSI_OPCODE_FSEQ: 5754 exec_vector_binary(mach, inst, micro_fseq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 5755 break; 5756 5757 case TGSI_OPCODE_FSGE: 5758 exec_vector_binary(mach, inst, micro_fsge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 5759 break; 5760 5761 case TGSI_OPCODE_FSLT: 5762 exec_vector_binary(mach, inst, micro_fslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 5763 break; 5764 5765 case TGSI_OPCODE_FSNE: 5766 exec_vector_binary(mach, inst, micro_fsne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 5767 break; 5768 5769 case TGSI_OPCODE_IDIV: 5770 exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5771 break; 5772 5773 case TGSI_OPCODE_IMAX: 5774 exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5775 break; 5776 5777 case TGSI_OPCODE_IMIN: 5778 exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5779 break; 5780 5781 case TGSI_OPCODE_INEG: 5782 exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5783 break; 5784 5785 case TGSI_OPCODE_ISGE: 5786 exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5787 break; 5788 5789 case TGSI_OPCODE_ISHR: 5790 exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5791 break; 5792 5793 case TGSI_OPCODE_ISLT: 5794 exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5795 break; 5796 5797 case TGSI_OPCODE_F2U: 5798 exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 5799 break; 5800 5801 case TGSI_OPCODE_U2F: 5802 exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT); 5803 break; 5804 5805 case TGSI_OPCODE_UADD: 5806 exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5807 break; 5808 5809 case TGSI_OPCODE_UDIV: 5810 exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5811 break; 5812 5813 case TGSI_OPCODE_UMAD: 5814 exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5815 break; 5816 5817 case TGSI_OPCODE_UMAX: 5818 exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5819 break; 5820 5821 case TGSI_OPCODE_UMIN: 5822 exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5823 break; 5824 5825 case TGSI_OPCODE_UMOD: 5826 exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5827 break; 5828 5829 case TGSI_OPCODE_UMUL: 5830 exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5831 break; 5832 5833 case TGSI_OPCODE_IMUL_HI: 5834 exec_vector_binary(mach, inst, micro_imul_hi, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5835 break; 5836 5837 case TGSI_OPCODE_UMUL_HI: 5838 exec_vector_binary(mach, inst, micro_umul_hi, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5839 break; 5840 5841 case TGSI_OPCODE_USEQ: 5842 exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5843 break; 5844 5845 case TGSI_OPCODE_USGE: 5846 exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5847 break; 5848 5849 case TGSI_OPCODE_USHR: 5850 exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5851 break; 5852 5853 case TGSI_OPCODE_USLT: 5854 exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5855 break; 5856 5857 case TGSI_OPCODE_USNE: 5858 exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5859 break; 5860 5861 case TGSI_OPCODE_SWITCH: 5862 exec_switch(mach, inst); 5863 break; 5864 5865 case TGSI_OPCODE_CASE: 5866 exec_case(mach, inst); 5867 break; 5868 5869 case TGSI_OPCODE_DEFAULT: 5870 exec_default(mach); 5871 break; 5872 5873 case TGSI_OPCODE_ENDSWITCH: 5874 exec_endswitch(mach); 5875 break; 5876 5877 case TGSI_OPCODE_SAMPLE_I: 5878 exec_txf(mach, inst); 5879 break; 5880 5881 case TGSI_OPCODE_SAMPLE_I_MS: 5882 exec_txf(mach, inst); 5883 break; 5884 5885 case TGSI_OPCODE_SAMPLE: 5886 exec_sample(mach, inst, TEX_MODIFIER_NONE, FALSE); 5887 break; 5888 5889 case TGSI_OPCODE_SAMPLE_B: 5890 exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS, FALSE); 5891 break; 5892 5893 case TGSI_OPCODE_SAMPLE_C: 5894 exec_sample(mach, inst, TEX_MODIFIER_NONE, TRUE); 5895 break; 5896 5897 case TGSI_OPCODE_SAMPLE_C_LZ: 5898 exec_sample(mach, inst, TEX_MODIFIER_LEVEL_ZERO, TRUE); 5899 break; 5900 5901 case TGSI_OPCODE_SAMPLE_D: 5902 exec_sample_d(mach, inst); 5903 break; 5904 5905 case TGSI_OPCODE_SAMPLE_L: 5906 exec_sample(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, FALSE); 5907 break; 5908 5909 case TGSI_OPCODE_GATHER4: 5910 assert(0); 5911 break; 5912 5913 case TGSI_OPCODE_SVIEWINFO: 5914 exec_txq(mach, inst); 5915 break; 5916 5917 case TGSI_OPCODE_SAMPLE_POS: 5918 assert(0); 5919 break; 5920 5921 case TGSI_OPCODE_SAMPLE_INFO: 5922 assert(0); 5923 break; 5924 5925 case TGSI_OPCODE_UARL: 5926 exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT); 5927 break; 5928 5929 case TGSI_OPCODE_UCMP: 5930 exec_vector_trinary(mach, inst, micro_ucmp, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5931 break; 5932 5933 case TGSI_OPCODE_IABS: 5934 exec_vector_unary(mach, inst, micro_iabs, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5935 break; 5936 5937 case TGSI_OPCODE_ISSG: 5938 exec_vector_unary(mach, inst, micro_isgn, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5939 break; 5940 5941 case TGSI_OPCODE_TEX2: 5942 /* simple texture lookup */ 5943 /* src[0] = texcoord */ 5944 /* src[1] = compare */ 5945 /* src[2] = sampler unit */ 5946 exec_tex(mach, inst, TEX_MODIFIER_NONE, 2); 5947 break; 5948 case TGSI_OPCODE_TXB2: 5949 /* simple texture lookup */ 5950 /* src[0] = texcoord */ 5951 /* src[1] = bias */ 5952 /* src[2] = sampler unit */ 5953 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 2); 5954 break; 5955 case TGSI_OPCODE_TXL2: 5956 /* simple texture lookup */ 5957 /* src[0] = texcoord */ 5958 /* src[1] = lod */ 5959 /* src[2] = sampler unit */ 5960 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 2); 5961 break; 5962 5963 case TGSI_OPCODE_IBFE: 5964 exec_vector_trinary(mach, inst, micro_ibfe, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5965 break; 5966 case TGSI_OPCODE_UBFE: 5967 exec_vector_trinary(mach, inst, micro_ubfe, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5968 break; 5969 case TGSI_OPCODE_BFI: 5970 exec_vector_quaternary(mach, inst, micro_bfi, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5971 break; 5972 case TGSI_OPCODE_BREV: 5973 exec_vector_unary(mach, inst, micro_brev, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5974 break; 5975 case TGSI_OPCODE_POPC: 5976 exec_vector_unary(mach, inst, micro_popc, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5977 break; 5978 case TGSI_OPCODE_LSB: 5979 exec_vector_unary(mach, inst, micro_lsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT); 5980 break; 5981 case TGSI_OPCODE_IMSB: 5982 exec_vector_unary(mach, inst, micro_imsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5983 break; 5984 case TGSI_OPCODE_UMSB: 5985 exec_vector_unary(mach, inst, micro_umsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT); 5986 break; 5987 5988 case TGSI_OPCODE_F2D: 5989 exec_t_2_64(mach, inst, micro_f2d, TGSI_EXEC_DATA_FLOAT); 5990 break; 5991 5992 case TGSI_OPCODE_D2F: 5993 exec_64_2_t(mach, inst, micro_d2f, TGSI_EXEC_DATA_FLOAT); 5994 break; 5995 5996 case TGSI_OPCODE_DABS: 5997 exec_double_unary(mach, inst, micro_dabs); 5998 break; 5999 6000 case TGSI_OPCODE_DNEG: 6001 exec_double_unary(mach, inst, micro_dneg); 6002 break; 6003 6004 case TGSI_OPCODE_DADD: 6005 exec_double_binary(mach, inst, micro_dadd, TGSI_EXEC_DATA_DOUBLE); 6006 break; 6007 6008 case TGSI_OPCODE_DDIV: 6009 exec_double_binary(mach, inst, micro_ddiv, TGSI_EXEC_DATA_DOUBLE); 6010 break; 6011 6012 case TGSI_OPCODE_DMUL: 6013 exec_double_binary(mach, inst, micro_dmul, TGSI_EXEC_DATA_DOUBLE); 6014 break; 6015 6016 case TGSI_OPCODE_DMAX: 6017 exec_double_binary(mach, inst, micro_dmax, TGSI_EXEC_DATA_DOUBLE); 6018 break; 6019 6020 case TGSI_OPCODE_DMIN: 6021 exec_double_binary(mach, inst, micro_dmin, TGSI_EXEC_DATA_DOUBLE); 6022 break; 6023 6024 case TGSI_OPCODE_DSLT: 6025 exec_double_binary(mach, inst, micro_dslt, TGSI_EXEC_DATA_UINT); 6026 break; 6027 6028 case TGSI_OPCODE_DSGE: 6029 exec_double_binary(mach, inst, micro_dsge, TGSI_EXEC_DATA_UINT); 6030 break; 6031 6032 case TGSI_OPCODE_DSEQ: 6033 exec_double_binary(mach, inst, micro_dseq, TGSI_EXEC_DATA_UINT); 6034 break; 6035 6036 case TGSI_OPCODE_DSNE: 6037 exec_double_binary(mach, inst, micro_dsne, TGSI_EXEC_DATA_UINT); 6038 break; 6039 6040 case TGSI_OPCODE_DRCP: 6041 exec_double_unary(mach, inst, micro_drcp); 6042 break; 6043 6044 case TGSI_OPCODE_DSQRT: 6045 exec_double_unary(mach, inst, micro_dsqrt); 6046 break; 6047 6048 case TGSI_OPCODE_DRSQ: 6049 exec_double_unary(mach, inst, micro_drsq); 6050 break; 6051 6052 case TGSI_OPCODE_DMAD: 6053 exec_double_trinary(mach, inst, micro_dmad); 6054 break; 6055 6056 case TGSI_OPCODE_DFRAC: 6057 exec_double_unary(mach, inst, micro_dfrac); 6058 break; 6059 6060 case TGSI_OPCODE_DLDEXP: 6061 exec_dldexp(mach, inst); 6062 break; 6063 6064 case TGSI_OPCODE_DFRACEXP: 6065 exec_dfracexp(mach, inst); 6066 break; 6067 6068 case TGSI_OPCODE_I2D: 6069 exec_t_2_64(mach, inst, micro_i2d, TGSI_EXEC_DATA_INT); 6070 break; 6071 6072 case TGSI_OPCODE_D2I: 6073 exec_64_2_t(mach, inst, micro_d2i, TGSI_EXEC_DATA_INT); 6074 break; 6075 6076 case TGSI_OPCODE_U2D: 6077 exec_t_2_64(mach, inst, micro_u2d, TGSI_EXEC_DATA_UINT); 6078 break; 6079 6080 case TGSI_OPCODE_D2U: 6081 exec_64_2_t(mach, inst, micro_d2u, TGSI_EXEC_DATA_INT); 6082 break; 6083 6084 case TGSI_OPCODE_LOAD: 6085 exec_load(mach, inst); 6086 break; 6087 6088 case TGSI_OPCODE_STORE: 6089 exec_store(mach, inst); 6090 break; 6091 6092 case TGSI_OPCODE_ATOMUADD: 6093 case TGSI_OPCODE_ATOMXCHG: 6094 case TGSI_OPCODE_ATOMCAS: 6095 case TGSI_OPCODE_ATOMAND: 6096 case TGSI_OPCODE_ATOMOR: 6097 case TGSI_OPCODE_ATOMXOR: 6098 case TGSI_OPCODE_ATOMUMIN: 6099 case TGSI_OPCODE_ATOMUMAX: 6100 case TGSI_OPCODE_ATOMIMIN: 6101 case TGSI_OPCODE_ATOMIMAX: 6102 exec_atomop(mach, inst); 6103 break; 6104 6105 case TGSI_OPCODE_RESQ: 6106 exec_resq(mach, inst); 6107 break; 6108 case TGSI_OPCODE_BARRIER: 6109 case TGSI_OPCODE_MEMBAR: 6110 return TRUE; 6111 break; 6112 6113 case TGSI_OPCODE_I64ABS: 6114 exec_double_unary(mach, inst, micro_i64abs); 6115 break; 6116 6117 case TGSI_OPCODE_I64SSG: 6118 exec_double_unary(mach, inst, micro_i64sgn); 6119 break; 6120 6121 case TGSI_OPCODE_I64NEG: 6122 exec_double_unary(mach, inst, micro_i64neg); 6123 break; 6124 6125 case TGSI_OPCODE_U64SEQ: 6126 exec_double_binary(mach, inst, micro_u64seq, TGSI_EXEC_DATA_UINT); 6127 break; 6128 6129 case TGSI_OPCODE_U64SNE: 6130 exec_double_binary(mach, inst, micro_u64sne, TGSI_EXEC_DATA_UINT); 6131 break; 6132 6133 case TGSI_OPCODE_I64SLT: 6134 exec_double_binary(mach, inst, micro_i64slt, TGSI_EXEC_DATA_UINT); 6135 break; 6136 case TGSI_OPCODE_U64SLT: 6137 exec_double_binary(mach, inst, micro_u64slt, TGSI_EXEC_DATA_UINT); 6138 break; 6139 6140 case TGSI_OPCODE_I64SGE: 6141 exec_double_binary(mach, inst, micro_i64sge, TGSI_EXEC_DATA_UINT); 6142 break; 6143 case TGSI_OPCODE_U64SGE: 6144 exec_double_binary(mach, inst, micro_u64sge, TGSI_EXEC_DATA_UINT); 6145 break; 6146 6147 case TGSI_OPCODE_I64MIN: 6148 exec_double_binary(mach, inst, micro_i64min, TGSI_EXEC_DATA_INT64); 6149 break; 6150 case TGSI_OPCODE_U64MIN: 6151 exec_double_binary(mach, inst, micro_u64min, TGSI_EXEC_DATA_UINT64); 6152 break; 6153 case TGSI_OPCODE_I64MAX: 6154 exec_double_binary(mach, inst, micro_i64max, TGSI_EXEC_DATA_INT64); 6155 break; 6156 case TGSI_OPCODE_U64MAX: 6157 exec_double_binary(mach, inst, micro_u64max, TGSI_EXEC_DATA_UINT64); 6158 break; 6159 case TGSI_OPCODE_U64ADD: 6160 exec_double_binary(mach, inst, micro_u64add, TGSI_EXEC_DATA_UINT64); 6161 break; 6162 case TGSI_OPCODE_U64MUL: 6163 exec_double_binary(mach, inst, micro_u64mul, TGSI_EXEC_DATA_UINT64); 6164 break; 6165 case TGSI_OPCODE_U64SHL: 6166 exec_arg0_64_arg1_32(mach, inst, micro_u64shl); 6167 break; 6168 case TGSI_OPCODE_I64SHR: 6169 exec_arg0_64_arg1_32(mach, inst, micro_i64shr); 6170 break; 6171 case TGSI_OPCODE_U64SHR: 6172 exec_arg0_64_arg1_32(mach, inst, micro_u64shr); 6173 break; 6174 case TGSI_OPCODE_U64DIV: 6175 exec_double_binary(mach, inst, micro_u64div, TGSI_EXEC_DATA_UINT64); 6176 break; 6177 case TGSI_OPCODE_I64DIV: 6178 exec_double_binary(mach, inst, micro_i64div, TGSI_EXEC_DATA_INT64); 6179 break; 6180 case TGSI_OPCODE_U64MOD: 6181 exec_double_binary(mach, inst, micro_u64mod, TGSI_EXEC_DATA_UINT64); 6182 break; 6183 case TGSI_OPCODE_I64MOD: 6184 exec_double_binary(mach, inst, micro_i64mod, TGSI_EXEC_DATA_INT64); 6185 break; 6186 6187 case TGSI_OPCODE_F2U64: 6188 exec_t_2_64(mach, inst, micro_f2u64, TGSI_EXEC_DATA_FLOAT); 6189 break; 6190 6191 case TGSI_OPCODE_F2I64: 6192 exec_t_2_64(mach, inst, micro_f2i64, TGSI_EXEC_DATA_FLOAT); 6193 break; 6194 6195 case TGSI_OPCODE_U2I64: 6196 exec_t_2_64(mach, inst, micro_u2i64, TGSI_EXEC_DATA_INT); 6197 break; 6198 case TGSI_OPCODE_I2I64: 6199 exec_t_2_64(mach, inst, micro_i2i64, TGSI_EXEC_DATA_INT); 6200 break; 6201 6202 case TGSI_OPCODE_D2U64: 6203 exec_double_unary(mach, inst, micro_d2u64); 6204 break; 6205 6206 case TGSI_OPCODE_D2I64: 6207 exec_double_unary(mach, inst, micro_d2i64); 6208 break; 6209 6210 case TGSI_OPCODE_U642F: 6211 exec_64_2_t(mach, inst, micro_u642f, TGSI_EXEC_DATA_FLOAT); 6212 break; 6213 case TGSI_OPCODE_I642F: 6214 exec_64_2_t(mach, inst, micro_i642f, TGSI_EXEC_DATA_FLOAT); 6215 break; 6216 6217 case TGSI_OPCODE_U642D: 6218 exec_double_unary(mach, inst, micro_u642d); 6219 break; 6220 case TGSI_OPCODE_I642D: 6221 exec_double_unary(mach, inst, micro_i642d); 6222 break; 6223 6224 default: 6225 assert( 0 ); 6226 } 6227 return FALSE; 6228 } 6229 6230 static void 6231 tgsi_exec_machine_setup_masks(struct tgsi_exec_machine *mach) 6232 { 6233 uint default_mask = 0xf; 6234 6235 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; 6236 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; 6237 6238 if (mach->ShaderType == PIPE_SHADER_GEOMETRY) { 6239 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; 6240 mach->Primitives[0] = 0; 6241 /* GS runs on a single primitive for now */ 6242 default_mask = 0x1; 6243 } 6244 6245 if (mach->NonHelperMask == 0) 6246 mach->NonHelperMask = default_mask; 6247 mach->CondMask = default_mask; 6248 mach->LoopMask = default_mask; 6249 mach->ContMask = default_mask; 6250 mach->FuncMask = default_mask; 6251 mach->ExecMask = default_mask; 6252 6253 mach->Switch.mask = default_mask; 6254 6255 assert(mach->CondStackTop == 0); 6256 assert(mach->LoopStackTop == 0); 6257 assert(mach->ContStackTop == 0); 6258 assert(mach->SwitchStackTop == 0); 6259 assert(mach->BreakStackTop == 0); 6260 assert(mach->CallStackTop == 0); 6261 } 6262 6263 /** 6264 * Run TGSI interpreter. 6265 * \return bitmask of "alive" quad components 6266 */ 6267 uint 6268 tgsi_exec_machine_run( struct tgsi_exec_machine *mach, int start_pc ) 6269 { 6270 uint i; 6271 6272 mach->pc = start_pc; 6273 6274 if (!start_pc) { 6275 tgsi_exec_machine_setup_masks(mach); 6276 6277 /* execute declarations (interpolants) */ 6278 for (i = 0; i < mach->NumDeclarations; i++) { 6279 exec_declaration( mach, mach->Declarations+i ); 6280 } 6281 } 6282 6283 { 6284 #if DEBUG_EXECUTION 6285 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS]; 6286 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS]; 6287 uint inst = 1; 6288 6289 if (!start_pc) { 6290 memset(mach->Temps, 0, sizeof(temps)); 6291 if (mach->Outputs) 6292 memset(mach->Outputs, 0, sizeof(outputs)); 6293 memset(temps, 0, sizeof(temps)); 6294 memset(outputs, 0, sizeof(outputs)); 6295 } 6296 #endif 6297 6298 /* execute instructions, until pc is set to -1 */ 6299 while (mach->pc != -1) { 6300 boolean barrier_hit; 6301 #if DEBUG_EXECUTION 6302 uint i; 6303 6304 tgsi_dump_instruction(&mach->Instructions[mach->pc], inst++); 6305 #endif 6306 6307 assert(mach->pc < (int) mach->NumInstructions); 6308 barrier_hit = exec_instruction(mach, mach->Instructions + mach->pc, &mach->pc); 6309 6310 /* for compute shaders if we hit a barrier return now for later rescheduling */ 6311 if (barrier_hit && mach->ShaderType == PIPE_SHADER_COMPUTE) 6312 return 0; 6313 6314 #if DEBUG_EXECUTION 6315 for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) { 6316 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) { 6317 uint j; 6318 6319 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i])); 6320 debug_printf("TEMP[%2u] = ", i); 6321 for (j = 0; j < 4; j++) { 6322 if (j > 0) { 6323 debug_printf(" "); 6324 } 6325 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 6326 temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j], 6327 temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j], 6328 temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j], 6329 temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]); 6330 } 6331 } 6332 } 6333 if (mach->Outputs) { 6334 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { 6335 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) { 6336 uint j; 6337 6338 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i])); 6339 debug_printf("OUT[%2u] = ", i); 6340 for (j = 0; j < 4; j++) { 6341 if (j > 0) { 6342 debug_printf(" "); 6343 } 6344 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 6345 outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j], 6346 outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j], 6347 outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j], 6348 outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]); 6349 } 6350 } 6351 } 6352 } 6353 #endif 6354 } 6355 } 6356 6357 #if 0 6358 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ 6359 if (mach->ShaderType == PIPE_SHADER_FRAGMENT) { 6360 /* 6361 * Scale back depth component. 6362 */ 6363 for (i = 0; i < 4; i++) 6364 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; 6365 } 6366 #endif 6367 6368 /* Strictly speaking, these assertions aren't really needed but they 6369 * can potentially catch some bugs in the control flow code. 6370 */ 6371 assert(mach->CondStackTop == 0); 6372 assert(mach->LoopStackTop == 0); 6373 assert(mach->ContStackTop == 0); 6374 assert(mach->SwitchStackTop == 0); 6375 assert(mach->BreakStackTop == 0); 6376 assert(mach->CallStackTop == 0); 6377 6378 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 6379 } 6380