1 /* 2 * Mesa 3-D graphics library 3 * 4 * Copyright (C) 2012-2013 LunarG, Inc. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included 14 * in all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 * DEALINGS IN THE SOFTWARE. 23 * 24 * Authors: 25 * Chia-I Wu <olv (at) lunarg.com> 26 */ 27 28 #include "tgsi/tgsi_parse.h" 29 #include "tgsi/tgsi_info.h" 30 #include "tgsi/tgsi_strings.h" 31 #include "util/u_hash_table.h" 32 #include "toy_helpers.h" 33 #include "toy_tgsi.h" 34 35 /* map TGSI opcode to GEN opcode 1-to-1 */ 36 static const struct { 37 int opcode; 38 int num_dst; 39 int num_src; 40 } aos_simple_opcode_map[TGSI_OPCODE_LAST] = { 41 [TGSI_OPCODE_ARL] = { GEN6_OPCODE_RNDD, 1, 1 }, 42 [TGSI_OPCODE_MOV] = { GEN6_OPCODE_MOV, 1, 1 }, 43 [TGSI_OPCODE_RCP] = { TOY_OPCODE_INV, 1, 1 }, 44 [TGSI_OPCODE_RSQ] = { TOY_OPCODE_RSQ, 1, 1 }, 45 [TGSI_OPCODE_MUL] = { GEN6_OPCODE_MUL, 1, 2 }, 46 [TGSI_OPCODE_ADD] = { GEN6_OPCODE_ADD, 1, 2 }, 47 [TGSI_OPCODE_DP3] = { GEN6_OPCODE_DP3, 1, 2 }, 48 [TGSI_OPCODE_DP4] = { GEN6_OPCODE_DP4, 1, 2 }, 49 [TGSI_OPCODE_MIN] = { GEN6_OPCODE_SEL, 1, 2 }, 50 [TGSI_OPCODE_MAX] = { GEN6_OPCODE_SEL, 1, 2 }, 51 /* a later pass will move src[2] to accumulator */ 52 [TGSI_OPCODE_MAD] = { GEN6_OPCODE_MAC, 1, 3 }, 53 [TGSI_OPCODE_SQRT] = { TOY_OPCODE_SQRT, 1, 1 }, 54 [TGSI_OPCODE_FRC] = { GEN6_OPCODE_FRC, 1, 1 }, 55 [TGSI_OPCODE_FLR] = { GEN6_OPCODE_RNDD, 1, 1 }, 56 [TGSI_OPCODE_ROUND] = { GEN6_OPCODE_RNDE, 1, 1 }, 57 [TGSI_OPCODE_EX2] = { TOY_OPCODE_EXP, 1, 1 }, 58 [TGSI_OPCODE_LG2] = { TOY_OPCODE_LOG, 1, 1 }, 59 [TGSI_OPCODE_POW] = { TOY_OPCODE_POW, 1, 2 }, 60 [TGSI_OPCODE_DPH] = { GEN6_OPCODE_DPH, 1, 2 }, 61 [TGSI_OPCODE_COS] = { TOY_OPCODE_COS, 1, 1 }, 62 [TGSI_OPCODE_KILL] = { TOY_OPCODE_KIL, 0, 0 }, 63 [TGSI_OPCODE_SIN] = { TOY_OPCODE_SIN, 1, 1 }, 64 [TGSI_OPCODE_ARR] = { GEN6_OPCODE_RNDZ, 1, 1 }, 65 [TGSI_OPCODE_DP2] = { GEN6_OPCODE_DP2, 1, 2 }, 66 [TGSI_OPCODE_IF] = { GEN6_OPCODE_IF, 0, 1 }, 67 [TGSI_OPCODE_UIF] = { GEN6_OPCODE_IF, 0, 1 }, 68 [TGSI_OPCODE_ELSE] = { GEN6_OPCODE_ELSE, 0, 0 }, 69 [TGSI_OPCODE_ENDIF] = { GEN6_OPCODE_ENDIF, 0, 0 }, 70 [TGSI_OPCODE_I2F] = { GEN6_OPCODE_MOV, 1, 1 }, 71 [TGSI_OPCODE_NOT] = { GEN6_OPCODE_NOT, 1, 1 }, 72 [TGSI_OPCODE_TRUNC] = { GEN6_OPCODE_RNDZ, 1, 1 }, 73 [TGSI_OPCODE_SHL] = { GEN6_OPCODE_SHL, 1, 2 }, 74 [TGSI_OPCODE_AND] = { GEN6_OPCODE_AND, 1, 2 }, 75 [TGSI_OPCODE_OR] = { GEN6_OPCODE_OR, 1, 2 }, 76 [TGSI_OPCODE_MOD] = { TOY_OPCODE_INT_DIV_REMAINDER, 1, 2 }, 77 [TGSI_OPCODE_XOR] = { GEN6_OPCODE_XOR, 1, 2 }, 78 [TGSI_OPCODE_EMIT] = { TOY_OPCODE_EMIT, 0, 0 }, 79 [TGSI_OPCODE_ENDPRIM] = { TOY_OPCODE_ENDPRIM, 0, 0 }, 80 [TGSI_OPCODE_NOP] = { GEN6_OPCODE_NOP, 0, 0 }, 81 [TGSI_OPCODE_KILL_IF] = { TOY_OPCODE_KIL, 0, 1 }, 82 [TGSI_OPCODE_END] = { GEN6_OPCODE_NOP, 0, 0 }, 83 [TGSI_OPCODE_F2I] = { GEN6_OPCODE_MOV, 1, 1 }, 84 [TGSI_OPCODE_IDIV] = { TOY_OPCODE_INT_DIV_QUOTIENT, 1, 2 }, 85 [TGSI_OPCODE_IMAX] = { GEN6_OPCODE_SEL, 1, 2 }, 86 [TGSI_OPCODE_IMIN] = { GEN6_OPCODE_SEL, 1, 2 }, 87 [TGSI_OPCODE_INEG] = { GEN6_OPCODE_MOV, 1, 1 }, 88 [TGSI_OPCODE_ISHR] = { GEN6_OPCODE_ASR, 1, 2 }, 89 [TGSI_OPCODE_F2U] = { GEN6_OPCODE_MOV, 1, 1 }, 90 [TGSI_OPCODE_U2F] = { GEN6_OPCODE_MOV, 1, 1 }, 91 [TGSI_OPCODE_UADD] = { GEN6_OPCODE_ADD, 1, 2 }, 92 [TGSI_OPCODE_UDIV] = { TOY_OPCODE_INT_DIV_QUOTIENT, 1, 2 }, 93 /* a later pass will move src[2] to accumulator */ 94 [TGSI_OPCODE_UMAD] = { GEN6_OPCODE_MAC, 1, 3 }, 95 [TGSI_OPCODE_UMAX] = { GEN6_OPCODE_SEL, 1, 2 }, 96 [TGSI_OPCODE_UMIN] = { GEN6_OPCODE_SEL, 1, 2 }, 97 [TGSI_OPCODE_UMOD] = { TOY_OPCODE_INT_DIV_REMAINDER, 1, 2 }, 98 [TGSI_OPCODE_UMUL] = { GEN6_OPCODE_MUL, 1, 2 }, 99 [TGSI_OPCODE_USHR] = { GEN6_OPCODE_SHR, 1, 2 }, 100 [TGSI_OPCODE_UARL] = { GEN6_OPCODE_MOV, 1, 1 }, 101 [TGSI_OPCODE_IABS] = { GEN6_OPCODE_MOV, 1, 1 }, 102 }; 103 104 static void 105 aos_simple(struct toy_compiler *tc, 106 const struct tgsi_full_instruction *tgsi_inst, 107 struct toy_dst *dst, 108 struct toy_src *src) 109 { 110 struct toy_inst *inst; 111 int opcode; 112 int cond_modifier = GEN6_COND_NONE; 113 int num_dst = tgsi_inst->Instruction.NumDstRegs; 114 int num_src = tgsi_inst->Instruction.NumSrcRegs; 115 int i; 116 117 opcode = aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].opcode; 118 assert(num_dst == aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].num_dst); 119 assert(num_src == aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].num_src); 120 if (!opcode) { 121 assert(!"invalid aos_simple() call"); 122 return; 123 } 124 125 /* no need to emit nop */ 126 if (opcode == GEN6_OPCODE_NOP) 127 return; 128 129 inst = tc_add(tc); 130 if (!inst) 131 return; 132 133 inst->opcode = opcode; 134 135 switch (tgsi_inst->Instruction.Opcode) { 136 case TGSI_OPCODE_MIN: 137 case TGSI_OPCODE_IMIN: 138 case TGSI_OPCODE_UMIN: 139 cond_modifier = GEN6_COND_L; 140 break; 141 case TGSI_OPCODE_MAX: 142 case TGSI_OPCODE_IMAX: 143 case TGSI_OPCODE_UMAX: 144 cond_modifier = GEN6_COND_GE; 145 break; 146 case TGSI_OPCODE_IABS: 147 src[0] = tsrc_absolute(src[0]); 148 break; 149 case TGSI_OPCODE_IF: 150 cond_modifier = GEN6_COND_NZ; 151 num_src = 2; 152 assert(src[0].type == TOY_TYPE_F); 153 src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X); 154 src[1] = tsrc_imm_f(0.0f); 155 break; 156 case TGSI_OPCODE_UIF: 157 cond_modifier = GEN6_COND_NZ; 158 num_src = 2; 159 assert(src[0].type == TOY_TYPE_UD); 160 src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X); 161 src[1] = tsrc_imm_d(0); 162 break; 163 case TGSI_OPCODE_INEG: 164 src[0] = tsrc_negate(src[0]); 165 break; 166 case TGSI_OPCODE_RCP: 167 case TGSI_OPCODE_RSQ: 168 case TGSI_OPCODE_EX2: 169 case TGSI_OPCODE_LG2: 170 case TGSI_OPCODE_COS: 171 case TGSI_OPCODE_SIN: 172 src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X); 173 break; 174 case TGSI_OPCODE_POW: 175 src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X); 176 src[1] = tsrc_swizzle1(src[1], TOY_SWIZZLE_X); 177 break; 178 } 179 180 inst->cond_modifier = cond_modifier; 181 182 if (num_dst) { 183 assert(num_dst == 1); 184 inst->dst = dst[0]; 185 } 186 187 assert(num_src <= ARRAY_SIZE(inst->src)); 188 for (i = 0; i < num_src; i++) 189 inst->src[i] = src[i]; 190 } 191 192 static void 193 aos_set_on_cond(struct toy_compiler *tc, 194 const struct tgsi_full_instruction *tgsi_inst, 195 struct toy_dst *dst, 196 struct toy_src *src) 197 { 198 struct toy_inst *inst; 199 int cond; 200 struct toy_src zero, one; 201 202 switch (tgsi_inst->Instruction.Opcode) { 203 case TGSI_OPCODE_SLT: 204 case TGSI_OPCODE_ISLT: 205 case TGSI_OPCODE_USLT: 206 case TGSI_OPCODE_FSLT: 207 cond = GEN6_COND_L; 208 break; 209 case TGSI_OPCODE_SGE: 210 case TGSI_OPCODE_ISGE: 211 case TGSI_OPCODE_USGE: 212 case TGSI_OPCODE_FSGE: 213 cond = GEN6_COND_GE; 214 break; 215 case TGSI_OPCODE_SEQ: 216 case TGSI_OPCODE_USEQ: 217 case TGSI_OPCODE_FSEQ: 218 cond = GEN6_COND_Z; 219 break; 220 case TGSI_OPCODE_SGT: 221 cond = GEN6_COND_G; 222 break; 223 case TGSI_OPCODE_SLE: 224 cond = GEN6_COND_LE; 225 break; 226 case TGSI_OPCODE_SNE: 227 case TGSI_OPCODE_USNE: 228 case TGSI_OPCODE_FSNE: 229 cond = GEN6_COND_NZ; 230 break; 231 default: 232 assert(!"invalid aos_set_on_cond() call"); 233 return; 234 } 235 236 /* note that for integer versions, all bits are set */ 237 switch (dst[0].type) { 238 case TOY_TYPE_F: 239 default: 240 zero = tsrc_imm_f(0.0f); 241 one = tsrc_imm_f(1.0f); 242 break; 243 case TOY_TYPE_D: 244 zero = tsrc_imm_d(0); 245 one = tsrc_imm_d(-1); 246 break; 247 case TOY_TYPE_UD: 248 zero = tsrc_imm_ud(0); 249 one = tsrc_imm_ud(~0); 250 break; 251 } 252 253 tc_MOV(tc, dst[0], zero); 254 tc_CMP(tc, tdst_null(), src[0], src[1], cond); 255 inst = tc_MOV(tc, dst[0], one); 256 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL; 257 } 258 259 static void 260 aos_compare(struct toy_compiler *tc, 261 const struct tgsi_full_instruction *tgsi_inst, 262 struct toy_dst *dst, 263 struct toy_src *src) 264 { 265 struct toy_inst *inst; 266 struct toy_src zero; 267 268 switch (tgsi_inst->Instruction.Opcode) { 269 case TGSI_OPCODE_CMP: 270 zero = tsrc_imm_f(0.0f); 271 break; 272 case TGSI_OPCODE_UCMP: 273 zero = tsrc_imm_ud(0); 274 break; 275 default: 276 assert(!"invalid aos_compare() call"); 277 return; 278 } 279 280 tc_CMP(tc, tdst_null(), src[0], zero, GEN6_COND_L); 281 inst = tc_SEL(tc, dst[0], src[1], src[2], GEN6_COND_NONE); 282 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL; 283 } 284 285 static void 286 aos_set_sign(struct toy_compiler *tc, 287 const struct tgsi_full_instruction *tgsi_inst, 288 struct toy_dst *dst, 289 struct toy_src *src) 290 { 291 struct toy_inst *inst; 292 struct toy_src zero, one, neg_one; 293 294 switch (tgsi_inst->Instruction.Opcode) { 295 case TGSI_OPCODE_SSG: 296 zero = tsrc_imm_f(0.0f); 297 one = tsrc_imm_f(1.0f); 298 neg_one = tsrc_imm_f(-1.0f); 299 break; 300 case TGSI_OPCODE_ISSG: 301 zero = tsrc_imm_d(0); 302 one = tsrc_imm_d(1); 303 neg_one = tsrc_imm_d(-1); 304 break; 305 default: 306 assert(!"invalid aos_set_sign() call"); 307 return; 308 } 309 310 tc_MOV(tc, dst[0], zero); 311 312 tc_CMP(tc, tdst_null(), src[0], zero, GEN6_COND_G); 313 inst = tc_MOV(tc, dst[0], one); 314 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL; 315 316 tc_CMP(tc, tdst_null(), src[0], zero, GEN6_COND_L); 317 inst = tc_MOV(tc, dst[0], neg_one); 318 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL; 319 } 320 321 static void 322 aos_tex(struct toy_compiler *tc, 323 const struct tgsi_full_instruction *tgsi_inst, 324 struct toy_dst *dst, 325 struct toy_src *src) 326 { 327 struct toy_inst *inst; 328 enum toy_opcode opcode; 329 int i; 330 331 switch (tgsi_inst->Instruction.Opcode) { 332 case TGSI_OPCODE_TEX: 333 opcode = TOY_OPCODE_TGSI_TEX; 334 break; 335 case TGSI_OPCODE_TXD: 336 opcode = TOY_OPCODE_TGSI_TXD; 337 break; 338 case TGSI_OPCODE_TXP: 339 opcode = TOY_OPCODE_TGSI_TXP; 340 break; 341 case TGSI_OPCODE_TXB: 342 opcode = TOY_OPCODE_TGSI_TXB; 343 break; 344 case TGSI_OPCODE_TXL: 345 opcode = TOY_OPCODE_TGSI_TXL; 346 break; 347 case TGSI_OPCODE_TXF: 348 opcode = TOY_OPCODE_TGSI_TXF; 349 break; 350 case TGSI_OPCODE_TXQ: 351 opcode = TOY_OPCODE_TGSI_TXQ; 352 break; 353 case TGSI_OPCODE_TXQ_LZ: 354 opcode = TOY_OPCODE_TGSI_TXQ_LZ; 355 break; 356 case TGSI_OPCODE_TEX2: 357 opcode = TOY_OPCODE_TGSI_TEX2; 358 break; 359 case TGSI_OPCODE_TXB2: 360 opcode = TOY_OPCODE_TGSI_TXB2; 361 break; 362 case TGSI_OPCODE_TXL2: 363 opcode = TOY_OPCODE_TGSI_TXL2; 364 break; 365 default: 366 assert(!"unsupported texturing opcode"); 367 return; 368 break; 369 } 370 371 assert(tgsi_inst->Instruction.Texture); 372 373 inst = tc_add(tc); 374 inst->opcode = opcode; 375 inst->tex.target = tgsi_inst->Texture.Texture; 376 377 assert(tgsi_inst->Instruction.NumSrcRegs <= ARRAY_SIZE(inst->src)); 378 assert(tgsi_inst->Instruction.NumDstRegs == 1); 379 380 inst->dst = dst[0]; 381 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) 382 inst->src[i] = src[i]; 383 384 for (i = 0; i < tgsi_inst->Texture.NumOffsets; i++) 385 tc_fail(tc, "texelFetchOffset unsupported"); 386 } 387 388 static void 389 aos_sample(struct toy_compiler *tc, 390 const struct tgsi_full_instruction *tgsi_inst, 391 struct toy_dst *dst, 392 struct toy_src *src) 393 { 394 struct toy_inst *inst; 395 enum toy_opcode opcode; 396 int i; 397 398 assert(!"sampling untested"); 399 400 switch (tgsi_inst->Instruction.Opcode) { 401 case TGSI_OPCODE_SAMPLE: 402 opcode = TOY_OPCODE_TGSI_SAMPLE; 403 break; 404 case TGSI_OPCODE_SAMPLE_I: 405 opcode = TOY_OPCODE_TGSI_SAMPLE_I; 406 break; 407 case TGSI_OPCODE_SAMPLE_I_MS: 408 opcode = TOY_OPCODE_TGSI_SAMPLE_I_MS; 409 break; 410 case TGSI_OPCODE_SAMPLE_B: 411 opcode = TOY_OPCODE_TGSI_SAMPLE_B; 412 break; 413 case TGSI_OPCODE_SAMPLE_C: 414 opcode = TOY_OPCODE_TGSI_SAMPLE_C; 415 break; 416 case TGSI_OPCODE_SAMPLE_C_LZ: 417 opcode = TOY_OPCODE_TGSI_SAMPLE_C_LZ; 418 break; 419 case TGSI_OPCODE_SAMPLE_D: 420 opcode = TOY_OPCODE_TGSI_SAMPLE_D; 421 break; 422 case TGSI_OPCODE_SAMPLE_L: 423 opcode = TOY_OPCODE_TGSI_SAMPLE_L; 424 break; 425 case TGSI_OPCODE_GATHER4: 426 opcode = TOY_OPCODE_TGSI_GATHER4; 427 break; 428 case TGSI_OPCODE_SVIEWINFO: 429 opcode = TOY_OPCODE_TGSI_SVIEWINFO; 430 break; 431 case TGSI_OPCODE_SAMPLE_POS: 432 opcode = TOY_OPCODE_TGSI_SAMPLE_POS; 433 break; 434 case TGSI_OPCODE_SAMPLE_INFO: 435 opcode = TOY_OPCODE_TGSI_SAMPLE_INFO; 436 break; 437 default: 438 assert(!"unsupported sampling opcode"); 439 return; 440 break; 441 } 442 443 inst = tc_add(tc); 444 inst->opcode = opcode; 445 446 assert(tgsi_inst->Instruction.NumSrcRegs <= ARRAY_SIZE(inst->src)); 447 assert(tgsi_inst->Instruction.NumDstRegs == 1); 448 449 inst->dst = dst[0]; 450 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) 451 inst->src[i] = src[i]; 452 } 453 454 static void 455 aos_LIT(struct toy_compiler *tc, 456 const struct tgsi_full_instruction *tgsi_inst, 457 struct toy_dst *dst, 458 struct toy_src *src) 459 { 460 struct toy_inst *inst; 461 462 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_XW), tsrc_imm_f(1.0f)); 463 464 if (!(dst[0].writemask & TOY_WRITEMASK_YZ)) 465 return; 466 467 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_YZ), tsrc_imm_f(0.0f)); 468 469 tc_CMP(tc, tdst_null(), 470 tsrc_swizzle1(src[0], TOY_SWIZZLE_X), 471 tsrc_imm_f(0.0f), 472 GEN6_COND_G); 473 474 inst = tc_MOV(tc, 475 tdst_writemask(dst[0], TOY_WRITEMASK_Y), 476 tsrc_swizzle1(src[0], TOY_SWIZZLE_X)); 477 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL; 478 479 /* clamp W to (-128, 128)? */ 480 inst = tc_POW(tc, 481 tdst_writemask(dst[0], TOY_WRITEMASK_Z), 482 tsrc_swizzle1(src[0], TOY_SWIZZLE_Y), 483 tsrc_swizzle1(src[0], TOY_SWIZZLE_W)); 484 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL; 485 } 486 487 static void 488 aos_EXP(struct toy_compiler *tc, 489 const struct tgsi_full_instruction *tgsi_inst, 490 struct toy_dst *dst, 491 struct toy_src *src) 492 { 493 struct toy_src src0 = tsrc_swizzle1(src[0], TOY_SWIZZLE_X); 494 495 if (dst[0].writemask & TOY_WRITEMASK_X) { 496 struct toy_dst tmp = 497 tdst_d(tdst_writemask(tc_alloc_tmp(tc), TOY_WRITEMASK_X)); 498 499 tc_RNDD(tc, tmp, src0); 500 501 /* construct the floating point number manually */ 502 tc_ADD(tc, tmp, tsrc_from(tmp), tsrc_imm_d(127)); 503 tc_SHL(tc, tdst_d(tdst_writemask(dst[0], TOY_WRITEMASK_X)), 504 tsrc_from(tmp), tsrc_imm_d(23)); 505 } 506 507 tc_FRC(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Y), src0); 508 tc_EXP(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src0); 509 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f)); 510 } 511 512 static void 513 aos_LOG(struct toy_compiler *tc, 514 const struct tgsi_full_instruction *tgsi_inst, 515 struct toy_dst *dst, 516 struct toy_src *src) 517 { 518 struct toy_src src0 = tsrc_swizzle1(src[0], TOY_SWIZZLE_X); 519 520 if (dst[0].writemask & TOY_WRITEMASK_XY) { 521 struct toy_dst tmp; 522 523 tmp = tdst_d(tdst_writemask(tc_alloc_tmp(tc), TOY_WRITEMASK_X)); 524 525 /* exponent */ 526 tc_SHR(tc, tmp, tsrc_absolute(tsrc_d(src0)), tsrc_imm_d(23)); 527 tc_ADD(tc, tdst_writemask(dst[0], TOY_WRITEMASK_X), 528 tsrc_from(tmp), tsrc_imm_d(-127)); 529 530 /* mantissa */ 531 tc_AND(tc, tmp, tsrc_d(src0), tsrc_imm_d((1 << 23) - 1)); 532 tc_OR(tc, tdst_writemask(tdst_d(dst[0]), TOY_WRITEMASK_Y), 533 tsrc_from(tmp), tsrc_imm_d(127 << 23)); 534 } 535 536 tc_LOG(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src0); 537 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f)); 538 } 539 540 static void 541 aos_DST(struct toy_compiler *tc, 542 const struct tgsi_full_instruction *tgsi_inst, 543 struct toy_dst *dst, 544 struct toy_src *src) 545 { 546 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_X), tsrc_imm_f(1.0f)); 547 tc_MUL(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Y), src[0], src[1]); 548 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src[0]); 549 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), src[1]); 550 } 551 552 static void 553 aos_LRP(struct toy_compiler *tc, 554 const struct tgsi_full_instruction *tgsi_inst, 555 struct toy_dst *dst, 556 struct toy_src *src) 557 { 558 struct toy_dst tmp = tc_alloc_tmp(tc); 559 560 tc_ADD(tc, tmp, tsrc_negate(src[0]), tsrc_imm_f(1.0f)); 561 tc_MUL(tc, tmp, tsrc_from(tmp), src[2]); 562 tc_MAC(tc, dst[0], src[0], src[1], tsrc_from(tmp)); 563 } 564 565 static void 566 aos_DP2A(struct toy_compiler *tc, 567 const struct tgsi_full_instruction *tgsi_inst, 568 struct toy_dst *dst, 569 struct toy_src *src) 570 { 571 struct toy_dst tmp = tc_alloc_tmp(tc); 572 573 assert(!"DP2A untested"); 574 575 tc_DP2(tc, tmp, src[0], src[1]); 576 tc_ADD(tc, dst[0], tsrc_swizzle1(tsrc_from(tmp), TOY_SWIZZLE_X), src[2]); 577 } 578 579 static void 580 aos_CLAMP(struct toy_compiler *tc, 581 const struct tgsi_full_instruction *tgsi_inst, 582 struct toy_dst *dst, 583 struct toy_src *src) 584 { 585 assert(!"CLAMP untested"); 586 587 tc_SEL(tc, dst[0], src[0], src[1], GEN6_COND_GE); 588 tc_SEL(tc, dst[0], src[2], tsrc_from(dst[0]), GEN6_COND_L); 589 } 590 591 static void 592 aos_XPD(struct toy_compiler *tc, 593 const struct tgsi_full_instruction *tgsi_inst, 594 struct toy_dst *dst, 595 struct toy_src *src) 596 { 597 struct toy_dst tmp = tc_alloc_tmp(tc); 598 599 tc_MUL(tc, tdst_writemask(tmp, TOY_WRITEMASK_XYZ), 600 tsrc_swizzle(src[0], TOY_SWIZZLE_Z, TOY_SWIZZLE_X, 601 TOY_SWIZZLE_Y, TOY_SWIZZLE_W), 602 tsrc_swizzle(src[1], TOY_SWIZZLE_Y, TOY_SWIZZLE_Z, 603 TOY_SWIZZLE_X, TOY_SWIZZLE_W)); 604 605 tc_MAC(tc, tdst_writemask(dst[0], TOY_WRITEMASK_XYZ), 606 tsrc_swizzle(src[0], TOY_SWIZZLE_Y, TOY_SWIZZLE_Z, 607 TOY_SWIZZLE_X, TOY_SWIZZLE_W), 608 tsrc_swizzle(src[1], TOY_SWIZZLE_Z, TOY_SWIZZLE_X, 609 TOY_SWIZZLE_Y, TOY_SWIZZLE_W), 610 tsrc_negate(tsrc_from(tmp))); 611 612 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), 613 tsrc_imm_f(1.0f)); 614 } 615 616 static void 617 aos_PK2H(struct toy_compiler *tc, 618 const struct tgsi_full_instruction *tgsi_inst, 619 struct toy_dst *dst, 620 struct toy_src *src) 621 { 622 const struct toy_src h1 = tsrc_ud(tsrc_swizzle1(src[0], TOY_SWIZZLE_X)); 623 const struct toy_src h2 = tsrc_ud(tsrc_swizzle1(src[0], TOY_SWIZZLE_Y)); 624 struct toy_dst tmp = tdst_ud(tc_alloc_tmp(tc)); 625 626 assert(!"PK2H untested"); 627 628 tc_SHL(tc, tmp, h2, tsrc_imm_ud(16)); 629 tc_OR(tc, tdst_ud(dst[0]), h1, tsrc_from(tmp)); 630 } 631 632 static void 633 aos_UP2H(struct toy_compiler *tc, 634 const struct tgsi_full_instruction *tgsi_inst, 635 struct toy_dst *dst, 636 struct toy_src *src) 637 { 638 assert(!"UP2H untested"); 639 640 tc_AND(tc, tdst_writemask(tdst_ud(dst[0]), TOY_WRITEMASK_XZ), 641 tsrc_ud(src[0]), tsrc_imm_ud(0xffff)); 642 tc_SHR(tc, tdst_writemask(tdst_ud(dst[0]), TOY_WRITEMASK_YW), 643 tsrc_ud(src[0]), tsrc_imm_ud(16)); 644 } 645 646 static void 647 aos_SCS(struct toy_compiler *tc, 648 const struct tgsi_full_instruction *tgsi_inst, 649 struct toy_dst *dst, 650 struct toy_src *src) 651 { 652 assert(!"SCS untested"); 653 654 tc_add1(tc, TOY_OPCODE_COS, 655 tdst_writemask(dst[0], TOY_WRITEMASK_X), src[0]); 656 657 tc_add1(tc, TOY_OPCODE_SIN, 658 tdst_writemask(dst[0], TOY_WRITEMASK_Y), src[0]); 659 660 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), tsrc_imm_f(0.0f)); 661 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f)); 662 } 663 664 static void 665 aos_DIV(struct toy_compiler *tc, 666 const struct tgsi_full_instruction *tgsi_inst, 667 struct toy_dst *dst, 668 struct toy_src *src) 669 { 670 struct toy_dst tmp = tc_alloc_tmp(tc); 671 672 assert(!"DIV untested"); 673 674 tc_INV(tc, tmp, src[1]); 675 tc_MUL(tc, dst[0], src[0], tsrc_from(tmp)); 676 } 677 678 static void 679 aos_BRK(struct toy_compiler *tc, 680 const struct tgsi_full_instruction *tgsi_inst, 681 struct toy_dst *dst, 682 struct toy_src *src) 683 { 684 tc_add0(tc, GEN6_OPCODE_BREAK); 685 } 686 687 static void 688 aos_CEIL(struct toy_compiler *tc, 689 const struct tgsi_full_instruction *tgsi_inst, 690 struct toy_dst *dst, 691 struct toy_src *src) 692 { 693 struct toy_dst tmp = tc_alloc_tmp(tc); 694 695 tc_RNDD(tc, tmp, tsrc_negate(src[0])); 696 tc_MOV(tc, dst[0], tsrc_negate(tsrc_from(tmp))); 697 } 698 699 static void 700 aos_SAD(struct toy_compiler *tc, 701 const struct tgsi_full_instruction *tgsi_inst, 702 struct toy_dst *dst, 703 struct toy_src *src) 704 { 705 struct toy_dst tmp = tc_alloc_tmp(tc); 706 707 assert(!"SAD untested"); 708 709 tc_ADD(tc, tmp, src[0], tsrc_negate(src[1])); 710 tc_ADD(tc, dst[0], tsrc_absolute(tsrc_from(tmp)), src[2]); 711 } 712 713 static void 714 aos_CONT(struct toy_compiler *tc, 715 const struct tgsi_full_instruction *tgsi_inst, 716 struct toy_dst *dst, 717 struct toy_src *src) 718 { 719 tc_add0(tc, GEN6_OPCODE_CONT); 720 } 721 722 static void 723 aos_BGNLOOP(struct toy_compiler *tc, 724 const struct tgsi_full_instruction *tgsi_inst, 725 struct toy_dst *dst, 726 struct toy_src *src) 727 { 728 struct toy_inst *inst; 729 730 inst = tc_add0(tc, TOY_OPCODE_DO); 731 /* this is just a marker */ 732 inst->marker = true; 733 } 734 735 static void 736 aos_ENDLOOP(struct toy_compiler *tc, 737 const struct tgsi_full_instruction *tgsi_inst, 738 struct toy_dst *dst, 739 struct toy_src *src) 740 { 741 tc_add0(tc, GEN6_OPCODE_WHILE); 742 } 743 744 static void 745 aos_unsupported(struct toy_compiler *tc, 746 const struct tgsi_full_instruction *tgsi_inst, 747 struct toy_dst *dst, 748 struct toy_src *src) 749 { 750 const char *name = tgsi_get_opcode_name(tgsi_inst->Instruction.Opcode); 751 752 ilo_warn("unsupported TGSI opcode: TGSI_OPCODE_%s\n", name); 753 754 tc_fail(tc, "unsupported TGSI instruction"); 755 } 756 757 static const toy_tgsi_translate aos_translate_table[TGSI_OPCODE_LAST] = { 758 [TGSI_OPCODE_ARL] = aos_simple, 759 [TGSI_OPCODE_MOV] = aos_simple, 760 [TGSI_OPCODE_LIT] = aos_LIT, 761 [TGSI_OPCODE_RCP] = aos_simple, 762 [TGSI_OPCODE_RSQ] = aos_simple, 763 [TGSI_OPCODE_EXP] = aos_EXP, 764 [TGSI_OPCODE_LOG] = aos_LOG, 765 [TGSI_OPCODE_MUL] = aos_simple, 766 [TGSI_OPCODE_ADD] = aos_simple, 767 [TGSI_OPCODE_DP3] = aos_simple, 768 [TGSI_OPCODE_DP4] = aos_simple, 769 [TGSI_OPCODE_DST] = aos_DST, 770 [TGSI_OPCODE_MIN] = aos_simple, 771 [TGSI_OPCODE_MAX] = aos_simple, 772 [TGSI_OPCODE_SLT] = aos_set_on_cond, 773 [TGSI_OPCODE_SGE] = aos_set_on_cond, 774 [TGSI_OPCODE_MAD] = aos_simple, 775 [TGSI_OPCODE_LRP] = aos_LRP, 776 [TGSI_OPCODE_SQRT] = aos_simple, 777 [TGSI_OPCODE_DP2A] = aos_DP2A, 778 [TGSI_OPCODE_FRC] = aos_simple, 779 [TGSI_OPCODE_CLAMP] = aos_CLAMP, 780 [TGSI_OPCODE_FLR] = aos_simple, 781 [TGSI_OPCODE_ROUND] = aos_simple, 782 [TGSI_OPCODE_EX2] = aos_simple, 783 [TGSI_OPCODE_LG2] = aos_simple, 784 [TGSI_OPCODE_POW] = aos_simple, 785 [TGSI_OPCODE_XPD] = aos_XPD, 786 [TGSI_OPCODE_DPH] = aos_simple, 787 [TGSI_OPCODE_COS] = aos_simple, 788 [TGSI_OPCODE_DDX] = aos_unsupported, 789 [TGSI_OPCODE_DDY] = aos_unsupported, 790 [TGSI_OPCODE_KILL] = aos_simple, 791 [TGSI_OPCODE_PK2H] = aos_PK2H, 792 [TGSI_OPCODE_PK2US] = aos_unsupported, 793 [TGSI_OPCODE_PK4B] = aos_unsupported, 794 [TGSI_OPCODE_PK4UB] = aos_unsupported, 795 [TGSI_OPCODE_SEQ] = aos_set_on_cond, 796 [TGSI_OPCODE_SGT] = aos_set_on_cond, 797 [TGSI_OPCODE_SIN] = aos_simple, 798 [TGSI_OPCODE_SLE] = aos_set_on_cond, 799 [TGSI_OPCODE_SNE] = aos_set_on_cond, 800 [TGSI_OPCODE_TEX] = aos_tex, 801 [TGSI_OPCODE_TXD] = aos_tex, 802 [TGSI_OPCODE_TXP] = aos_tex, 803 [TGSI_OPCODE_UP2H] = aos_UP2H, 804 [TGSI_OPCODE_UP2US] = aos_unsupported, 805 [TGSI_OPCODE_UP4B] = aos_unsupported, 806 [TGSI_OPCODE_UP4UB] = aos_unsupported, 807 [TGSI_OPCODE_ARR] = aos_simple, 808 [TGSI_OPCODE_CAL] = aos_unsupported, 809 [TGSI_OPCODE_RET] = aos_unsupported, 810 [TGSI_OPCODE_SSG] = aos_set_sign, 811 [TGSI_OPCODE_CMP] = aos_compare, 812 [TGSI_OPCODE_SCS] = aos_SCS, 813 [TGSI_OPCODE_TXB] = aos_tex, 814 [TGSI_OPCODE_DIV] = aos_DIV, 815 [TGSI_OPCODE_DP2] = aos_simple, 816 [TGSI_OPCODE_TXL] = aos_tex, 817 [TGSI_OPCODE_BRK] = aos_BRK, 818 [TGSI_OPCODE_IF] = aos_simple, 819 [TGSI_OPCODE_UIF] = aos_simple, 820 [TGSI_OPCODE_ELSE] = aos_simple, 821 [TGSI_OPCODE_ENDIF] = aos_simple, 822 [TGSI_OPCODE_PUSHA] = aos_unsupported, 823 [TGSI_OPCODE_POPA] = aos_unsupported, 824 [TGSI_OPCODE_CEIL] = aos_CEIL, 825 [TGSI_OPCODE_I2F] = aos_simple, 826 [TGSI_OPCODE_NOT] = aos_simple, 827 [TGSI_OPCODE_TRUNC] = aos_simple, 828 [TGSI_OPCODE_SHL] = aos_simple, 829 [TGSI_OPCODE_AND] = aos_simple, 830 [TGSI_OPCODE_OR] = aos_simple, 831 [TGSI_OPCODE_MOD] = aos_simple, 832 [TGSI_OPCODE_XOR] = aos_simple, 833 [TGSI_OPCODE_SAD] = aos_SAD, 834 [TGSI_OPCODE_TXF] = aos_tex, 835 [TGSI_OPCODE_TXQ] = aos_tex, 836 [TGSI_OPCODE_CONT] = aos_CONT, 837 [TGSI_OPCODE_EMIT] = aos_simple, 838 [TGSI_OPCODE_ENDPRIM] = aos_simple, 839 [TGSI_OPCODE_BGNLOOP] = aos_BGNLOOP, 840 [TGSI_OPCODE_BGNSUB] = aos_unsupported, 841 [TGSI_OPCODE_ENDLOOP] = aos_ENDLOOP, 842 [TGSI_OPCODE_ENDSUB] = aos_unsupported, 843 [TGSI_OPCODE_TXQ_LZ] = aos_tex, 844 [TGSI_OPCODE_NOP] = aos_simple, 845 [TGSI_OPCODE_FSEQ] = aos_set_on_cond, 846 [TGSI_OPCODE_FSGE] = aos_set_on_cond, 847 [TGSI_OPCODE_FSLT] = aos_set_on_cond, 848 [TGSI_OPCODE_FSNE] = aos_set_on_cond, 849 [TGSI_OPCODE_CALLNZ] = aos_unsupported, 850 [TGSI_OPCODE_BREAKC] = aos_unsupported, 851 [TGSI_OPCODE_KILL_IF] = aos_simple, 852 [TGSI_OPCODE_END] = aos_simple, 853 [TGSI_OPCODE_F2I] = aos_simple, 854 [TGSI_OPCODE_IDIV] = aos_simple, 855 [TGSI_OPCODE_IMAX] = aos_simple, 856 [TGSI_OPCODE_IMIN] = aos_simple, 857 [TGSI_OPCODE_INEG] = aos_simple, 858 [TGSI_OPCODE_ISGE] = aos_set_on_cond, 859 [TGSI_OPCODE_ISHR] = aos_simple, 860 [TGSI_OPCODE_ISLT] = aos_set_on_cond, 861 [TGSI_OPCODE_F2U] = aos_simple, 862 [TGSI_OPCODE_U2F] = aos_simple, 863 [TGSI_OPCODE_UADD] = aos_simple, 864 [TGSI_OPCODE_UDIV] = aos_simple, 865 [TGSI_OPCODE_UMAD] = aos_simple, 866 [TGSI_OPCODE_UMAX] = aos_simple, 867 [TGSI_OPCODE_UMIN] = aos_simple, 868 [TGSI_OPCODE_UMOD] = aos_simple, 869 [TGSI_OPCODE_UMUL] = aos_simple, 870 [TGSI_OPCODE_USEQ] = aos_set_on_cond, 871 [TGSI_OPCODE_USGE] = aos_set_on_cond, 872 [TGSI_OPCODE_USHR] = aos_simple, 873 [TGSI_OPCODE_USLT] = aos_set_on_cond, 874 [TGSI_OPCODE_USNE] = aos_set_on_cond, 875 [TGSI_OPCODE_SWITCH] = aos_unsupported, 876 [TGSI_OPCODE_CASE] = aos_unsupported, 877 [TGSI_OPCODE_DEFAULT] = aos_unsupported, 878 [TGSI_OPCODE_ENDSWITCH] = aos_unsupported, 879 [TGSI_OPCODE_SAMPLE] = aos_sample, 880 [TGSI_OPCODE_SAMPLE_I] = aos_sample, 881 [TGSI_OPCODE_SAMPLE_I_MS] = aos_sample, 882 [TGSI_OPCODE_SAMPLE_B] = aos_sample, 883 [TGSI_OPCODE_SAMPLE_C] = aos_sample, 884 [TGSI_OPCODE_SAMPLE_C_LZ] = aos_sample, 885 [TGSI_OPCODE_SAMPLE_D] = aos_sample, 886 [TGSI_OPCODE_SAMPLE_L] = aos_sample, 887 [TGSI_OPCODE_GATHER4] = aos_sample, 888 [TGSI_OPCODE_SVIEWINFO] = aos_sample, 889 [TGSI_OPCODE_SAMPLE_POS] = aos_sample, 890 [TGSI_OPCODE_SAMPLE_INFO] = aos_sample, 891 [TGSI_OPCODE_UARL] = aos_simple, 892 [TGSI_OPCODE_UCMP] = aos_compare, 893 [TGSI_OPCODE_IABS] = aos_simple, 894 [TGSI_OPCODE_ISSG] = aos_set_sign, 895 [TGSI_OPCODE_LOAD] = aos_unsupported, 896 [TGSI_OPCODE_STORE] = aos_unsupported, 897 [TGSI_OPCODE_MFENCE] = aos_unsupported, 898 [TGSI_OPCODE_LFENCE] = aos_unsupported, 899 [TGSI_OPCODE_SFENCE] = aos_unsupported, 900 [TGSI_OPCODE_BARRIER] = aos_unsupported, 901 [TGSI_OPCODE_ATOMUADD] = aos_unsupported, 902 [TGSI_OPCODE_ATOMXCHG] = aos_unsupported, 903 [TGSI_OPCODE_ATOMCAS] = aos_unsupported, 904 [TGSI_OPCODE_ATOMAND] = aos_unsupported, 905 [TGSI_OPCODE_ATOMOR] = aos_unsupported, 906 [TGSI_OPCODE_ATOMXOR] = aos_unsupported, 907 [TGSI_OPCODE_ATOMUMIN] = aos_unsupported, 908 [TGSI_OPCODE_ATOMUMAX] = aos_unsupported, 909 [TGSI_OPCODE_ATOMIMIN] = aos_unsupported, 910 [TGSI_OPCODE_ATOMIMAX] = aos_unsupported, 911 [TGSI_OPCODE_TEX2] = aos_tex, 912 [TGSI_OPCODE_TXB2] = aos_tex, 913 [TGSI_OPCODE_TXL2] = aos_tex, 914 }; 915 916 static void 917 soa_passthrough(struct toy_compiler *tc, 918 const struct tgsi_full_instruction *tgsi_inst, 919 struct toy_dst *dst_, 920 struct toy_src *src_) 921 { 922 const toy_tgsi_translate translate = 923 aos_translate_table[tgsi_inst->Instruction.Opcode]; 924 925 translate(tc, tgsi_inst, dst_, src_); 926 } 927 928 static void 929 soa_per_channel(struct toy_compiler *tc, 930 const struct tgsi_full_instruction *tgsi_inst, 931 struct toy_dst *dst_, 932 struct toy_src *src_) 933 { 934 struct toy_dst dst[TGSI_FULL_MAX_DST_REGISTERS][4]; 935 struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS][4]; 936 int i, ch; 937 938 for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) 939 tdst_transpose(dst_[i], dst[i]); 940 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) 941 tsrc_transpose(src_[i], src[i]); 942 943 /* emit the same instruction four times for the four channels */ 944 for (ch = 0; ch < 4; ch++) { 945 struct toy_dst aos_dst[TGSI_FULL_MAX_DST_REGISTERS]; 946 struct toy_src aos_src[TGSI_FULL_MAX_SRC_REGISTERS]; 947 948 for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) 949 aos_dst[i] = dst[i][ch]; 950 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) 951 aos_src[i] = src[i][ch]; 952 953 aos_translate_table[tgsi_inst->Instruction.Opcode](tc, 954 tgsi_inst, aos_dst, aos_src); 955 } 956 } 957 958 static void 959 soa_scalar_replicate(struct toy_compiler *tc, 960 const struct tgsi_full_instruction *tgsi_inst, 961 struct toy_dst *dst_, 962 struct toy_src *src_) 963 { 964 struct toy_dst dst0[4], tmp; 965 struct toy_src srcx[TGSI_FULL_MAX_SRC_REGISTERS]; 966 int opcode, i; 967 968 assert(tgsi_inst->Instruction.NumDstRegs == 1); 969 970 tdst_transpose(dst_[0], dst0); 971 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) { 972 struct toy_src tmp[4]; 973 974 tsrc_transpose(src_[i], tmp); 975 /* only the X channels */ 976 srcx[i] = tmp[0]; 977 } 978 979 tmp = tc_alloc_tmp(tc); 980 981 opcode = aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].opcode; 982 assert(opcode); 983 984 switch (tgsi_inst->Instruction.Opcode) { 985 case TGSI_OPCODE_RCP: 986 case TGSI_OPCODE_RSQ: 987 case TGSI_OPCODE_SQRT: 988 case TGSI_OPCODE_EX2: 989 case TGSI_OPCODE_LG2: 990 case TGSI_OPCODE_COS: 991 case TGSI_OPCODE_SIN: 992 tc_add1(tc, opcode, tmp, srcx[0]); 993 break; 994 case TGSI_OPCODE_POW: 995 tc_add2(tc, opcode, tmp, srcx[0], srcx[1]); 996 break; 997 default: 998 assert(!"invalid soa_scalar_replicate() call"); 999 return; 1000 } 1001 1002 /* replicate the result */ 1003 for (i = 0; i < 4; i++) 1004 tc_MOV(tc, dst0[i], tsrc_from(tmp)); 1005 } 1006 1007 static void 1008 soa_dot_product(struct toy_compiler *tc, 1009 const struct tgsi_full_instruction *tgsi_inst, 1010 struct toy_dst *dst_, 1011 struct toy_src *src_) 1012 { 1013 struct toy_dst dst0[4], tmp; 1014 struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS][4]; 1015 int i; 1016 1017 tdst_transpose(dst_[0], dst0); 1018 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) 1019 tsrc_transpose(src_[i], src[i]); 1020 1021 tmp = tc_alloc_tmp(tc); 1022 1023 switch (tgsi_inst->Instruction.Opcode) { 1024 case TGSI_OPCODE_DP2: 1025 tc_MUL(tc, tmp, src[0][1], src[1][1]); 1026 tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp)); 1027 break; 1028 case TGSI_OPCODE_DP2A: 1029 tc_MAC(tc, tmp, src[0][1], src[1][1], src[2][0]); 1030 tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp)); 1031 break; 1032 case TGSI_OPCODE_DP3: 1033 tc_MUL(tc, tmp, src[0][2], src[1][2]); 1034 tc_MAC(tc, tmp, src[0][1], src[1][1], tsrc_from(tmp)); 1035 tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp)); 1036 break; 1037 case TGSI_OPCODE_DPH: 1038 tc_MAC(tc, tmp, src[0][2], src[1][2], src[1][3]); 1039 tc_MAC(tc, tmp, src[0][1], src[1][1], tsrc_from(tmp)); 1040 tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp)); 1041 break; 1042 case TGSI_OPCODE_DP4: 1043 tc_MUL(tc, tmp, src[0][3], src[1][3]); 1044 tc_MAC(tc, tmp, src[0][2], src[1][2], tsrc_from(tmp)); 1045 tc_MAC(tc, tmp, src[0][1], src[1][1], tsrc_from(tmp)); 1046 tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp)); 1047 break; 1048 default: 1049 assert(!"invalid soa_dot_product() call"); 1050 return; 1051 } 1052 1053 for (i = 0; i < 4; i++) 1054 tc_MOV(tc, dst0[i], tsrc_from(tmp)); 1055 } 1056 1057 static void 1058 soa_partial_derivative(struct toy_compiler *tc, 1059 const struct tgsi_full_instruction *tgsi_inst, 1060 struct toy_dst *dst_, 1061 struct toy_src *src_) 1062 { 1063 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_DDX) 1064 tc_add1(tc, TOY_OPCODE_DDX, dst_[0], src_[0]); 1065 else 1066 tc_add1(tc, TOY_OPCODE_DDY, dst_[0], src_[0]); 1067 } 1068 1069 static void 1070 soa_if(struct toy_compiler *tc, 1071 const struct tgsi_full_instruction *tgsi_inst, 1072 struct toy_dst *dst_, 1073 struct toy_src *src_) 1074 { 1075 struct toy_src src0[4]; 1076 1077 assert(tsrc_is_swizzle1(src_[0])); 1078 tsrc_transpose(src_[0], src0); 1079 1080 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_IF) 1081 tc_IF(tc, tdst_null(), src0[0], tsrc_imm_f(0.0f), GEN6_COND_NZ); 1082 else 1083 tc_IF(tc, tdst_null(), src0[0], tsrc_imm_d(0), GEN6_COND_NZ); 1084 } 1085 1086 static void 1087 soa_LIT(struct toy_compiler *tc, 1088 const struct tgsi_full_instruction *tgsi_inst, 1089 struct toy_dst *dst_, 1090 struct toy_src *src_) 1091 { 1092 struct toy_inst *inst; 1093 struct toy_dst dst0[4]; 1094 struct toy_src src0[4]; 1095 1096 tdst_transpose(dst_[0], dst0); 1097 tsrc_transpose(src_[0], src0); 1098 1099 tc_MOV(tc, dst0[0], tsrc_imm_f(1.0f)); 1100 tc_MOV(tc, dst0[1], src0[0]); 1101 tc_POW(tc, dst0[2], src0[1], src0[3]); 1102 tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f)); 1103 1104 /* 1105 * POW is calculated first because math with pred_ctrl is broken here. 1106 * But, why? 1107 */ 1108 tc_CMP(tc, tdst_null(), src0[0], tsrc_imm_f(0.0f), GEN6_COND_L); 1109 inst = tc_MOV(tc, dst0[1], tsrc_imm_f(0.0f)); 1110 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL; 1111 inst = tc_MOV(tc, dst0[2], tsrc_imm_f(0.0f)); 1112 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL; 1113 } 1114 1115 static void 1116 soa_EXP(struct toy_compiler *tc, 1117 const struct tgsi_full_instruction *tgsi_inst, 1118 struct toy_dst *dst_, 1119 struct toy_src *src_) 1120 { 1121 struct toy_dst dst0[4]; 1122 struct toy_src src0[4]; 1123 1124 assert(!"SoA EXP untested"); 1125 1126 tdst_transpose(dst_[0], dst0); 1127 tsrc_transpose(src_[0], src0); 1128 1129 if (!tdst_is_null(dst0[0])) { 1130 struct toy_dst tmp = tdst_d(tc_alloc_tmp(tc)); 1131 1132 tc_RNDD(tc, tmp, src0[0]); 1133 1134 /* construct the floating point number manually */ 1135 tc_ADD(tc, tmp, tsrc_from(tmp), tsrc_imm_d(127)); 1136 tc_SHL(tc, tdst_d(dst0[0]), tsrc_from(tmp), tsrc_imm_d(23)); 1137 } 1138 1139 tc_FRC(tc, dst0[1], src0[0]); 1140 tc_EXP(tc, dst0[2], src0[0]); 1141 tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f)); 1142 } 1143 1144 static void 1145 soa_LOG(struct toy_compiler *tc, 1146 const struct tgsi_full_instruction *tgsi_inst, 1147 struct toy_dst *dst_, 1148 struct toy_src *src_) 1149 { 1150 struct toy_dst dst0[4]; 1151 struct toy_src src0[4]; 1152 1153 assert(!"SoA LOG untested"); 1154 1155 tdst_transpose(dst_[0], dst0); 1156 tsrc_transpose(src_[0], src0); 1157 1158 if (dst_[0].writemask & TOY_WRITEMASK_XY) { 1159 struct toy_dst tmp = tdst_d(tc_alloc_tmp(tc)); 1160 1161 /* exponent */ 1162 tc_SHR(tc, tmp, tsrc_absolute(tsrc_d(src0[0])), tsrc_imm_d(23)); 1163 tc_ADD(tc, dst0[0], tsrc_from(tmp), tsrc_imm_d(-127)); 1164 1165 /* mantissa */ 1166 tc_AND(tc, tmp, tsrc_d(src0[0]), tsrc_imm_d((1 << 23) - 1)); 1167 tc_OR(tc, dst0[1], tsrc_from(tmp), tsrc_imm_d(127 << 23)); 1168 } 1169 1170 tc_LOG(tc, dst0[2], src0[0]); 1171 tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f)); 1172 } 1173 1174 static void 1175 soa_DST(struct toy_compiler *tc, 1176 const struct tgsi_full_instruction *tgsi_inst, 1177 struct toy_dst *dst_, 1178 struct toy_src *src_) 1179 { 1180 struct toy_dst dst0[4]; 1181 struct toy_src src[2][4]; 1182 1183 tdst_transpose(dst_[0], dst0); 1184 tsrc_transpose(src_[0], src[0]); 1185 tsrc_transpose(src_[1], src[1]); 1186 1187 tc_MOV(tc, dst0[0], tsrc_imm_f(1.0f)); 1188 tc_MUL(tc, dst0[1], src[0][1], src[1][1]); 1189 tc_MOV(tc, dst0[2], src[0][2]); 1190 tc_MOV(tc, dst0[3], src[1][3]); 1191 } 1192 1193 static void 1194 soa_XPD(struct toy_compiler *tc, 1195 const struct tgsi_full_instruction *tgsi_inst, 1196 struct toy_dst *dst_, 1197 struct toy_src *src_) 1198 { 1199 struct toy_dst dst0[4]; 1200 struct toy_src src[2][4]; 1201 1202 tdst_transpose(dst_[0], dst0); 1203 tsrc_transpose(src_[0], src[0]); 1204 tsrc_transpose(src_[1], src[1]); 1205 1206 /* dst.x = src0.y * src1.z - src1.y * src0.z */ 1207 tc_MUL(tc, dst0[0], src[0][2], src[1][1]); 1208 tc_MAC(tc, dst0[0], src[0][1], src[1][2], tsrc_negate(tsrc_from(dst0[0]))); 1209 1210 /* dst.y = src0.z * src1.x - src1.z * src0.x */ 1211 tc_MUL(tc, dst0[1], src[0][0], src[1][2]); 1212 tc_MAC(tc, dst0[1], src[0][2], src[1][0], tsrc_negate(tsrc_from(dst0[1]))); 1213 1214 /* dst.z = src0.x * src1.y - src1.x * src0.y */ 1215 tc_MUL(tc, dst0[2], src[0][1], src[1][0]); 1216 tc_MAC(tc, dst0[2], src[0][0], src[1][1], tsrc_negate(tsrc_from(dst0[2]))); 1217 1218 tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f)); 1219 } 1220 1221 static void 1222 soa_PK2H(struct toy_compiler *tc, 1223 const struct tgsi_full_instruction *tgsi_inst, 1224 struct toy_dst *dst_, 1225 struct toy_src *src_) 1226 { 1227 struct toy_dst tmp = tdst_ud(tc_alloc_tmp(tc)); 1228 struct toy_dst dst0[4]; 1229 struct toy_src src0[4]; 1230 int i; 1231 1232 assert(!"SoA PK2H untested"); 1233 1234 tdst_transpose(dst_[0], dst0); 1235 tsrc_transpose(src_[0], src0); 1236 1237 tc_SHL(tc, tmp, src0[1], tsrc_imm_ud(16)); 1238 tc_OR(tc, tmp, src0[0], tsrc_from(tmp)); 1239 1240 for (i = 0; i < 4; i++) 1241 tc_MOV(tc, dst0[i], tsrc_from(tmp)); 1242 } 1243 1244 static void 1245 soa_UP2H(struct toy_compiler *tc, 1246 const struct tgsi_full_instruction *tgsi_inst, 1247 struct toy_dst *dst_, 1248 struct toy_src *src_) 1249 { 1250 struct toy_dst dst0[4]; 1251 struct toy_src src0[4]; 1252 1253 assert(!"SoA UP2H untested"); 1254 1255 tdst_transpose(dst_[0], dst0); 1256 tsrc_transpose(src_[0], src0); 1257 1258 tc_AND(tc, tdst_ud(dst0[0]), tsrc_ud(src0[0]), tsrc_imm_ud(0xffff)); 1259 tc_SHR(tc, tdst_ud(dst0[1]), tsrc_ud(src0[1]), tsrc_imm_ud(16)); 1260 tc_AND(tc, tdst_ud(dst0[2]), tsrc_ud(src0[2]), tsrc_imm_ud(0xffff)); 1261 tc_SHR(tc, tdst_ud(dst0[3]), tsrc_ud(src0[3]), tsrc_imm_ud(16)); 1262 1263 } 1264 1265 static void 1266 soa_SCS(struct toy_compiler *tc, 1267 const struct tgsi_full_instruction *tgsi_inst, 1268 struct toy_dst *dst_, 1269 struct toy_src *src_) 1270 { 1271 struct toy_dst dst0[4]; 1272 struct toy_src src0[4]; 1273 1274 tdst_transpose(dst_[0], dst0); 1275 tsrc_transpose(src_[0], src0); 1276 1277 tc_add1(tc, TOY_OPCODE_COS, dst0[0], src0[0]); 1278 tc_add1(tc, TOY_OPCODE_SIN, dst0[1], src0[0]); 1279 tc_MOV(tc, dst0[2], tsrc_imm_f(0.0f)); 1280 tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f)); 1281 } 1282 1283 static void 1284 soa_unsupported(struct toy_compiler *tc, 1285 const struct tgsi_full_instruction *tgsi_inst, 1286 struct toy_dst *dst_, 1287 struct toy_src *src_) 1288 { 1289 const struct tgsi_opcode_info *info = 1290 tgsi_get_opcode_info(tgsi_inst->Instruction.Opcode); 1291 1292 ilo_warn("unsupported TGSI opcode in SoA form: TGSI_OPCODE_%s\n", 1293 info->mnemonic); 1294 1295 tc_fail(tc, "unsupported TGSI instruction in SoA form"); 1296 } 1297 1298 static const toy_tgsi_translate soa_translate_table[TGSI_OPCODE_LAST] = { 1299 [TGSI_OPCODE_ARL] = soa_per_channel, 1300 [TGSI_OPCODE_MOV] = soa_per_channel, 1301 [TGSI_OPCODE_LIT] = soa_LIT, 1302 [TGSI_OPCODE_RCP] = soa_scalar_replicate, 1303 [TGSI_OPCODE_RSQ] = soa_scalar_replicate, 1304 [TGSI_OPCODE_EXP] = soa_EXP, 1305 [TGSI_OPCODE_LOG] = soa_LOG, 1306 [TGSI_OPCODE_MUL] = soa_per_channel, 1307 [TGSI_OPCODE_ADD] = soa_per_channel, 1308 [TGSI_OPCODE_DP3] = soa_dot_product, 1309 [TGSI_OPCODE_DP4] = soa_dot_product, 1310 [TGSI_OPCODE_DST] = soa_DST, 1311 [TGSI_OPCODE_MIN] = soa_per_channel, 1312 [TGSI_OPCODE_MAX] = soa_per_channel, 1313 [TGSI_OPCODE_SLT] = soa_per_channel, 1314 [TGSI_OPCODE_SGE] = soa_per_channel, 1315 [TGSI_OPCODE_MAD] = soa_per_channel, 1316 [TGSI_OPCODE_LRP] = soa_per_channel, 1317 [TGSI_OPCODE_SQRT] = soa_scalar_replicate, 1318 [TGSI_OPCODE_DP2A] = soa_dot_product, 1319 [TGSI_OPCODE_FRC] = soa_per_channel, 1320 [TGSI_OPCODE_CLAMP] = soa_per_channel, 1321 [TGSI_OPCODE_FLR] = soa_per_channel, 1322 [TGSI_OPCODE_ROUND] = soa_per_channel, 1323 [TGSI_OPCODE_EX2] = soa_scalar_replicate, 1324 [TGSI_OPCODE_LG2] = soa_scalar_replicate, 1325 [TGSI_OPCODE_POW] = soa_scalar_replicate, 1326 [TGSI_OPCODE_XPD] = soa_XPD, 1327 [TGSI_OPCODE_DPH] = soa_dot_product, 1328 [TGSI_OPCODE_COS] = soa_scalar_replicate, 1329 [TGSI_OPCODE_DDX] = soa_partial_derivative, 1330 [TGSI_OPCODE_DDY] = soa_partial_derivative, 1331 [TGSI_OPCODE_KILL] = soa_passthrough, 1332 [TGSI_OPCODE_PK2H] = soa_PK2H, 1333 [TGSI_OPCODE_PK2US] = soa_unsupported, 1334 [TGSI_OPCODE_PK4B] = soa_unsupported, 1335 [TGSI_OPCODE_PK4UB] = soa_unsupported, 1336 [TGSI_OPCODE_SEQ] = soa_per_channel, 1337 [TGSI_OPCODE_SGT] = soa_per_channel, 1338 [TGSI_OPCODE_SIN] = soa_scalar_replicate, 1339 [TGSI_OPCODE_SLE] = soa_per_channel, 1340 [TGSI_OPCODE_SNE] = soa_per_channel, 1341 [TGSI_OPCODE_TEX] = soa_passthrough, 1342 [TGSI_OPCODE_TXD] = soa_passthrough, 1343 [TGSI_OPCODE_TXP] = soa_passthrough, 1344 [TGSI_OPCODE_UP2H] = soa_UP2H, 1345 [TGSI_OPCODE_UP2US] = soa_unsupported, 1346 [TGSI_OPCODE_UP4B] = soa_unsupported, 1347 [TGSI_OPCODE_UP4UB] = soa_unsupported, 1348 [TGSI_OPCODE_ARR] = soa_per_channel, 1349 [TGSI_OPCODE_CAL] = soa_unsupported, 1350 [TGSI_OPCODE_RET] = soa_unsupported, 1351 [TGSI_OPCODE_SSG] = soa_per_channel, 1352 [TGSI_OPCODE_CMP] = soa_per_channel, 1353 [TGSI_OPCODE_SCS] = soa_SCS, 1354 [TGSI_OPCODE_TXB] = soa_passthrough, 1355 [TGSI_OPCODE_DIV] = soa_per_channel, 1356 [TGSI_OPCODE_DP2] = soa_dot_product, 1357 [TGSI_OPCODE_TXL] = soa_passthrough, 1358 [TGSI_OPCODE_BRK] = soa_passthrough, 1359 [TGSI_OPCODE_IF] = soa_if, 1360 [TGSI_OPCODE_UIF] = soa_if, 1361 [TGSI_OPCODE_ELSE] = soa_passthrough, 1362 [TGSI_OPCODE_ENDIF] = soa_passthrough, 1363 [TGSI_OPCODE_PUSHA] = soa_unsupported, 1364 [TGSI_OPCODE_POPA] = soa_unsupported, 1365 [TGSI_OPCODE_CEIL] = soa_per_channel, 1366 [TGSI_OPCODE_I2F] = soa_per_channel, 1367 [TGSI_OPCODE_NOT] = soa_per_channel, 1368 [TGSI_OPCODE_TRUNC] = soa_per_channel, 1369 [TGSI_OPCODE_SHL] = soa_per_channel, 1370 [TGSI_OPCODE_AND] = soa_per_channel, 1371 [TGSI_OPCODE_OR] = soa_per_channel, 1372 [TGSI_OPCODE_MOD] = soa_per_channel, 1373 [TGSI_OPCODE_XOR] = soa_per_channel, 1374 [TGSI_OPCODE_SAD] = soa_per_channel, 1375 [TGSI_OPCODE_TXF] = soa_passthrough, 1376 [TGSI_OPCODE_TXQ] = soa_passthrough, 1377 [TGSI_OPCODE_CONT] = soa_passthrough, 1378 [TGSI_OPCODE_EMIT] = soa_unsupported, 1379 [TGSI_OPCODE_ENDPRIM] = soa_unsupported, 1380 [TGSI_OPCODE_BGNLOOP] = soa_passthrough, 1381 [TGSI_OPCODE_BGNSUB] = soa_unsupported, 1382 [TGSI_OPCODE_ENDLOOP] = soa_passthrough, 1383 [TGSI_OPCODE_ENDSUB] = soa_unsupported, 1384 [TGSI_OPCODE_TXQ_LZ] = soa_passthrough, 1385 [TGSI_OPCODE_NOP] = soa_passthrough, 1386 [TGSI_OPCODE_FSEQ] = soa_per_channel, 1387 [TGSI_OPCODE_FSGE] = soa_per_channel, 1388 [TGSI_OPCODE_FSLT] = soa_per_channel, 1389 [TGSI_OPCODE_FSNE] = soa_per_channel, 1390 [TGSI_OPCODE_CALLNZ] = soa_unsupported, 1391 [TGSI_OPCODE_BREAKC] = soa_unsupported, 1392 [TGSI_OPCODE_KILL_IF] = soa_passthrough, 1393 [TGSI_OPCODE_END] = soa_passthrough, 1394 [TGSI_OPCODE_F2I] = soa_per_channel, 1395 [TGSI_OPCODE_IDIV] = soa_per_channel, 1396 [TGSI_OPCODE_IMAX] = soa_per_channel, 1397 [TGSI_OPCODE_IMIN] = soa_per_channel, 1398 [TGSI_OPCODE_INEG] = soa_per_channel, 1399 [TGSI_OPCODE_ISGE] = soa_per_channel, 1400 [TGSI_OPCODE_ISHR] = soa_per_channel, 1401 [TGSI_OPCODE_ISLT] = soa_per_channel, 1402 [TGSI_OPCODE_F2U] = soa_per_channel, 1403 [TGSI_OPCODE_U2F] = soa_per_channel, 1404 [TGSI_OPCODE_UADD] = soa_per_channel, 1405 [TGSI_OPCODE_UDIV] = soa_per_channel, 1406 [TGSI_OPCODE_UMAD] = soa_per_channel, 1407 [TGSI_OPCODE_UMAX] = soa_per_channel, 1408 [TGSI_OPCODE_UMIN] = soa_per_channel, 1409 [TGSI_OPCODE_UMOD] = soa_per_channel, 1410 [TGSI_OPCODE_UMUL] = soa_per_channel, 1411 [TGSI_OPCODE_USEQ] = soa_per_channel, 1412 [TGSI_OPCODE_USGE] = soa_per_channel, 1413 [TGSI_OPCODE_USHR] = soa_per_channel, 1414 [TGSI_OPCODE_USLT] = soa_per_channel, 1415 [TGSI_OPCODE_USNE] = soa_per_channel, 1416 [TGSI_OPCODE_SWITCH] = soa_unsupported, 1417 [TGSI_OPCODE_CASE] = soa_unsupported, 1418 [TGSI_OPCODE_DEFAULT] = soa_unsupported, 1419 [TGSI_OPCODE_ENDSWITCH] = soa_unsupported, 1420 [TGSI_OPCODE_SAMPLE] = soa_passthrough, 1421 [TGSI_OPCODE_SAMPLE_I] = soa_passthrough, 1422 [TGSI_OPCODE_SAMPLE_I_MS] = soa_passthrough, 1423 [TGSI_OPCODE_SAMPLE_B] = soa_passthrough, 1424 [TGSI_OPCODE_SAMPLE_C] = soa_passthrough, 1425 [TGSI_OPCODE_SAMPLE_C_LZ] = soa_passthrough, 1426 [TGSI_OPCODE_SAMPLE_D] = soa_passthrough, 1427 [TGSI_OPCODE_SAMPLE_L] = soa_passthrough, 1428 [TGSI_OPCODE_GATHER4] = soa_passthrough, 1429 [TGSI_OPCODE_SVIEWINFO] = soa_passthrough, 1430 [TGSI_OPCODE_SAMPLE_POS] = soa_passthrough, 1431 [TGSI_OPCODE_SAMPLE_INFO] = soa_passthrough, 1432 [TGSI_OPCODE_UARL] = soa_per_channel, 1433 [TGSI_OPCODE_UCMP] = soa_per_channel, 1434 [TGSI_OPCODE_IABS] = soa_per_channel, 1435 [TGSI_OPCODE_ISSG] = soa_per_channel, 1436 [TGSI_OPCODE_LOAD] = soa_unsupported, 1437 [TGSI_OPCODE_STORE] = soa_unsupported, 1438 [TGSI_OPCODE_MFENCE] = soa_unsupported, 1439 [TGSI_OPCODE_LFENCE] = soa_unsupported, 1440 [TGSI_OPCODE_SFENCE] = soa_unsupported, 1441 [TGSI_OPCODE_BARRIER] = soa_unsupported, 1442 [TGSI_OPCODE_ATOMUADD] = soa_unsupported, 1443 [TGSI_OPCODE_ATOMXCHG] = soa_unsupported, 1444 [TGSI_OPCODE_ATOMCAS] = soa_unsupported, 1445 [TGSI_OPCODE_ATOMAND] = soa_unsupported, 1446 [TGSI_OPCODE_ATOMOR] = soa_unsupported, 1447 [TGSI_OPCODE_ATOMXOR] = soa_unsupported, 1448 [TGSI_OPCODE_ATOMUMIN] = soa_unsupported, 1449 [TGSI_OPCODE_ATOMUMAX] = soa_unsupported, 1450 [TGSI_OPCODE_ATOMIMIN] = soa_unsupported, 1451 [TGSI_OPCODE_ATOMIMAX] = soa_unsupported, 1452 [TGSI_OPCODE_TEX2] = soa_passthrough, 1453 [TGSI_OPCODE_TXB2] = soa_passthrough, 1454 [TGSI_OPCODE_TXL2] = soa_passthrough, 1455 }; 1456 1457 static bool 1458 ra_dst_is_indirect(const struct tgsi_full_dst_register *d) 1459 { 1460 return (d->Register.Indirect || 1461 (d->Register.Dimension && d->Dimension.Indirect)); 1462 } 1463 1464 static int 1465 ra_dst_index(const struct tgsi_full_dst_register *d) 1466 { 1467 assert(!d->Register.Indirect); 1468 return d->Register.Index; 1469 } 1470 1471 static int 1472 ra_dst_dimension(const struct tgsi_full_dst_register *d) 1473 { 1474 if (d->Register.Dimension) { 1475 assert(!d->Dimension.Indirect); 1476 return d->Dimension.Index; 1477 } 1478 else { 1479 return 0; 1480 } 1481 } 1482 1483 static bool 1484 ra_is_src_indirect(const struct tgsi_full_src_register *s) 1485 { 1486 return (s->Register.Indirect || 1487 (s->Register.Dimension && s->Dimension.Indirect)); 1488 } 1489 1490 static int 1491 ra_src_index(const struct tgsi_full_src_register *s) 1492 { 1493 assert(!s->Register.Indirect); 1494 return s->Register.Index; 1495 } 1496 1497 static int 1498 ra_src_dimension(const struct tgsi_full_src_register *s) 1499 { 1500 if (s->Register.Dimension) { 1501 assert(!s->Dimension.Indirect); 1502 return s->Dimension.Index; 1503 } 1504 else { 1505 return 0; 1506 } 1507 } 1508 1509 /** 1510 * Infer the type of either the sources or the destination. 1511 */ 1512 static enum toy_type 1513 ra_infer_opcode_type(int tgsi_opcode, bool is_dst) 1514 { 1515 enum tgsi_opcode_type type; 1516 1517 if (is_dst) 1518 type = tgsi_opcode_infer_dst_type(tgsi_opcode); 1519 else 1520 type = tgsi_opcode_infer_src_type(tgsi_opcode); 1521 1522 switch (type) { 1523 case TGSI_TYPE_UNSIGNED: 1524 return TOY_TYPE_UD; 1525 case TGSI_TYPE_SIGNED: 1526 return TOY_TYPE_D; 1527 case TGSI_TYPE_FLOAT: 1528 return TOY_TYPE_F; 1529 case TGSI_TYPE_UNTYPED: 1530 case TGSI_TYPE_VOID: 1531 case TGSI_TYPE_DOUBLE: 1532 default: 1533 assert(!"unsupported TGSI type"); 1534 return TOY_TYPE_UD; 1535 } 1536 } 1537 1538 /** 1539 * Return the type of an operand of the specified instruction. 1540 */ 1541 static enum toy_type 1542 ra_get_type(struct toy_tgsi *tgsi, const struct tgsi_full_instruction *tgsi_inst, 1543 int operand, bool is_dst) 1544 { 1545 enum toy_type type; 1546 enum tgsi_file_type file; 1547 1548 /* we need to look at both src and dst for MOV */ 1549 /* XXX it should not be this complex */ 1550 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_MOV) { 1551 const enum tgsi_file_type dst_file = tgsi_inst->Dst[0].Register.File; 1552 const enum tgsi_file_type src_file = tgsi_inst->Src[0].Register.File; 1553 1554 if (dst_file == TGSI_FILE_ADDRESS || src_file == TGSI_FILE_ADDRESS) { 1555 type = TOY_TYPE_D; 1556 } 1557 else if (src_file == TGSI_FILE_IMMEDIATE && 1558 !tgsi_inst->Src[0].Register.Indirect) { 1559 const int src_idx = tgsi_inst->Src[0].Register.Index; 1560 type = tgsi->imm_data.types[src_idx]; 1561 } 1562 else { 1563 /* this is the best we can do */ 1564 type = TOY_TYPE_F; 1565 } 1566 1567 return type; 1568 } 1569 else if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_UCMP) { 1570 if (!is_dst && operand == 0) 1571 type = TOY_TYPE_UD; 1572 else 1573 type = TOY_TYPE_F; 1574 1575 return type; 1576 } 1577 1578 type = ra_infer_opcode_type(tgsi_inst->Instruction.Opcode, is_dst); 1579 1580 /* fix the type */ 1581 file = (is_dst) ? 1582 tgsi_inst->Dst[operand].Register.File : 1583 tgsi_inst->Src[operand].Register.File; 1584 switch (file) { 1585 case TGSI_FILE_SAMPLER: 1586 case TGSI_FILE_IMAGE: 1587 case TGSI_FILE_SAMPLER_VIEW: 1588 type = TOY_TYPE_D; 1589 break; 1590 case TGSI_FILE_ADDRESS: 1591 assert(type == TOY_TYPE_D); 1592 break; 1593 default: 1594 break; 1595 } 1596 1597 return type; 1598 } 1599 1600 /** 1601 * Allocate a VRF register. 1602 */ 1603 static int 1604 ra_alloc_reg(struct toy_tgsi *tgsi, enum tgsi_file_type file) 1605 { 1606 const int count = (tgsi->aos) ? 1 : 4; 1607 return tc_alloc_vrf(tgsi->tc, count); 1608 } 1609 1610 /** 1611 * Construct the key for VRF mapping look-up. 1612 */ 1613 static void * 1614 ra_get_map_key(enum tgsi_file_type file, unsigned dim, unsigned index) 1615 { 1616 intptr_t key; 1617 1618 /* this is ugly... */ 1619 assert(file < 1 << 4); 1620 assert(dim < 1 << 12); 1621 assert(index < 1 << 16); 1622 key = (file << 28) | (dim << 16) | index; 1623 1624 return intptr_to_pointer(key); 1625 } 1626 1627 /** 1628 * Map a TGSI register to a VRF register. 1629 */ 1630 static int 1631 ra_map_reg(struct toy_tgsi *tgsi, enum tgsi_file_type file, 1632 int dim, int index, bool *is_new) 1633 { 1634 void *key, *val; 1635 intptr_t vrf; 1636 1637 key = ra_get_map_key(file, dim, index); 1638 1639 /* 1640 * because we allocate vrf from 1 and on, val is never NULL as long as the 1641 * key exists 1642 */ 1643 val = util_hash_table_get(tgsi->reg_mapping, key); 1644 if (val) { 1645 vrf = pointer_to_intptr(val); 1646 1647 if (is_new) 1648 *is_new = false; 1649 } 1650 else { 1651 vrf = (intptr_t) ra_alloc_reg(tgsi, file); 1652 1653 /* add to the mapping */ 1654 val = intptr_to_pointer(vrf); 1655 util_hash_table_set(tgsi->reg_mapping, key, val); 1656 1657 if (is_new) 1658 *is_new = true; 1659 } 1660 1661 return (int) vrf; 1662 } 1663 1664 /** 1665 * Return true if the destination aliases any of the sources. 1666 */ 1667 static bool 1668 ra_dst_is_aliasing(const struct tgsi_full_instruction *tgsi_inst, int dst_index) 1669 { 1670 const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[dst_index]; 1671 int i; 1672 1673 /* we need a scratch register for indirect dst anyway */ 1674 if (ra_dst_is_indirect(d)) 1675 return true; 1676 1677 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) { 1678 const struct tgsi_full_src_register *s = &tgsi_inst->Src[i]; 1679 1680 if (s->Register.File != d->Register.File) 1681 continue; 1682 1683 /* 1684 * we can go on to check dimension and index respectively, but 1685 * keep it simple for now 1686 */ 1687 if (ra_is_src_indirect(s)) 1688 return true; 1689 if (ra_src_dimension(s) == ra_dst_dimension(d) && 1690 ra_src_index(s) == ra_dst_index(d)) 1691 return true; 1692 } 1693 1694 return false; 1695 } 1696 1697 /** 1698 * Return the toy register for a TGSI destination operand. 1699 */ 1700 static struct toy_dst 1701 ra_get_dst(struct toy_tgsi *tgsi, 1702 const struct tgsi_full_instruction *tgsi_inst, int dst_index, 1703 bool *is_scratch) 1704 { 1705 const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[dst_index]; 1706 bool need_vrf = false; 1707 struct toy_dst dst; 1708 1709 switch (d->Register.File) { 1710 case TGSI_FILE_NULL: 1711 dst = tdst_null(); 1712 break; 1713 case TGSI_FILE_OUTPUT: 1714 case TGSI_FILE_TEMPORARY: 1715 case TGSI_FILE_ADDRESS: 1716 case TGSI_FILE_PREDICATE: 1717 need_vrf = true; 1718 break; 1719 default: 1720 assert(!"unhandled dst file"); 1721 dst = tdst_null(); 1722 break; 1723 } 1724 1725 if (need_vrf) { 1726 /* XXX we do not always need a scratch given the conditions... */ 1727 const bool need_scratch = 1728 (ra_dst_is_indirect(d) || ra_dst_is_aliasing(tgsi_inst, dst_index) || 1729 tgsi_inst->Instruction.Saturate); 1730 const enum toy_type type = ra_get_type(tgsi, tgsi_inst, dst_index, true); 1731 int vrf; 1732 1733 if (need_scratch) { 1734 vrf = ra_alloc_reg(tgsi, d->Register.File); 1735 } 1736 else { 1737 vrf = ra_map_reg(tgsi, d->Register.File, 1738 ra_dst_dimension(d), ra_dst_index(d), NULL); 1739 } 1740 1741 if (is_scratch) 1742 *is_scratch = need_scratch; 1743 1744 dst = tdst_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR, 1745 false, 0, d->Register.WriteMask, vrf * TOY_REG_WIDTH); 1746 } 1747 1748 return dst; 1749 } 1750 1751 static struct toy_src 1752 ra_get_src_for_vrf(const struct tgsi_full_src_register *s, 1753 enum toy_type type, int vrf) 1754 { 1755 return tsrc_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR, 1756 false, 0, 1757 s->Register.SwizzleX, s->Register.SwizzleY, 1758 s->Register.SwizzleZ, s->Register.SwizzleW, 1759 s->Register.Absolute, s->Register.Negate, 1760 vrf * TOY_REG_WIDTH); 1761 } 1762 1763 static int 1764 init_tgsi_reg(struct toy_tgsi *tgsi, struct toy_inst *inst, 1765 enum tgsi_file_type file, int index, 1766 const struct tgsi_ind_register *indirect, 1767 const struct tgsi_dimension *dimension, 1768 const struct tgsi_ind_register *dim_indirect) 1769 { 1770 struct toy_src src; 1771 int num_src = 0; 1772 1773 /* src[0]: TGSI file */ 1774 inst->src[num_src++] = tsrc_imm_d(file); 1775 1776 /* src[1]: TGSI dimension */ 1777 inst->src[num_src++] = tsrc_imm_d((dimension) ? dimension->Index : 0); 1778 1779 /* src[2]: TGSI dimension indirection */ 1780 if (dim_indirect) { 1781 const int vrf = ra_map_reg(tgsi, dim_indirect->File, 0, 1782 dim_indirect->Index, NULL); 1783 1784 src = tsrc(TOY_FILE_VRF, vrf, 0); 1785 src = tsrc_swizzle1(tsrc_d(src), indirect->Swizzle); 1786 } 1787 else { 1788 src = tsrc_imm_d(0); 1789 } 1790 1791 inst->src[num_src++] = src; 1792 1793 /* src[3]: TGSI index */ 1794 inst->src[num_src++] = tsrc_imm_d(index); 1795 1796 /* src[4]: TGSI index indirection */ 1797 if (indirect) { 1798 const int vrf = ra_map_reg(tgsi, indirect->File, 0, 1799 indirect->Index, NULL); 1800 1801 src = tsrc(TOY_FILE_VRF, vrf, 0); 1802 src = tsrc_swizzle1(tsrc_d(src), indirect->Swizzle); 1803 } 1804 else { 1805 src = tsrc_imm_d(0); 1806 } 1807 1808 inst->src[num_src++] = src; 1809 1810 return num_src; 1811 } 1812 1813 static struct toy_src 1814 ra_get_src_indirect(struct toy_tgsi *tgsi, 1815 const struct tgsi_full_instruction *tgsi_inst, 1816 int src_index) 1817 { 1818 const struct tgsi_full_src_register *s = &tgsi_inst->Src[src_index]; 1819 bool need_vrf = false, is_resource = false; 1820 struct toy_src src; 1821 1822 switch (s->Register.File) { 1823 case TGSI_FILE_NULL: 1824 src = tsrc_null(); 1825 break; 1826 case TGSI_FILE_SAMPLER: 1827 case TGSI_FILE_IMAGE: 1828 case TGSI_FILE_SAMPLER_VIEW: 1829 is_resource = true; 1830 /* fall through */ 1831 case TGSI_FILE_CONSTANT: 1832 case TGSI_FILE_INPUT: 1833 case TGSI_FILE_SYSTEM_VALUE: 1834 case TGSI_FILE_TEMPORARY: 1835 case TGSI_FILE_ADDRESS: 1836 case TGSI_FILE_IMMEDIATE: 1837 case TGSI_FILE_PREDICATE: 1838 need_vrf = true; 1839 break; 1840 default: 1841 assert(!"unhandled src file"); 1842 src = tsrc_null(); 1843 break; 1844 } 1845 1846 if (need_vrf) { 1847 const enum toy_type type = ra_get_type(tgsi, tgsi_inst, src_index, false); 1848 int vrf; 1849 1850 if (is_resource) { 1851 assert(!s->Register.Dimension); 1852 assert(s->Register.Indirect); 1853 1854 vrf = ra_map_reg(tgsi, s->Indirect.File, 0, s->Indirect.Index, NULL); 1855 } 1856 else { 1857 vrf = ra_alloc_reg(tgsi, s->Register.File); 1858 } 1859 1860 src = ra_get_src_for_vrf(s, type, vrf); 1861 1862 /* emit indirect fetch */ 1863 if (!is_resource) { 1864 struct toy_inst *inst; 1865 1866 inst = tc_add(tgsi->tc); 1867 inst->opcode = TOY_OPCODE_TGSI_INDIRECT_FETCH; 1868 inst->dst = tdst_from(src); 1869 inst->dst.writemask = TOY_WRITEMASK_XYZW; 1870 1871 init_tgsi_reg(tgsi, inst, s->Register.File, s->Register.Index, 1872 (s->Register.Indirect) ? &s->Indirect : NULL, 1873 (s->Register.Dimension) ? &s->Dimension : NULL, 1874 (s->Dimension.Indirect) ? &s->DimIndirect : NULL); 1875 } 1876 } 1877 1878 return src; 1879 } 1880 1881 /** 1882 * Return the toy register for a TGSI source operand. 1883 */ 1884 static struct toy_src 1885 ra_get_src(struct toy_tgsi *tgsi, 1886 const struct tgsi_full_instruction *tgsi_inst, 1887 int src_index) 1888 { 1889 const struct tgsi_full_src_register *s = &tgsi_inst->Src[src_index]; 1890 bool need_vrf = false; 1891 struct toy_src src; 1892 1893 if (ra_is_src_indirect(s)) 1894 return ra_get_src_indirect(tgsi, tgsi_inst, src_index); 1895 1896 switch (s->Register.File) { 1897 case TGSI_FILE_NULL: 1898 src = tsrc_null(); 1899 break; 1900 case TGSI_FILE_CONSTANT: 1901 case TGSI_FILE_INPUT: 1902 case TGSI_FILE_SYSTEM_VALUE: 1903 need_vrf = true; 1904 break; 1905 case TGSI_FILE_TEMPORARY: 1906 case TGSI_FILE_ADDRESS: 1907 case TGSI_FILE_PREDICATE: 1908 need_vrf = true; 1909 break; 1910 case TGSI_FILE_SAMPLER: 1911 case TGSI_FILE_IMAGE: 1912 case TGSI_FILE_SAMPLER_VIEW: 1913 assert(!s->Register.Dimension); 1914 src = tsrc_imm_d(s->Register.Index); 1915 break; 1916 case TGSI_FILE_IMMEDIATE: 1917 { 1918 const uint32_t *imm; 1919 enum toy_type imm_type; 1920 bool is_scalar; 1921 1922 imm = toy_tgsi_get_imm(tgsi, s->Register.Index, &imm_type); 1923 1924 is_scalar = 1925 (imm[s->Register.SwizzleX] == imm[s->Register.SwizzleY] && 1926 imm[s->Register.SwizzleX] == imm[s->Register.SwizzleZ] && 1927 imm[s->Register.SwizzleX] == imm[s->Register.SwizzleW]); 1928 1929 if (is_scalar) { 1930 const enum toy_type type = 1931 ra_get_type(tgsi, tgsi_inst, src_index, false); 1932 1933 /* ignore imm_type */ 1934 src = tsrc_imm_ud(imm[s->Register.SwizzleX]); 1935 src.type = type; 1936 src.absolute = s->Register.Absolute; 1937 src.negate = s->Register.Negate; 1938 } 1939 else { 1940 need_vrf = true; 1941 } 1942 } 1943 break; 1944 default: 1945 assert(!"unhandled src file"); 1946 src = tsrc_null(); 1947 break; 1948 } 1949 1950 if (need_vrf) { 1951 const enum toy_type type = ra_get_type(tgsi, tgsi_inst, src_index, false); 1952 bool is_new; 1953 int vrf; 1954 1955 vrf = ra_map_reg(tgsi, s->Register.File, 1956 ra_src_dimension(s), ra_src_index(s), &is_new); 1957 1958 src = ra_get_src_for_vrf(s, type, vrf); 1959 1960 if (is_new) { 1961 switch (s->Register.File) { 1962 case TGSI_FILE_TEMPORARY: 1963 case TGSI_FILE_ADDRESS: 1964 case TGSI_FILE_PREDICATE: 1965 { 1966 struct toy_dst dst = tdst_from(src); 1967 dst.writemask = TOY_WRITEMASK_XYZW; 1968 1969 /* always initialize registers before use */ 1970 if (tgsi->aos) { 1971 tc_MOV(tgsi->tc, dst, tsrc_type(tsrc_imm_d(0), type)); 1972 } 1973 else { 1974 struct toy_dst tdst[4]; 1975 int i; 1976 1977 tdst_transpose(dst, tdst); 1978 1979 for (i = 0; i < 4; i++) { 1980 tc_MOV(tgsi->tc, tdst[i], 1981 tsrc_type(tsrc_imm_d(0), type)); 1982 } 1983 } 1984 } 1985 break; 1986 default: 1987 break; 1988 } 1989 } 1990 1991 } 1992 1993 return src; 1994 } 1995 1996 static void 1997 parse_instruction(struct toy_tgsi *tgsi, 1998 const struct tgsi_full_instruction *tgsi_inst) 1999 { 2000 struct toy_dst dst[TGSI_FULL_MAX_DST_REGISTERS]; 2001 struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS]; 2002 bool dst_is_scratch[TGSI_FULL_MAX_DST_REGISTERS]; 2003 toy_tgsi_translate translate; 2004 int i; 2005 2006 /* convert TGSI registers to toy registers */ 2007 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) 2008 src[i] = ra_get_src(tgsi, tgsi_inst, i); 2009 for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) 2010 dst[i] = ra_get_dst(tgsi, tgsi_inst, i, &dst_is_scratch[i]); 2011 2012 /* translate the instruction */ 2013 translate = tgsi->translate_table[tgsi_inst->Instruction.Opcode]; 2014 if (!translate) { 2015 if (tgsi->translate_table == soa_translate_table) 2016 soa_unsupported(tgsi->tc, tgsi_inst, dst, src); 2017 else 2018 aos_unsupported(tgsi->tc, tgsi_inst, dst, src); 2019 } 2020 translate(tgsi->tc, tgsi_inst, dst, src); 2021 2022 /* write the result to the real destinations if needed */ 2023 for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) { 2024 const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[i]; 2025 2026 if (!dst_is_scratch[i]) 2027 continue; 2028 2029 tgsi->tc->templ.saturate = tgsi_inst->Instruction.Saturate; 2030 2031 /* emit indirect store */ 2032 if (ra_dst_is_indirect(d)) { 2033 struct toy_inst *inst; 2034 2035 inst = tc_add(tgsi->tc); 2036 inst->opcode = TOY_OPCODE_TGSI_INDIRECT_STORE; 2037 inst->dst = dst[i]; 2038 2039 init_tgsi_reg(tgsi, inst, d->Register.File, d->Register.Index, 2040 (d->Register.Indirect) ? &d->Indirect : NULL, 2041 (d->Register.Dimension) ? &d->Dimension : NULL, 2042 (d->Dimension.Indirect) ? &d->DimIndirect : NULL); 2043 } 2044 else { 2045 const enum toy_type type = ra_get_type(tgsi, tgsi_inst, i, true); 2046 struct toy_dst real_dst; 2047 int vrf; 2048 2049 vrf = ra_map_reg(tgsi, d->Register.File, 2050 ra_dst_dimension(d), ra_dst_index(d), NULL); 2051 real_dst = tdst_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR, 2052 false, 0, d->Register.WriteMask, vrf * TOY_REG_WIDTH); 2053 2054 if (tgsi->aos) { 2055 tc_MOV(tgsi->tc, real_dst, tsrc_from(dst[i])); 2056 } 2057 else { 2058 struct toy_dst tdst[4]; 2059 struct toy_src tsrc[4]; 2060 int j; 2061 2062 tdst_transpose(real_dst, tdst); 2063 tsrc_transpose(tsrc_from(dst[i]), tsrc); 2064 2065 for (j = 0; j < 4; j++) 2066 tc_MOV(tgsi->tc, tdst[j], tsrc[j]); 2067 } 2068 } 2069 2070 tgsi->tc->templ.saturate = false; 2071 } 2072 2073 switch (tgsi_inst->Instruction.Opcode) { 2074 case TGSI_OPCODE_KILL_IF: 2075 case TGSI_OPCODE_KILL: 2076 tgsi->uses_kill = true; 2077 break; 2078 } 2079 2080 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) { 2081 const struct tgsi_full_src_register *s = &tgsi_inst->Src[i]; 2082 if (s->Register.File == TGSI_FILE_CONSTANT && s->Register.Indirect) 2083 tgsi->const_indirect = true; 2084 } 2085 2086 /* remember channels written */ 2087 for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) { 2088 const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[i]; 2089 2090 if (d->Register.File != TGSI_FILE_OUTPUT) 2091 continue; 2092 for (i = 0; i < tgsi->num_outputs; i++) { 2093 if (tgsi->outputs[i].index == d->Register.Index) { 2094 tgsi->outputs[i].undefined_mask &= ~d->Register.WriteMask; 2095 break; 2096 } 2097 } 2098 } 2099 } 2100 2101 static void 2102 decl_add_in(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl) 2103 { 2104 static const struct tgsi_declaration_interp default_interp = { 2105 TGSI_INTERPOLATE_PERSPECTIVE, false, 0, 2106 }; 2107 const struct tgsi_declaration_interp *interp = 2108 (decl->Declaration.Interpolate) ? &decl->Interp: &default_interp; 2109 int index; 2110 2111 if (decl->Range.Last >= ARRAY_SIZE(tgsi->inputs)) { 2112 assert(!"invalid IN"); 2113 return; 2114 } 2115 2116 for (index = decl->Range.First; index <= decl->Range.Last; index++) { 2117 const int slot = tgsi->num_inputs++; 2118 2119 tgsi->inputs[slot].index = index; 2120 tgsi->inputs[slot].usage_mask = decl->Declaration.UsageMask; 2121 if (decl->Declaration.Semantic) { 2122 tgsi->inputs[slot].semantic_name = decl->Semantic.Name; 2123 tgsi->inputs[slot].semantic_index = decl->Semantic.Index; 2124 } 2125 else { 2126 tgsi->inputs[slot].semantic_name = TGSI_SEMANTIC_GENERIC; 2127 tgsi->inputs[slot].semantic_index = index; 2128 } 2129 tgsi->inputs[slot].interp = interp->Interpolate; 2130 tgsi->inputs[slot].centroid = interp->Location == TGSI_INTERPOLATE_LOC_CENTROID; 2131 } 2132 } 2133 2134 static void 2135 decl_add_out(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl) 2136 { 2137 int index; 2138 2139 if (decl->Range.Last >= ARRAY_SIZE(tgsi->outputs)) { 2140 assert(!"invalid OUT"); 2141 return; 2142 } 2143 2144 assert(decl->Declaration.Semantic); 2145 2146 for (index = decl->Range.First; index <= decl->Range.Last; index++) { 2147 const int slot = tgsi->num_outputs++; 2148 2149 tgsi->outputs[slot].index = index; 2150 tgsi->outputs[slot].undefined_mask = TOY_WRITEMASK_XYZW; 2151 tgsi->outputs[slot].usage_mask = decl->Declaration.UsageMask; 2152 tgsi->outputs[slot].semantic_name = decl->Semantic.Name; 2153 tgsi->outputs[slot].semantic_index = decl->Semantic.Index; 2154 } 2155 } 2156 2157 static void 2158 decl_add_sv(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl) 2159 { 2160 int index; 2161 2162 if (decl->Range.Last >= ARRAY_SIZE(tgsi->system_values)) { 2163 assert(!"invalid SV"); 2164 return; 2165 } 2166 2167 for (index = decl->Range.First; index <= decl->Range.Last; index++) { 2168 const int slot = tgsi->num_system_values++; 2169 2170 tgsi->system_values[slot].index = index; 2171 if (decl->Declaration.Semantic) { 2172 tgsi->system_values[slot].semantic_name = decl->Semantic.Name; 2173 tgsi->system_values[slot].semantic_index = decl->Semantic.Index; 2174 } 2175 else { 2176 tgsi->system_values[slot].semantic_name = TGSI_SEMANTIC_GENERIC; 2177 tgsi->system_values[slot].semantic_index = index; 2178 } 2179 } 2180 } 2181 2182 /** 2183 * Emit an instruction to fetch the value of a TGSI register. 2184 */ 2185 static void 2186 fetch_source(struct toy_tgsi *tgsi, enum tgsi_file_type file, int dim, int idx) 2187 { 2188 struct toy_dst dst; 2189 int vrf; 2190 enum toy_opcode opcode; 2191 enum toy_type type = TOY_TYPE_F; 2192 2193 switch (file) { 2194 case TGSI_FILE_INPUT: 2195 opcode = TOY_OPCODE_TGSI_IN; 2196 break; 2197 case TGSI_FILE_CONSTANT: 2198 opcode = TOY_OPCODE_TGSI_CONST; 2199 break; 2200 case TGSI_FILE_SYSTEM_VALUE: 2201 opcode = TOY_OPCODE_TGSI_SV; 2202 break; 2203 case TGSI_FILE_IMMEDIATE: 2204 opcode = TOY_OPCODE_TGSI_IMM; 2205 toy_tgsi_get_imm(tgsi, idx, &type); 2206 break; 2207 default: 2208 /* no need to fetch */ 2209 return; 2210 break; 2211 } 2212 2213 vrf = ra_map_reg(tgsi, file, dim, idx, NULL); 2214 dst = tdst(TOY_FILE_VRF, vrf, 0); 2215 dst = tdst_type(dst, type); 2216 2217 tc_add2(tgsi->tc, opcode, dst, tsrc_imm_d(dim), tsrc_imm_d(idx)); 2218 } 2219 2220 static void 2221 parse_declaration(struct toy_tgsi *tgsi, 2222 const struct tgsi_full_declaration *decl) 2223 { 2224 int i; 2225 2226 switch (decl->Declaration.File) { 2227 case TGSI_FILE_INPUT: 2228 decl_add_in(tgsi, decl); 2229 break; 2230 case TGSI_FILE_OUTPUT: 2231 decl_add_out(tgsi, decl); 2232 break; 2233 case TGSI_FILE_SYSTEM_VALUE: 2234 decl_add_sv(tgsi, decl); 2235 break; 2236 case TGSI_FILE_IMMEDIATE: 2237 /* immediates should be declared with TGSI_TOKEN_TYPE_IMMEDIATE */ 2238 assert(!"unexpected immediate declaration"); 2239 break; 2240 case TGSI_FILE_CONSTANT: 2241 if (tgsi->const_count <= decl->Range.Last) 2242 tgsi->const_count = decl->Range.Last + 1; 2243 break; 2244 case TGSI_FILE_NULL: 2245 case TGSI_FILE_TEMPORARY: 2246 case TGSI_FILE_SAMPLER: 2247 case TGSI_FILE_PREDICATE: 2248 case TGSI_FILE_ADDRESS: 2249 case TGSI_FILE_IMAGE: 2250 case TGSI_FILE_SAMPLER_VIEW: 2251 /* nothing to do */ 2252 break; 2253 default: 2254 assert(!"unhandled TGSI file"); 2255 break; 2256 } 2257 2258 /* fetch the registers now */ 2259 for (i = decl->Range.First; i <= decl->Range.Last; i++) { 2260 const int dim = (decl->Declaration.Dimension) ? decl->Dim.Index2D : 0; 2261 fetch_source(tgsi, decl->Declaration.File, dim, i); 2262 } 2263 } 2264 2265 static int 2266 add_imm(struct toy_tgsi *tgsi, enum toy_type type, const uint32_t *buf) 2267 { 2268 /* reallocate the buffer if necessary */ 2269 if (tgsi->imm_data.cur >= tgsi->imm_data.size) { 2270 const int cur_size = tgsi->imm_data.size; 2271 int new_size; 2272 enum toy_type *new_types; 2273 uint32_t (*new_buf)[4]; 2274 2275 new_size = (cur_size) ? cur_size << 1 : 16; 2276 while (new_size <= tgsi->imm_data.cur) 2277 new_size <<= 1; 2278 2279 new_buf = REALLOC(tgsi->imm_data.buf, 2280 cur_size * sizeof(new_buf[0]), 2281 new_size * sizeof(new_buf[0])); 2282 new_types = REALLOC(tgsi->imm_data.types, 2283 cur_size * sizeof(new_types[0]), 2284 new_size * sizeof(new_types[0])); 2285 if (!new_buf || !new_types) { 2286 FREE(new_buf); 2287 FREE(new_types); 2288 return -1; 2289 } 2290 2291 tgsi->imm_data.buf = new_buf; 2292 tgsi->imm_data.types = new_types; 2293 tgsi->imm_data.size = new_size; 2294 } 2295 2296 tgsi->imm_data.types[tgsi->imm_data.cur] = type; 2297 memcpy(&tgsi->imm_data.buf[tgsi->imm_data.cur], 2298 buf, sizeof(tgsi->imm_data.buf[0])); 2299 2300 return tgsi->imm_data.cur++; 2301 } 2302 2303 static void 2304 parse_immediate(struct toy_tgsi *tgsi, const struct tgsi_full_immediate *imm) 2305 { 2306 enum toy_type type; 2307 uint32_t imm_buf[4]; 2308 int idx; 2309 2310 switch (imm->Immediate.DataType) { 2311 case TGSI_IMM_FLOAT32: 2312 type = TOY_TYPE_F; 2313 imm_buf[0] = fui(imm->u[0].Float); 2314 imm_buf[1] = fui(imm->u[1].Float); 2315 imm_buf[2] = fui(imm->u[2].Float); 2316 imm_buf[3] = fui(imm->u[3].Float); 2317 break; 2318 case TGSI_IMM_INT32: 2319 type = TOY_TYPE_D; 2320 imm_buf[0] = (uint32_t) imm->u[0].Int; 2321 imm_buf[1] = (uint32_t) imm->u[1].Int; 2322 imm_buf[2] = (uint32_t) imm->u[2].Int; 2323 imm_buf[3] = (uint32_t) imm->u[3].Int; 2324 break; 2325 case TGSI_IMM_UINT32: 2326 type = TOY_TYPE_UD; 2327 imm_buf[0] = imm->u[0].Uint; 2328 imm_buf[1] = imm->u[1].Uint; 2329 imm_buf[2] = imm->u[2].Uint; 2330 imm_buf[3] = imm->u[3].Uint; 2331 break; 2332 default: 2333 assert(!"unhandled TGSI imm type"); 2334 type = TOY_TYPE_F; 2335 memset(imm_buf, 0, sizeof(imm_buf)); 2336 break; 2337 } 2338 2339 idx = add_imm(tgsi, type, imm_buf); 2340 if (idx >= 0) 2341 fetch_source(tgsi, TGSI_FILE_IMMEDIATE, 0, idx); 2342 else 2343 tc_fail(tgsi->tc, "failed to add TGSI imm"); 2344 } 2345 2346 static void 2347 parse_property(struct toy_tgsi *tgsi, const struct tgsi_full_property *prop) 2348 { 2349 switch (prop->Property.PropertyName) { 2350 case TGSI_PROPERTY_VS_PROHIBIT_UCPS: 2351 tgsi->props.vs_prohibit_ucps = prop->u[0].Data; 2352 break; 2353 case TGSI_PROPERTY_FS_COORD_ORIGIN: 2354 tgsi->props.fs_coord_origin = prop->u[0].Data; 2355 break; 2356 case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER: 2357 tgsi->props.fs_coord_pixel_center = prop->u[0].Data; 2358 break; 2359 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: 2360 tgsi->props.fs_color0_writes_all_cbufs = prop->u[0].Data; 2361 break; 2362 case TGSI_PROPERTY_FS_DEPTH_LAYOUT: 2363 tgsi->props.fs_depth_layout = prop->u[0].Data; 2364 break; 2365 case TGSI_PROPERTY_GS_INPUT_PRIM: 2366 tgsi->props.gs_input_prim = prop->u[0].Data; 2367 break; 2368 case TGSI_PROPERTY_GS_OUTPUT_PRIM: 2369 tgsi->props.gs_output_prim = prop->u[0].Data; 2370 break; 2371 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: 2372 tgsi->props.gs_max_output_vertices = prop->u[0].Data; 2373 break; 2374 default: 2375 assert(!"unhandled TGSI property"); 2376 break; 2377 } 2378 } 2379 2380 static void 2381 parse_token(struct toy_tgsi *tgsi, const union tgsi_full_token *token) 2382 { 2383 switch (token->Token.Type) { 2384 case TGSI_TOKEN_TYPE_DECLARATION: 2385 parse_declaration(tgsi, &token->FullDeclaration); 2386 break; 2387 case TGSI_TOKEN_TYPE_IMMEDIATE: 2388 parse_immediate(tgsi, &token->FullImmediate); 2389 break; 2390 case TGSI_TOKEN_TYPE_INSTRUCTION: 2391 parse_instruction(tgsi, &token->FullInstruction); 2392 break; 2393 case TGSI_TOKEN_TYPE_PROPERTY: 2394 parse_property(tgsi, &token->FullProperty); 2395 break; 2396 default: 2397 assert(!"unhandled TGSI token type"); 2398 break; 2399 } 2400 } 2401 2402 static enum pipe_error 2403 dump_reg_mapping(void *key, void *val, void *data) 2404 { 2405 int tgsi_file, tgsi_dim, tgsi_index; 2406 uint32_t sig, vrf; 2407 2408 sig = (uint32_t) pointer_to_intptr(key); 2409 vrf = (uint32_t) pointer_to_intptr(val); 2410 2411 /* see ra_get_map_key() */ 2412 tgsi_file = (sig >> 28) & 0xf; 2413 tgsi_dim = (sig >> 16) & 0xfff; 2414 tgsi_index = (sig >> 0) & 0xffff; 2415 2416 if (tgsi_dim) { 2417 ilo_printf(" v%d:\t%s[%d][%d]\n", vrf, 2418 tgsi_file_name(tgsi_file), tgsi_dim, tgsi_index); 2419 } 2420 else { 2421 ilo_printf(" v%d:\t%s[%d]\n", vrf, 2422 tgsi_file_name(tgsi_file), tgsi_index); 2423 } 2424 2425 return PIPE_OK; 2426 } 2427 2428 /** 2429 * Dump the TGSI translator, currently only the register mapping. 2430 */ 2431 void 2432 toy_tgsi_dump(const struct toy_tgsi *tgsi) 2433 { 2434 util_hash_table_foreach(tgsi->reg_mapping, dump_reg_mapping, NULL); 2435 } 2436 2437 /** 2438 * Clean up the TGSI translator. 2439 */ 2440 void 2441 toy_tgsi_cleanup(struct toy_tgsi *tgsi) 2442 { 2443 FREE(tgsi->imm_data.buf); 2444 FREE(tgsi->imm_data.types); 2445 2446 util_hash_table_destroy(tgsi->reg_mapping); 2447 } 2448 2449 static unsigned 2450 reg_mapping_hash(void *key) 2451 { 2452 return (unsigned) pointer_to_intptr(key); 2453 } 2454 2455 static int 2456 reg_mapping_compare(void *key1, void *key2) 2457 { 2458 return (key1 != key2); 2459 } 2460 2461 /** 2462 * Initialize the TGSI translator. 2463 */ 2464 static bool 2465 init_tgsi(struct toy_tgsi *tgsi, struct toy_compiler *tc, bool aos) 2466 { 2467 memset(tgsi, 0, sizeof(*tgsi)); 2468 2469 tgsi->tc = tc; 2470 tgsi->aos = aos; 2471 tgsi->translate_table = (aos) ? aos_translate_table : soa_translate_table; 2472 2473 /* create a mapping of TGSI registers to VRF reigsters */ 2474 tgsi->reg_mapping = 2475 util_hash_table_create(reg_mapping_hash, reg_mapping_compare); 2476 2477 return (tgsi->reg_mapping != NULL); 2478 } 2479 2480 /** 2481 * Translate TGSI tokens into toy instructions. 2482 */ 2483 void 2484 toy_compiler_translate_tgsi(struct toy_compiler *tc, 2485 const struct tgsi_token *tokens, bool aos, 2486 struct toy_tgsi *tgsi) 2487 { 2488 struct tgsi_parse_context parse; 2489 2490 if (!init_tgsi(tgsi, tc, aos)) { 2491 tc_fail(tc, "failed to initialize TGSI translator"); 2492 return; 2493 } 2494 2495 tgsi_parse_init(&parse, tokens); 2496 while (!tgsi_parse_end_of_tokens(&parse)) { 2497 tgsi_parse_token(&parse); 2498 parse_token(tgsi, &parse.FullToken); 2499 } 2500 tgsi_parse_free(&parse); 2501 } 2502 2503 /** 2504 * Map the TGSI register to VRF register. 2505 */ 2506 int 2507 toy_tgsi_get_vrf(const struct toy_tgsi *tgsi, 2508 enum tgsi_file_type file, int dimension, int index) 2509 { 2510 void *key, *val; 2511 2512 key = ra_get_map_key(file, dimension, index); 2513 2514 val = util_hash_table_get(tgsi->reg_mapping, key); 2515 2516 return (val) ? pointer_to_intptr(val) : -1; 2517 } 2518