1 /* 2 * Copyright 2012 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 #include "ir.h" 25 #include "ir_builder.h" 26 #include "ir_optimization.h" 27 #include "ir_rvalue_visitor.h" 28 29 namespace { 30 31 using namespace ir_builder; 32 33 /** 34 * A visitor that lowers built-in floating-point pack/unpack expressions 35 * such packSnorm2x16. 36 */ 37 class lower_packing_builtins_visitor : public ir_rvalue_visitor { 38 public: 39 /** 40 * \param op_mask is a bitmask of `enum lower_packing_builtins_op` 41 */ 42 explicit lower_packing_builtins_visitor(int op_mask) 43 : op_mask(op_mask), 44 progress(false) 45 { 46 factory.instructions = &factory_instructions; 47 } 48 49 virtual ~lower_packing_builtins_visitor() 50 { 51 assert(factory_instructions.is_empty()); 52 } 53 54 bool get_progress() { return progress; } 55 56 void handle_rvalue(ir_rvalue **rvalue) 57 { 58 if (!*rvalue) 59 return; 60 61 ir_expression *expr = (*rvalue)->as_expression(); 62 if (!expr) 63 return; 64 65 enum lower_packing_builtins_op lowering_op = 66 choose_lowering_op(expr->operation); 67 68 if (lowering_op == LOWER_PACK_UNPACK_NONE) 69 return; 70 71 setup_factory(ralloc_parent(expr)); 72 73 ir_rvalue *op0 = expr->operands[0]; 74 ralloc_steal(factory.mem_ctx, op0); 75 76 switch (lowering_op) { 77 case LOWER_PACK_SNORM_2x16: 78 *rvalue = lower_pack_snorm_2x16(op0); 79 break; 80 case LOWER_PACK_SNORM_4x8: 81 *rvalue = lower_pack_snorm_4x8(op0); 82 break; 83 case LOWER_PACK_UNORM_2x16: 84 *rvalue = lower_pack_unorm_2x16(op0); 85 break; 86 case LOWER_PACK_UNORM_4x8: 87 *rvalue = lower_pack_unorm_4x8(op0); 88 break; 89 case LOWER_PACK_HALF_2x16: 90 *rvalue = lower_pack_half_2x16(op0); 91 break; 92 case LOWER_UNPACK_SNORM_2x16: 93 *rvalue = lower_unpack_snorm_2x16(op0); 94 break; 95 case LOWER_UNPACK_SNORM_4x8: 96 *rvalue = lower_unpack_snorm_4x8(op0); 97 break; 98 case LOWER_UNPACK_UNORM_2x16: 99 *rvalue = lower_unpack_unorm_2x16(op0); 100 break; 101 case LOWER_UNPACK_UNORM_4x8: 102 *rvalue = lower_unpack_unorm_4x8(op0); 103 break; 104 case LOWER_UNPACK_HALF_2x16: 105 *rvalue = lower_unpack_half_2x16(op0); 106 break; 107 case LOWER_PACK_UNPACK_NONE: 108 case LOWER_PACK_USE_BFI: 109 case LOWER_PACK_USE_BFE: 110 assert(!"not reached"); 111 break; 112 } 113 114 teardown_factory(); 115 progress = true; 116 } 117 118 private: 119 const int op_mask; 120 bool progress; 121 ir_factory factory; 122 exec_list factory_instructions; 123 124 /** 125 * Determine the needed lowering operation by filtering \a expr_op 126 * through \ref op_mask. 127 */ 128 enum lower_packing_builtins_op 129 choose_lowering_op(ir_expression_operation expr_op) 130 { 131 /* C++ regards int and enum as fundamentally different types. 132 * So, we can't simply return from each case; we must cast the return 133 * value. 134 */ 135 int result; 136 137 switch (expr_op) { 138 case ir_unop_pack_snorm_2x16: 139 result = op_mask & LOWER_PACK_SNORM_2x16; 140 break; 141 case ir_unop_pack_snorm_4x8: 142 result = op_mask & LOWER_PACK_SNORM_4x8; 143 break; 144 case ir_unop_pack_unorm_2x16: 145 result = op_mask & LOWER_PACK_UNORM_2x16; 146 break; 147 case ir_unop_pack_unorm_4x8: 148 result = op_mask & LOWER_PACK_UNORM_4x8; 149 break; 150 case ir_unop_pack_half_2x16: 151 result = op_mask & LOWER_PACK_HALF_2x16; 152 break; 153 case ir_unop_unpack_snorm_2x16: 154 result = op_mask & LOWER_UNPACK_SNORM_2x16; 155 break; 156 case ir_unop_unpack_snorm_4x8: 157 result = op_mask & LOWER_UNPACK_SNORM_4x8; 158 break; 159 case ir_unop_unpack_unorm_2x16: 160 result = op_mask & LOWER_UNPACK_UNORM_2x16; 161 break; 162 case ir_unop_unpack_unorm_4x8: 163 result = op_mask & LOWER_UNPACK_UNORM_4x8; 164 break; 165 case ir_unop_unpack_half_2x16: 166 result = op_mask & LOWER_UNPACK_HALF_2x16; 167 break; 168 default: 169 result = LOWER_PACK_UNPACK_NONE; 170 break; 171 } 172 173 return static_cast<enum lower_packing_builtins_op>(result); 174 } 175 176 void 177 setup_factory(void *mem_ctx) 178 { 179 assert(factory.mem_ctx == NULL); 180 assert(factory.instructions->is_empty()); 181 182 factory.mem_ctx = mem_ctx; 183 } 184 185 void 186 teardown_factory() 187 { 188 base_ir->insert_before(factory.instructions); 189 assert(factory.instructions->is_empty()); 190 factory.mem_ctx = NULL; 191 } 192 193 template <typename T> 194 ir_constant* 195 constant(T x) 196 { 197 return factory.constant(x); 198 } 199 200 /** 201 * \brief Pack two uint16's into a single uint32. 202 * 203 * Interpret the given uvec2 as a uint16 pair. Pack the pair into a uint32 204 * where the least significant bits specify the first element of the pair. 205 * Return the uint32. 206 */ 207 ir_rvalue* 208 pack_uvec2_to_uint(ir_rvalue *uvec2_rval) 209 { 210 assert(uvec2_rval->type == glsl_type::uvec2_type); 211 212 /* uvec2 u = UVEC2_RVAL; */ 213 ir_variable *u = factory.make_temp(glsl_type::uvec2_type, 214 "tmp_pack_uvec2_to_uint"); 215 factory.emit(assign(u, uvec2_rval)); 216 217 if (op_mask & LOWER_PACK_USE_BFI) { 218 return bitfield_insert(bit_and(swizzle_x(u), constant(0xffffu)), 219 swizzle_y(u), 220 constant(16u), 221 constant(16u)); 222 } 223 224 /* return (u.y << 16) | (u.x & 0xffff); */ 225 return bit_or(lshift(swizzle_y(u), constant(16u)), 226 bit_and(swizzle_x(u), constant(0xffffu))); 227 } 228 229 /** 230 * \brief Pack four uint8's into a single uint32. 231 * 232 * Interpret the given uvec4 as a uint32 4-typle. Pack the 4-tuple into a 233 * uint32 where the least significant bits specify the first element of the 234 * 4-tuple. Return the uint32. 235 */ 236 ir_rvalue* 237 pack_uvec4_to_uint(ir_rvalue *uvec4_rval) 238 { 239 assert(uvec4_rval->type == glsl_type::uvec4_type); 240 241 ir_variable *u = factory.make_temp(glsl_type::uvec4_type, 242 "tmp_pack_uvec4_to_uint"); 243 244 if (op_mask & LOWER_PACK_USE_BFI) { 245 /* uvec4 u = UVEC4_RVAL; */ 246 factory.emit(assign(u, uvec4_rval)); 247 248 return bitfield_insert(bitfield_insert( 249 bitfield_insert( 250 bit_and(swizzle_x(u), constant(0xffu)), 251 swizzle_y(u), constant(8u), constant(8u)), 252 swizzle_z(u), constant(16u), constant(8u)), 253 swizzle_w(u), constant(24u), constant(8u)); 254 } 255 256 /* uvec4 u = UVEC4_RVAL & 0xff */ 257 factory.emit(assign(u, bit_and(uvec4_rval, constant(0xffu)))); 258 259 /* return (u.w << 24) | (u.z << 16) | (u.y << 8) | u.x; */ 260 return bit_or(bit_or(lshift(swizzle_w(u), constant(24u)), 261 lshift(swizzle_z(u), constant(16u))), 262 bit_or(lshift(swizzle_y(u), constant(8u)), 263 swizzle_x(u))); 264 } 265 266 /** 267 * \brief Unpack a uint32 into two uint16's. 268 * 269 * Interpret the given uint32 as a uint16 pair where the uint32's least 270 * significant bits specify the pair's first element. Return the uint16 271 * pair as a uvec2. 272 */ 273 ir_rvalue* 274 unpack_uint_to_uvec2(ir_rvalue *uint_rval) 275 { 276 assert(uint_rval->type == glsl_type::uint_type); 277 278 /* uint u = UINT_RVAL; */ 279 ir_variable *u = factory.make_temp(glsl_type::uint_type, 280 "tmp_unpack_uint_to_uvec2_u"); 281 factory.emit(assign(u, uint_rval)); 282 283 /* uvec2 u2; */ 284 ir_variable *u2 = factory.make_temp(glsl_type::uvec2_type, 285 "tmp_unpack_uint_to_uvec2_u2"); 286 287 /* u2.x = u & 0xffffu; */ 288 factory.emit(assign(u2, bit_and(u, constant(0xffffu)), WRITEMASK_X)); 289 290 /* u2.y = u >> 16u; */ 291 factory.emit(assign(u2, rshift(u, constant(16u)), WRITEMASK_Y)); 292 293 return deref(u2).val; 294 } 295 296 /** 297 * \brief Unpack a uint32 into two int16's. 298 * 299 * Specifically each 16-bit value is sign-extended to the full width of an 300 * int32 on return. 301 */ 302 ir_rvalue * 303 unpack_uint_to_ivec2(ir_rvalue *uint_rval) 304 { 305 assert(uint_rval->type == glsl_type::uint_type); 306 307 if (!(op_mask & LOWER_PACK_USE_BFE)) { 308 return rshift(lshift(u2i(unpack_uint_to_uvec2(uint_rval)), 309 constant(16u)), 310 constant(16u)); 311 } 312 313 ir_variable *i = factory.make_temp(glsl_type::int_type, 314 "tmp_unpack_uint_to_ivec2_i"); 315 factory.emit(assign(i, u2i(uint_rval))); 316 317 /* ivec2 i2; */ 318 ir_variable *i2 = factory.make_temp(glsl_type::ivec2_type, 319 "tmp_unpack_uint_to_ivec2_i2"); 320 321 factory.emit(assign(i2, bitfield_extract(i, constant(0), constant(16)), 322 WRITEMASK_X)); 323 factory.emit(assign(i2, bitfield_extract(i, constant(16), constant(16)), 324 WRITEMASK_Y)); 325 326 return deref(i2).val; 327 } 328 329 /** 330 * \brief Unpack a uint32 into four uint8's. 331 * 332 * Interpret the given uint32 as a uint8 4-tuple where the uint32's least 333 * significant bits specify the 4-tuple's first element. Return the uint8 334 * 4-tuple as a uvec4. 335 */ 336 ir_rvalue* 337 unpack_uint_to_uvec4(ir_rvalue *uint_rval) 338 { 339 assert(uint_rval->type == glsl_type::uint_type); 340 341 /* uint u = UINT_RVAL; */ 342 ir_variable *u = factory.make_temp(glsl_type::uint_type, 343 "tmp_unpack_uint_to_uvec4_u"); 344 factory.emit(assign(u, uint_rval)); 345 346 /* uvec4 u4; */ 347 ir_variable *u4 = factory.make_temp(glsl_type::uvec4_type, 348 "tmp_unpack_uint_to_uvec4_u4"); 349 350 /* u4.x = u & 0xffu; */ 351 factory.emit(assign(u4, bit_and(u, constant(0xffu)), WRITEMASK_X)); 352 353 if (op_mask & LOWER_PACK_USE_BFE) { 354 /* u4.y = bitfield_extract(u, 8, 8); */ 355 factory.emit(assign(u4, bitfield_extract(u, constant(8u), constant(8u)), 356 WRITEMASK_Y)); 357 358 /* u4.z = bitfield_extract(u, 16, 8); */ 359 factory.emit(assign(u4, bitfield_extract(u, constant(16u), constant(8u)), 360 WRITEMASK_Z)); 361 } else { 362 /* u4.y = (u >> 8u) & 0xffu; */ 363 factory.emit(assign(u4, bit_and(rshift(u, constant(8u)), 364 constant(0xffu)), WRITEMASK_Y)); 365 366 /* u4.z = (u >> 16u) & 0xffu; */ 367 factory.emit(assign(u4, bit_and(rshift(u, constant(16u)), 368 constant(0xffu)), WRITEMASK_Z)); 369 } 370 371 /* u4.w = (u >> 24u) */ 372 factory.emit(assign(u4, rshift(u, constant(24u)), WRITEMASK_W)); 373 374 return deref(u4).val; 375 } 376 377 /** 378 * \brief Unpack a uint32 into four int8's. 379 * 380 * Specifically each 8-bit value is sign-extended to the full width of an 381 * int32 on return. 382 */ 383 ir_rvalue * 384 unpack_uint_to_ivec4(ir_rvalue *uint_rval) 385 { 386 assert(uint_rval->type == glsl_type::uint_type); 387 388 if (!(op_mask & LOWER_PACK_USE_BFE)) { 389 return rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)), 390 constant(24u)), 391 constant(24u)); 392 } 393 394 ir_variable *i = factory.make_temp(glsl_type::int_type, 395 "tmp_unpack_uint_to_ivec4_i"); 396 factory.emit(assign(i, u2i(uint_rval))); 397 398 /* ivec4 i4; */ 399 ir_variable *i4 = factory.make_temp(glsl_type::ivec4_type, 400 "tmp_unpack_uint_to_ivec4_i4"); 401 402 factory.emit(assign(i4, bitfield_extract(i, constant(0), constant(8)), 403 WRITEMASK_X)); 404 factory.emit(assign(i4, bitfield_extract(i, constant(8), constant(8)), 405 WRITEMASK_Y)); 406 factory.emit(assign(i4, bitfield_extract(i, constant(16), constant(8)), 407 WRITEMASK_Z)); 408 factory.emit(assign(i4, bitfield_extract(i, constant(24), constant(8)), 409 WRITEMASK_W)); 410 411 return deref(i4).val; 412 } 413 414 /** 415 * \brief Lower a packSnorm2x16 expression. 416 * 417 * \param vec2_rval is packSnorm2x16's input 418 * \return packSnorm2x16's output as a uint rvalue 419 */ 420 ir_rvalue* 421 lower_pack_snorm_2x16(ir_rvalue *vec2_rval) 422 { 423 /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec: 424 * 425 * highp uint packSnorm2x16(vec2 v) 426 * -------------------------------- 427 * First, converts each component of the normalized floating-point value 428 * v into 16-bit integer values. Then, the results are packed into the 429 * returned 32-bit unsigned integer. 430 * 431 * The conversion for component c of v to fixed point is done as 432 * follows: 433 * 434 * packSnorm2x16: round(clamp(c, -1, +1) * 32767.0) 435 * 436 * The first component of the vector will be written to the least 437 * significant bits of the output; the last component will be written to 438 * the most significant bits. 439 * 440 * This function generates IR that approximates the following pseudo-GLSL: 441 * 442 * return pack_uvec2_to_uint( 443 * uvec2(ivec2( 444 * round(clamp(VEC2_RVALUE, -1.0f, 1.0f) * 32767.0f)))); 445 * 446 * It is necessary to first convert the vec2 to ivec2 rather than directly 447 * converting vec2 to uvec2 because the latter conversion is undefined. 448 * From page 56 (62 of pdf) of the GLSL ES 3.00 spec: "It is undefined to 449 * convert a negative floating point value to an uint". 450 */ 451 assert(vec2_rval->type == glsl_type::vec2_type); 452 453 ir_rvalue *result = pack_uvec2_to_uint( 454 i2u(f2i(round_even(mul(clamp(vec2_rval, 455 constant(-1.0f), 456 constant(1.0f)), 457 constant(32767.0f)))))); 458 459 assert(result->type == glsl_type::uint_type); 460 return result; 461 } 462 463 /** 464 * \brief Lower a packSnorm4x8 expression. 465 * 466 * \param vec4_rval is packSnorm4x8's input 467 * \return packSnorm4x8's output as a uint rvalue 468 */ 469 ir_rvalue* 470 lower_pack_snorm_4x8(ir_rvalue *vec4_rval) 471 { 472 /* From page 137 (143 of pdf) of the GLSL 4.30 spec: 473 * 474 * highp uint packSnorm4x8(vec4 v) 475 * ------------------------------- 476 * First, converts each component of the normalized floating-point value 477 * v into 8-bit integer values. Then, the results are packed into the 478 * returned 32-bit unsigned integer. 479 * 480 * The conversion for component c of v to fixed point is done as 481 * follows: 482 * 483 * packSnorm4x8: round(clamp(c, -1, +1) * 127.0) 484 * 485 * The first component of the vector will be written to the least 486 * significant bits of the output; the last component will be written to 487 * the most significant bits. 488 * 489 * This function generates IR that approximates the following pseudo-GLSL: 490 * 491 * return pack_uvec4_to_uint( 492 * uvec4(ivec4( 493 * round(clamp(VEC4_RVALUE, -1.0f, 1.0f) * 127.0f)))); 494 * 495 * It is necessary to first convert the vec4 to ivec4 rather than directly 496 * converting vec4 to uvec4 because the latter conversion is undefined. 497 * From page 87 (93 of pdf) of the GLSL 4.30 spec: "It is undefined to 498 * convert a negative floating point value to an uint". 499 */ 500 assert(vec4_rval->type == glsl_type::vec4_type); 501 502 ir_rvalue *result = pack_uvec4_to_uint( 503 i2u(f2i(round_even(mul(clamp(vec4_rval, 504 constant(-1.0f), 505 constant(1.0f)), 506 constant(127.0f)))))); 507 508 assert(result->type == glsl_type::uint_type); 509 return result; 510 } 511 512 /** 513 * \brief Lower an unpackSnorm2x16 expression. 514 * 515 * \param uint_rval is unpackSnorm2x16's input 516 * \return unpackSnorm2x16's output as a vec2 rvalue 517 */ 518 ir_rvalue* 519 lower_unpack_snorm_2x16(ir_rvalue *uint_rval) 520 { 521 /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec: 522 * 523 * highp vec2 unpackSnorm2x16 (highp uint p) 524 * ----------------------------------------- 525 * First, unpacks a single 32-bit unsigned integer p into a pair of 526 * 16-bit unsigned integers. Then, each component is converted to 527 * a normalized floating-point value to generate the returned 528 * two-component vector. 529 * 530 * The conversion for unpacked fixed-point value f to floating point is 531 * done as follows: 532 * 533 * unpackSnorm2x16: clamp(f / 32767.0, -1,+1) 534 * 535 * The first component of the returned vector will be extracted from the 536 * least significant bits of the input; the last component will be 537 * extracted from the most significant bits. 538 * 539 * This function generates IR that approximates the following pseudo-GLSL: 540 * 541 * return clamp( 542 * ((ivec2(unpack_uint_to_uvec2(UINT_RVALUE)) << 16) >> 16) / 32767.0f, 543 * -1.0f, 1.0f); 544 * 545 * The above IR may appear unnecessarily complex, but the intermediate 546 * conversion to ivec2 and the bit shifts are necessary to correctly unpack 547 * negative floats. 548 * 549 * To see why, consider packing and then unpacking vec2(-1.0, 0.0). 550 * packSnorm2x16 encodes -1.0 as the int16 0xffff. During unpacking, we 551 * place that int16 into an int32, which results in the *positive* integer 552 * 0x0000ffff. The int16's sign bit becomes, in the int32, the rather 553 * unimportant bit 16. We must now extend the int16's sign bit into bits 554 * 17-32, which is accomplished by left-shifting then right-shifting. 555 */ 556 557 assert(uint_rval->type == glsl_type::uint_type); 558 559 ir_rvalue *result = 560 clamp(div(i2f(unpack_uint_to_ivec2(uint_rval)), 561 constant(32767.0f)), 562 constant(-1.0f), 563 constant(1.0f)); 564 565 assert(result->type == glsl_type::vec2_type); 566 return result; 567 } 568 569 /** 570 * \brief Lower an unpackSnorm4x8 expression. 571 * 572 * \param uint_rval is unpackSnorm4x8's input 573 * \return unpackSnorm4x8's output as a vec4 rvalue 574 */ 575 ir_rvalue* 576 lower_unpack_snorm_4x8(ir_rvalue *uint_rval) 577 { 578 /* From page 137 (143 of pdf) of the GLSL 4.30 spec: 579 * 580 * highp vec4 unpackSnorm4x8 (highp uint p) 581 * ---------------------------------------- 582 * First, unpacks a single 32-bit unsigned integer p into four 583 * 8-bit unsigned integers. Then, each component is converted to 584 * a normalized floating-point value to generate the returned 585 * four-component vector. 586 * 587 * The conversion for unpacked fixed-point value f to floating point is 588 * done as follows: 589 * 590 * unpackSnorm4x8: clamp(f / 127.0, -1, +1) 591 * 592 * The first component of the returned vector will be extracted from the 593 * least significant bits of the input; the last component will be 594 * extracted from the most significant bits. 595 * 596 * This function generates IR that approximates the following pseudo-GLSL: 597 * 598 * return clamp( 599 * ((ivec4(unpack_uint_to_uvec4(UINT_RVALUE)) << 24) >> 24) / 127.0f, 600 * -1.0f, 1.0f); 601 * 602 * The above IR may appear unnecessarily complex, but the intermediate 603 * conversion to ivec4 and the bit shifts are necessary to correctly unpack 604 * negative floats. 605 * 606 * To see why, consider packing and then unpacking vec4(-1.0, 0.0, 0.0, 607 * 0.0). packSnorm4x8 encodes -1.0 as the int8 0xff. During unpacking, we 608 * place that int8 into an int32, which results in the *positive* integer 609 * 0x000000ff. The int8's sign bit becomes, in the int32, the rather 610 * unimportant bit 8. We must now extend the int8's sign bit into bits 611 * 9-32, which is accomplished by left-shifting then right-shifting. 612 */ 613 614 assert(uint_rval->type == glsl_type::uint_type); 615 616 ir_rvalue *result = 617 clamp(div(i2f(unpack_uint_to_ivec4(uint_rval)), 618 constant(127.0f)), 619 constant(-1.0f), 620 constant(1.0f)); 621 622 assert(result->type == glsl_type::vec4_type); 623 return result; 624 } 625 626 /** 627 * \brief Lower a packUnorm2x16 expression. 628 * 629 * \param vec2_rval is packUnorm2x16's input 630 * \return packUnorm2x16's output as a uint rvalue 631 */ 632 ir_rvalue* 633 lower_pack_unorm_2x16(ir_rvalue *vec2_rval) 634 { 635 /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec: 636 * 637 * highp uint packUnorm2x16 (vec2 v) 638 * --------------------------------- 639 * First, converts each component of the normalized floating-point value 640 * v into 16-bit integer values. Then, the results are packed into the 641 * returned 32-bit unsigned integer. 642 * 643 * The conversion for component c of v to fixed point is done as 644 * follows: 645 * 646 * packUnorm2x16: round(clamp(c, 0, +1) * 65535.0) 647 * 648 * The first component of the vector will be written to the least 649 * significant bits of the output; the last component will be written to 650 * the most significant bits. 651 * 652 * This function generates IR that approximates the following pseudo-GLSL: 653 * 654 * return pack_uvec2_to_uint(uvec2( 655 * round(clamp(VEC2_RVALUE, 0.0f, 1.0f) * 65535.0f))); 656 * 657 * Here it is safe to directly convert the vec2 to uvec2 because the vec2 658 * has been clamped to a non-negative range. 659 */ 660 661 assert(vec2_rval->type == glsl_type::vec2_type); 662 663 ir_rvalue *result = pack_uvec2_to_uint( 664 f2u(round_even(mul(saturate(vec2_rval), constant(65535.0f))))); 665 666 assert(result->type == glsl_type::uint_type); 667 return result; 668 } 669 670 /** 671 * \brief Lower a packUnorm4x8 expression. 672 * 673 * \param vec4_rval is packUnorm4x8's input 674 * \return packUnorm4x8's output as a uint rvalue 675 */ 676 ir_rvalue* 677 lower_pack_unorm_4x8(ir_rvalue *vec4_rval) 678 { 679 /* From page 137 (143 of pdf) of the GLSL 4.30 spec: 680 * 681 * highp uint packUnorm4x8 (vec4 v) 682 * -------------------------------- 683 * First, converts each component of the normalized floating-point value 684 * v into 8-bit integer values. Then, the results are packed into the 685 * returned 32-bit unsigned integer. 686 * 687 * The conversion for component c of v to fixed point is done as 688 * follows: 689 * 690 * packUnorm4x8: round(clamp(c, 0, +1) * 255.0) 691 * 692 * The first component of the vector will be written to the least 693 * significant bits of the output; the last component will be written to 694 * the most significant bits. 695 * 696 * This function generates IR that approximates the following pseudo-GLSL: 697 * 698 * return pack_uvec4_to_uint(uvec4( 699 * round(clamp(VEC2_RVALUE, 0.0f, 1.0f) * 255.0f))); 700 * 701 * Here it is safe to directly convert the vec4 to uvec4 because the vec4 702 * has been clamped to a non-negative range. 703 */ 704 705 assert(vec4_rval->type == glsl_type::vec4_type); 706 707 ir_rvalue *result = pack_uvec4_to_uint( 708 f2u(round_even(mul(saturate(vec4_rval), constant(255.0f))))); 709 710 assert(result->type == glsl_type::uint_type); 711 return result; 712 } 713 714 /** 715 * \brief Lower an unpackUnorm2x16 expression. 716 * 717 * \param uint_rval is unpackUnorm2x16's input 718 * \return unpackUnorm2x16's output as a vec2 rvalue 719 */ 720 ir_rvalue* 721 lower_unpack_unorm_2x16(ir_rvalue *uint_rval) 722 { 723 /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec: 724 * 725 * highp vec2 unpackUnorm2x16 (highp uint p) 726 * ----------------------------------------- 727 * First, unpacks a single 32-bit unsigned integer p into a pair of 728 * 16-bit unsigned integers. Then, each component is converted to 729 * a normalized floating-point value to generate the returned 730 * two-component vector. 731 * 732 * The conversion for unpacked fixed-point value f to floating point is 733 * done as follows: 734 * 735 * unpackUnorm2x16: f / 65535.0 736 * 737 * The first component of the returned vector will be extracted from the 738 * least significant bits of the input; the last component will be 739 * extracted from the most significant bits. 740 * 741 * This function generates IR that approximates the following pseudo-GLSL: 742 * 743 * return vec2(unpack_uint_to_uvec2(UINT_RVALUE)) / 65535.0; 744 */ 745 746 assert(uint_rval->type == glsl_type::uint_type); 747 748 ir_rvalue *result = div(u2f(unpack_uint_to_uvec2(uint_rval)), 749 constant(65535.0f)); 750 751 assert(result->type == glsl_type::vec2_type); 752 return result; 753 } 754 755 /** 756 * \brief Lower an unpackUnorm4x8 expression. 757 * 758 * \param uint_rval is unpackUnorm4x8's input 759 * \return unpackUnorm4x8's output as a vec4 rvalue 760 */ 761 ir_rvalue* 762 lower_unpack_unorm_4x8(ir_rvalue *uint_rval) 763 { 764 /* From page 137 (143 of pdf) of the GLSL 4.30 spec: 765 * 766 * highp vec4 unpackUnorm4x8 (highp uint p) 767 * ---------------------------------------- 768 * First, unpacks a single 32-bit unsigned integer p into four 769 * 8-bit unsigned integers. Then, each component is converted to 770 * a normalized floating-point value to generate the returned 771 * two-component vector. 772 * 773 * The conversion for unpacked fixed-point value f to floating point is 774 * done as follows: 775 * 776 * unpackUnorm4x8: f / 255.0 777 * 778 * The first component of the returned vector will be extracted from the 779 * least significant bits of the input; the last component will be 780 * extracted from the most significant bits. 781 * 782 * This function generates IR that approximates the following pseudo-GLSL: 783 * 784 * return vec4(unpack_uint_to_uvec4(UINT_RVALUE)) / 255.0; 785 */ 786 787 assert(uint_rval->type == glsl_type::uint_type); 788 789 ir_rvalue *result = div(u2f(unpack_uint_to_uvec4(uint_rval)), 790 constant(255.0f)); 791 792 assert(result->type == glsl_type::vec4_type); 793 return result; 794 } 795 796 /** 797 * \brief Lower the component-wise calculation of packHalf2x16. 798 * 799 * \param f_rval is one component of packHafl2x16's input 800 * \param e_rval is the unshifted exponent bits of f_rval 801 * \param m_rval is the unshifted mantissa bits of f_rval 802 * 803 * \return a uint rvalue that encodes a float16 in its lower 16 bits 804 */ 805 ir_rvalue* 806 pack_half_1x16_nosign(ir_rvalue *f_rval, 807 ir_rvalue *e_rval, 808 ir_rvalue *m_rval) 809 { 810 assert(e_rval->type == glsl_type::uint_type); 811 assert(m_rval->type == glsl_type::uint_type); 812 813 /* uint u16; */ 814 ir_variable *u16 = factory.make_temp(glsl_type::uint_type, 815 "tmp_pack_half_1x16_u16"); 816 817 /* float f = FLOAT_RVAL; */ 818 ir_variable *f = factory.make_temp(glsl_type::float_type, 819 "tmp_pack_half_1x16_f"); 820 factory.emit(assign(f, f_rval)); 821 822 /* uint e = E_RVAL; */ 823 ir_variable *e = factory.make_temp(glsl_type::uint_type, 824 "tmp_pack_half_1x16_e"); 825 factory.emit(assign(e, e_rval)); 826 827 /* uint m = M_RVAL; */ 828 ir_variable *m = factory.make_temp(glsl_type::uint_type, 829 "tmp_pack_half_1x16_m"); 830 factory.emit(assign(m, m_rval)); 831 832 /* Preliminaries 833 * ------------- 834 * 835 * For a float16, the bit layout is: 836 * 837 * sign: 15 838 * exponent: 10:14 839 * mantissa: 0:9 840 * 841 * Let f16 be a float16 value. The sign, exponent, and mantissa 842 * determine its value thus: 843 * 844 * if e16 = 0 and m16 = 0, then zero: (-1)^s16 * 0 (1) 845 * if e16 = 0 and m16!= 0, then subnormal: (-1)^s16 * 2^(e16 - 14) * (m16 / 2^10) (2) 846 * if 0 < e16 < 31, then normal: (-1)^s16 * 2^(e16 - 15) * (1 + m16 / 2^10) (3) 847 * if e16 = 31 and m16 = 0, then infinite: (-1)^s16 * inf (4) 848 * if e16 = 31 and m16 != 0, then NaN (5) 849 * 850 * where 0 <= m16 < 2^10. 851 * 852 * For a float32, the bit layout is: 853 * 854 * sign: 31 855 * exponent: 23:30 856 * mantissa: 0:22 857 * 858 * Let f32 be a float32 value. The sign, exponent, and mantissa 859 * determine its value thus: 860 * 861 * if e32 = 0 and m32 = 0, then zero: (-1)^s * 0 (10) 862 * if e32 = 0 and m32 != 0, then subnormal: (-1)^s * 2^(e32 - 126) * (m32 / 2^23) (11) 863 * if 0 < e32 < 255, then normal: (-1)^s * 2^(e32 - 127) * (1 + m32 / 2^23) (12) 864 * if e32 = 255 and m32 = 0, then infinite: (-1)^s * inf (13) 865 * if e32 = 255 and m32 != 0, then NaN (14) 866 * 867 * where 0 <= m32 < 2^23. 868 * 869 * The minimum and maximum normal float16 values are 870 * 871 * min_norm16 = 2^(1 - 15) * (1 + 0 / 2^10) = 2^(-14) (20) 872 * max_norm16 = 2^(30 - 15) * (1 + 1023 / 2^10) (21) 873 * 874 * The step at max_norm16 is 875 * 876 * max_step16 = 2^5 (22) 877 * 878 * Observe that the float16 boundary values in equations 20-21 lie in the 879 * range of normal float32 values. 880 * 881 * 882 * Rounding Behavior 883 * ----------------- 884 * Not all float32 values can be exactly represented as a float16. We 885 * round all such intermediate float32 values to the nearest float16; if 886 * the float32 is exactly between to float16 values, we round to the one 887 * with an even mantissa. This rounding behavior has several benefits: 888 * 889 * - It has no sign bias. 890 * 891 * - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's 892 * GPU ISA. 893 * 894 * - By reproducing the behavior of the GPU (at least on Intel hardware), 895 * compile-time evaluation of constant packHalf2x16 GLSL expressions will 896 * result in the same value as if the expression were executed on the 897 * GPU. 898 * 899 * Calculation 900 * ----------- 901 * Our task is to compute s16, e16, m16 given f32. Since this function 902 * ignores the sign bit, assume that s32 = s16 = 0. There are several 903 * cases consider. 904 */ 905 906 factory.emit( 907 908 /* Case 1) f32 is NaN 909 * 910 * The resultant f16 will also be NaN. 911 */ 912 913 /* if (e32 == 255 && m32 != 0) { */ 914 if_tree(logic_and(equal(e, constant(0xffu << 23u)), 915 logic_not(equal(m, constant(0u)))), 916 917 assign(u16, constant(0x7fffu)), 918 919 /* Case 2) f32 lies in the range [0, min_norm16). 920 * 921 * The resultant float16 will be either zero, subnormal, or normal. 922 * 923 * Solving 924 * 925 * f32 = min_norm16 (30) 926 * 927 * gives 928 * 929 * e32 = 113 and m32 = 0 (31) 930 * 931 * Therefore this case occurs if and only if 932 * 933 * e32 < 113 (32) 934 */ 935 936 /* } else if (e32 < 113) { */ 937 if_tree(less(e, constant(113u << 23u)), 938 939 /* u16 = uint(round_to_even(abs(f32) * float(1u << 24u))); */ 940 assign(u16, f2u(round_even(mul(expr(ir_unop_abs, f), 941 constant((float) (1 << 24)))))), 942 943 /* Case 3) f32 lies in the range 944 * [min_norm16, max_norm16 + max_step16). 945 * 946 * The resultant float16 will be either normal or infinite. 947 * 948 * Solving 949 * 950 * f32 = max_norm16 + max_step16 (40) 951 * = 2^15 * (1 + 1023 / 2^10) + 2^5 (41) 952 * = 2^16 (42) 953 * gives 954 * 955 * e32 = 143 and m32 = 0 (43) 956 * 957 * We already solved the boundary condition f32 = min_norm16 above 958 * in equation 31. Therefore this case occurs if and only if 959 * 960 * 113 <= e32 and e32 < 143 961 */ 962 963 /* } else if (e32 < 143) { */ 964 if_tree(less(e, constant(143u << 23u)), 965 966 /* The addition below handles the case where the mantissa rounds 967 * up to 1024 and bumps the exponent. 968 * 969 * u16 = ((e - (112u << 23u)) >> 13u) 970 * + round_to_even((float(m) / (1u << 13u)); 971 */ 972 assign(u16, add(rshift(sub(e, constant(112u << 23u)), 973 constant(13u)), 974 f2u(round_even( 975 div(u2f(m), constant((float) (1 << 13))))))), 976 977 /* Case 4) f32 lies in the range [max_norm16 + max_step16, inf]. 978 * 979 * The resultant float16 will be infinite. 980 * 981 * The cases above caught all float32 values in the range 982 * [0, max_norm16 + max_step16), so this is the fall-through case. 983 */ 984 985 /* } else { */ 986 987 assign(u16, constant(31u << 10u)))))); 988 989 /* } */ 990 991 return deref(u16).val; 992 } 993 994 /** 995 * \brief Lower a packHalf2x16 expression. 996 * 997 * \param vec2_rval is packHalf2x16's input 998 * \return packHalf2x16's output as a uint rvalue 999 */ 1000 ir_rvalue* 1001 lower_pack_half_2x16(ir_rvalue *vec2_rval) 1002 { 1003 /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec: 1004 * 1005 * highp uint packHalf2x16 (mediump vec2 v) 1006 * ---------------------------------------- 1007 * Returns an unsigned integer obtained by converting the components of 1008 * a two-component floating-point vector to the 16-bit floating-point 1009 * representation found in the OpenGL ES Specification, and then packing 1010 * these two 16-bit integers into a 32-bit unsigned integer. 1011 * 1012 * The first vector component specifies the 16 least- significant bits 1013 * of the result; the second component specifies the 16 most-significant 1014 * bits. 1015 */ 1016 1017 assert(vec2_rval->type == glsl_type::vec2_type); 1018 1019 /* vec2 f = VEC2_RVAL; */ 1020 ir_variable *f = factory.make_temp(glsl_type::vec2_type, 1021 "tmp_pack_half_2x16_f"); 1022 factory.emit(assign(f, vec2_rval)); 1023 1024 /* uvec2 f32 = bitcast_f2u(f); */ 1025 ir_variable *f32 = factory.make_temp(glsl_type::uvec2_type, 1026 "tmp_pack_half_2x16_f32"); 1027 factory.emit(assign(f32, expr(ir_unop_bitcast_f2u, f))); 1028 1029 /* uvec2 f16; */ 1030 ir_variable *f16 = factory.make_temp(glsl_type::uvec2_type, 1031 "tmp_pack_half_2x16_f16"); 1032 1033 /* Get f32's unshifted exponent bits. 1034 * 1035 * uvec2 e = f32 & 0x7f800000u; 1036 */ 1037 ir_variable *e = factory.make_temp(glsl_type::uvec2_type, 1038 "tmp_pack_half_2x16_e"); 1039 factory.emit(assign(e, bit_and(f32, constant(0x7f800000u)))); 1040 1041 /* Get f32's unshifted mantissa bits. 1042 * 1043 * uvec2 m = f32 & 0x007fffffu; 1044 */ 1045 ir_variable *m = factory.make_temp(glsl_type::uvec2_type, 1046 "tmp_pack_half_2x16_m"); 1047 factory.emit(assign(m, bit_and(f32, constant(0x007fffffu)))); 1048 1049 /* Set f16's exponent and mantissa bits. 1050 * 1051 * f16.x = pack_half_1x16_nosign(e.x, m.x); 1052 * f16.y = pack_half_1y16_nosign(e.y, m.y); 1053 */ 1054 factory.emit(assign(f16, pack_half_1x16_nosign(swizzle_x(f), 1055 swizzle_x(e), 1056 swizzle_x(m)), 1057 WRITEMASK_X)); 1058 factory.emit(assign(f16, pack_half_1x16_nosign(swizzle_y(f), 1059 swizzle_y(e), 1060 swizzle_y(m)), 1061 WRITEMASK_Y)); 1062 1063 /* Set f16's sign bits. 1064 * 1065 * f16 |= (f32 & (1u << 31u) >> 16u; 1066 */ 1067 factory.emit( 1068 assign(f16, bit_or(f16, 1069 rshift(bit_and(f32, constant(1u << 31u)), 1070 constant(16u))))); 1071 1072 1073 /* return (f16.y << 16u) | f16.x; */ 1074 ir_rvalue *result = bit_or(lshift(swizzle_y(f16), 1075 constant(16u)), 1076 swizzle_x(f16)); 1077 1078 assert(result->type == glsl_type::uint_type); 1079 return result; 1080 } 1081 1082 /** 1083 * \brief Lower the component-wise calculation of unpackHalf2x16. 1084 * 1085 * Given a uint that encodes a float16 in its lower 16 bits, this function 1086 * returns a uint that encodes a float32 with the same value. The sign bit 1087 * of the float16 is ignored. 1088 * 1089 * \param e_rval is the unshifted exponent bits of a float16 1090 * \param m_rval is the unshifted mantissa bits of a float16 1091 * \param a uint rvalue that encodes a float32 1092 */ 1093 ir_rvalue* 1094 unpack_half_1x16_nosign(ir_rvalue *e_rval, ir_rvalue *m_rval) 1095 { 1096 assert(e_rval->type == glsl_type::uint_type); 1097 assert(m_rval->type == glsl_type::uint_type); 1098 1099 /* uint u32; */ 1100 ir_variable *u32 = factory.make_temp(glsl_type::uint_type, 1101 "tmp_unpack_half_1x16_u32"); 1102 1103 /* uint e = E_RVAL; */ 1104 ir_variable *e = factory.make_temp(glsl_type::uint_type, 1105 "tmp_unpack_half_1x16_e"); 1106 factory.emit(assign(e, e_rval)); 1107 1108 /* uint m = M_RVAL; */ 1109 ir_variable *m = factory.make_temp(glsl_type::uint_type, 1110 "tmp_unpack_half_1x16_m"); 1111 factory.emit(assign(m, m_rval)); 1112 1113 /* Preliminaries 1114 * ------------- 1115 * 1116 * For a float16, the bit layout is: 1117 * 1118 * sign: 15 1119 * exponent: 10:14 1120 * mantissa: 0:9 1121 * 1122 * Let f16 be a float16 value. The sign, exponent, and mantissa 1123 * determine its value thus: 1124 * 1125 * if e16 = 0 and m16 = 0, then zero: (-1)^s16 * 0 (1) 1126 * if e16 = 0 and m16!= 0, then subnormal: (-1)^s16 * 2^(e16 - 14) * (m16 / 2^10) (2) 1127 * if 0 < e16 < 31, then normal: (-1)^s16 * 2^(e16 - 15) * (1 + m16 / 2^10) (3) 1128 * if e16 = 31 and m16 = 0, then infinite: (-1)^s16 * inf (4) 1129 * if e16 = 31 and m16 != 0, then NaN (5) 1130 * 1131 * where 0 <= m16 < 2^10. 1132 * 1133 * For a float32, the bit layout is: 1134 * 1135 * sign: 31 1136 * exponent: 23:30 1137 * mantissa: 0:22 1138 * 1139 * Let f32 be a float32 value. The sign, exponent, and mantissa 1140 * determine its value thus: 1141 * 1142 * if e32 = 0 and m32 = 0, then zero: (-1)^s * 0 (10) 1143 * if e32 = 0 and m32 != 0, then subnormal: (-1)^s * 2^(e32 - 126) * (m32 / 2^23) (11) 1144 * if 0 < e32 < 255, then normal: (-1)^s * 2^(e32 - 127) * (1 + m32 / 2^23) (12) 1145 * if e32 = 255 and m32 = 0, then infinite: (-1)^s * inf (13) 1146 * if e32 = 255 and m32 != 0, then NaN (14) 1147 * 1148 * where 0 <= m32 < 2^23. 1149 * 1150 * Calculation 1151 * ----------- 1152 * Our task is to compute s32, e32, m32 given f16. Since this function 1153 * ignores the sign bit, assume that s32 = s16 = 0. There are several 1154 * cases consider. 1155 */ 1156 1157 factory.emit( 1158 1159 /* Case 1) f16 is zero or subnormal. 1160 * 1161 * The simplest method of calcuating f32 in this case is 1162 * 1163 * f32 = f16 (20) 1164 * = 2^(-14) * (m16 / 2^10) (21) 1165 * = m16 / 2^(-24) (22) 1166 */ 1167 1168 /* if (e16 == 0) { */ 1169 if_tree(equal(e, constant(0u)), 1170 1171 /* u32 = bitcast_f2u(float(m) / float(1 << 24)); */ 1172 assign(u32, expr(ir_unop_bitcast_f2u, 1173 div(u2f(m), constant((float)(1 << 24))))), 1174 1175 /* Case 2) f16 is normal. 1176 * 1177 * The equation 1178 * 1179 * f32 = f16 (30) 1180 * 2^(e32 - 127) * (1 + m32 / 2^23) = (31) 1181 * 2^(e16 - 15) * (1 + m16 / 2^10) 1182 * 1183 * can be decomposed into two 1184 * 1185 * 2^(e32 - 127) = 2^(e16 - 15) (32) 1186 * 1 + m32 / 2^23 = 1 + m16 / 2^10 (33) 1187 * 1188 * which solve to 1189 * 1190 * e32 = e16 + 112 (34) 1191 * m32 = m16 * 2^13 (35) 1192 */ 1193 1194 /* } else if (e16 < 31)) { */ 1195 if_tree(less(e, constant(31u << 10u)), 1196 1197 /* u32 = ((e + (112 << 10)) | m) << 13; 1198 */ 1199 assign(u32, lshift(bit_or(add(e, constant(112u << 10u)), m), 1200 constant(13u))), 1201 1202 1203 /* Case 3) f16 is infinite. */ 1204 if_tree(equal(m, constant(0u)), 1205 1206 assign(u32, constant(255u << 23u)), 1207 1208 /* Case 4) f16 is NaN. */ 1209 /* } else { */ 1210 1211 assign(u32, constant(0x7fffffffu)))))); 1212 1213 /* } */ 1214 1215 return deref(u32).val; 1216 } 1217 1218 /** 1219 * \brief Lower an unpackHalf2x16 expression. 1220 * 1221 * \param uint_rval is unpackHalf2x16's input 1222 * \return unpackHalf2x16's output as a vec2 rvalue 1223 */ 1224 ir_rvalue* 1225 lower_unpack_half_2x16(ir_rvalue *uint_rval) 1226 { 1227 /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec: 1228 * 1229 * mediump vec2 unpackHalf2x16 (highp uint v) 1230 * ------------------------------------------ 1231 * Returns a two-component floating-point vector with components 1232 * obtained by unpacking a 32-bit unsigned integer into a pair of 16-bit 1233 * values, interpreting those values as 16-bit floating-point numbers 1234 * according to the OpenGL ES Specification, and converting them to 1235 * 32-bit floating-point values. 1236 * 1237 * The first component of the vector is obtained from the 1238 * 16 least-significant bits of v; the second component is obtained 1239 * from the 16 most-significant bits of v. 1240 */ 1241 assert(uint_rval->type == glsl_type::uint_type); 1242 1243 /* uint u = RVALUE; 1244 * uvec2 f16 = uvec2(u.x & 0xffff, u.y >> 16); 1245 */ 1246 ir_variable *f16 = factory.make_temp(glsl_type::uvec2_type, 1247 "tmp_unpack_half_2x16_f16"); 1248 factory.emit(assign(f16, unpack_uint_to_uvec2(uint_rval))); 1249 1250 /* uvec2 f32; */ 1251 ir_variable *f32 = factory.make_temp(glsl_type::uvec2_type, 1252 "tmp_unpack_half_2x16_f32"); 1253 1254 /* Get f16's unshifted exponent bits. 1255 * 1256 * uvec2 e = f16 & 0x7c00u; 1257 */ 1258 ir_variable *e = factory.make_temp(glsl_type::uvec2_type, 1259 "tmp_unpack_half_2x16_e"); 1260 factory.emit(assign(e, bit_and(f16, constant(0x7c00u)))); 1261 1262 /* Get f16's unshifted mantissa bits. 1263 * 1264 * uvec2 m = f16 & 0x03ffu; 1265 */ 1266 ir_variable *m = factory.make_temp(glsl_type::uvec2_type, 1267 "tmp_unpack_half_2x16_m"); 1268 factory.emit(assign(m, bit_and(f16, constant(0x03ffu)))); 1269 1270 /* Set f32's exponent and mantissa bits. 1271 * 1272 * f32.x = unpack_half_1x16_nosign(e.x, m.x); 1273 * f32.y = unpack_half_1x16_nosign(e.y, m.y); 1274 */ 1275 factory.emit(assign(f32, unpack_half_1x16_nosign(swizzle_x(e), 1276 swizzle_x(m)), 1277 WRITEMASK_X)); 1278 factory.emit(assign(f32, unpack_half_1x16_nosign(swizzle_y(e), 1279 swizzle_y(m)), 1280 WRITEMASK_Y)); 1281 1282 /* Set f32's sign bit. 1283 * 1284 * f32 |= (f16 & 0x8000u) << 16u; 1285 */ 1286 factory.emit(assign(f32, bit_or(f32, 1287 lshift(bit_and(f16, 1288 constant(0x8000u)), 1289 constant(16u))))); 1290 1291 /* return bitcast_u2f(f32); */ 1292 ir_rvalue *result = expr(ir_unop_bitcast_u2f, f32); 1293 assert(result->type == glsl_type::vec2_type); 1294 return result; 1295 } 1296 }; 1297 1298 } // namespace anonymous 1299 1300 /** 1301 * \brief Lower the builtin packing functions. 1302 * 1303 * \param op_mask is a bitmask of `enum lower_packing_builtins_op`. 1304 */ 1305 bool 1306 lower_packing_builtins(exec_list *instructions, int op_mask) 1307 { 1308 lower_packing_builtins_visitor v(op_mask); 1309 visit_list_elements(&v, instructions, true); 1310 return v.get_progress(); 1311 } 1312