Lines Matching refs:gen
67 * Context/state used during code gen.
109 init_gen_context(struct gen_context *gen, struct ppc_function *func)
113 memset(gen, 0, sizeof(*gen));
114 gen->f = func;
115 gen->inputs_reg = ppc_reserve_register(func, 3); /* first function param */
116 gen->outputs_reg = ppc_reserve_register(func, 4); /* second function param */
117 gen->temps_reg = ppc_reserve_register(func, 5); /* ... */
118 gen->immed_reg = ppc_reserve_register(func, 6);
119 gen->const_reg = ppc_reserve_register(func, 7);
120 gen->builtins_reg = ppc_reserve_register(func, 8);
121 gen->one_vec = -1;
122 gen->bit31_vec = -1;
123 gen->offset_reg = -1;
124 gen->offset_value = -9999999;
126 gen->temps_map[i][0] = ppc_allocate_vec_register(gen->f);
127 gen->temps_map[i][1] = ppc_allocate_vec_register(gen->f);
128 gen->temps_map[i][2] = ppc_allocate_vec_register(gen->f);
129 gen->temps_map[i][3] = ppc_allocate_vec_register(gen->f);
171 emit_li_offset(struct gen_context *gen, int offset)
173 if (gen->offset_reg <= 0) {
175 gen->offset_reg = ppc_allocate_register(gen->f);
179 if (gen->offset_value < 0 || gen->offset_value != offset) {
180 gen->offset_value = offset;
181 ppc_li(gen->f, gen->offset_reg, offset);
184 return gen->offset_reg;
193 reset_li_offset(struct gen_context *gen)
195 gen->offset_value = -9999999;
207 load_constant_vec(struct gen_context *gen, int dst_vec, float value)
213 int offset_reg = emit_li_offset(gen, offset);
221 ppc_lvewx(gen->f, dst_vec, gen->builtins_reg, offset_reg);
223 ppc_vspltw(gen->f, dst_vec, dst_vec, pos % 4);
235 gen_one_vec(struct gen_context *gen)
237 if (gen->one_vec < 0) {
238 gen->one_vec = ppc_allocate_vec_register(gen->f);
239 load_constant_vec(gen, gen->one_vec, 1.0f);
241 return gen->one_vec;
248 gen_get_bit31_vec(struct gen_context *gen)
250 if (gen->bit31_vec < 0) {
251 gen->bit31_vec = ppc_allocate_vec_register(gen->f);
252 ppc_vspltisw(gen->f, gen->bit31_vec, -1);
253 ppc_vslw(gen->f, gen->bit31_vec, gen->bit31_vec, gen->bit31_vec);
255 return gen->bit31_vec;
263 emit_fetch(struct gen_context *gen,
279 int offset_reg = emit_li_offset(gen, offset);
280 dst_vec = ppc_allocate_vec_register(gen->f);
281 ppc_lvx(gen->f, dst_vec, gen->inputs_reg, offset_reg);
290 dst_vec = gen->temps_map[reg->Register.Index][swizzle];
295 int offset_reg = emit_li_offset(gen, offset);
296 dst_vec = ppc_allocate_vec_register(gen->f);
297 ppc_lvx(gen->f, dst_vec, gen->temps_reg, offset_reg);
303 int offset_reg = emit_li_offset(gen, offset);
304 dst_vec = ppc_allocate_vec_register(gen->f);
311 ppc_lvewx(gen->f, dst_vec, gen->immed_reg, offset_reg);
313 ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle);
319 int offset_reg = emit_li_offset(gen, offset);
320 dst_vec = ppc_allocate_vec_register(gen->f);
327 ppc_lvewx(gen->f, dst_vec, gen->const_reg, offset_reg);
329 ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle);
345 int bit31_vec = gen_get_bit31_vec(gen);
350 dst_vec2 = ppc_allocate_vec_register(gen->f);
359 ppc_vandc(gen->f, dst_vec2, dst_vec, bit31_vec);
363 ppc_vor(gen->f, dst_vec2, dst_vec, bit31_vec);
367 ppc_vxor(gen->f, dst_vec2, dst_vec, bit31_vec);
414 get_src_vec(struct gen_context *gen,
423 for (i = 0; i < gen->num_regs; i++) {
424 if (equal_src_locs(&gen->regs[i].src, gen->regs[i].chan, src, chan)) {
426 assert(gen->regs[i].vec >= 0);
427 return gen->regs[i].vec;
432 vec = emit_fetch(gen, src, chan);
433 gen->regs[gen->num_regs].src = *src;
434 gen->regs[gen->num_regs].chan = chan;
435 gen->regs[gen->num_regs].vec = vec;
436 gen->num_regs++;
438 assert(gen->num_regs <= Elements(gen->regs));
450 release_src_vecs(struct gen_context *gen)
453 for (i = 0; i < gen->num_regs; i++) {
454 const const struct tgsi_full_src_register src = gen->regs[i].src;
456 ppc_release_vec_register(gen->f, gen->regs[i].vec);
459 gen->num_regs = 0;
465 get_dst_vec(struct gen_context *gen,
472 int vec = gen->temps_map[reg->Register.Index][chan_index];
476 return ppc_allocate_vec_register(gen->f);
486 emit_store(struct gen_context *gen,
498 int offset_reg = emit_li_offset(gen, offset);
499 ppc_stvx(gen->f, src_vec, gen->outputs_reg, offset_reg);
505 int dst_vec = gen->temps_map[reg->Register.Index][chan_index];
507 ppc_vmove(gen->f, dst_vec, src_vec);
513 int offset_reg = emit_li_offset(gen, offset);
514 ppc_stvx(gen->f, src_vec, gen->temps_reg, offset_reg);
546 ppc_release_vec_register(gen->f, src_vec);
551 emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
556 v0 = get_src_vec(gen, inst, 0, TGSI_CHAN_X);
557 v1 = ppc_allocate_vec_register(gen->f);
562 ppc_vrsqrtefp(gen->f, v1, v0);
566 ppc_vrefp(gen->f, v1, v0);
573 emit_store(gen, v1, inst, chan_index, FALSE);
576 release_src_vecs(gen);
577 ppc_release_vec_register(gen->f, v1);
582 emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
587 int v0 = get_src_vec(gen, inst, 0, chan_index); /* v0 = srcreg[0] */
588 int v1 = get_dst_vec(gen, inst, chan_index);
593 int bit31_vec = gen_get_bit31_vec(gen);
594 ppc_vandc(gen->f, v1, v0, bit31_vec); /* v1 = v0 & ~bit31 */
598 ppc_vrfim(gen->f, v1, v0); /* v1 = floor(v0) */
601 ppc_vrfim(gen->f, v1, v0); /* tmp = floor(v0) */
602 ppc_vsubfp(gen->f, v1, v0, v1); /* v1 = v0 - v1 */
605 ppc_vexptefp(gen->f, v1, v0); /* v1 = 2^v0 */
609 ppc_vlogefp(gen->f, v1, v0); /* v1 = log2(v0) */
613 ppc_vmove(gen->f, v1, v0);
618 emit_store(gen, v1, inst, chan_index, TRUE); /* store v0 */
621 release_src_vecs(gen);
626 emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst)
632 zero_vec = ppc_allocate_vec_register(gen->f);
633 ppc_vzero(gen->f, zero_vec);
638 int v0 = get_src_vec(gen, inst, 0, chan);
639 int v1 = get_src_vec(gen, inst, 1, chan);
640 int v2 = get_dst_vec(gen, inst, chan);
645 ppc_vaddfp(gen->f, v2, v0, v1);
648 ppc_vsubfp(gen->f, v2, v0, v1);
651 ppc_vmaddfp(gen->f, v2, v0, v1, zero_vec);
654 ppc_vminfp(gen->f, v2, v0, v1);
657 ppc_vmaxfp(gen->f, v2, v0, v1);
664 emit_store(gen, v2, inst, chan, TRUE);
668 ppc_release_vec_register(gen->f, zero_vec);
670 release_src_vecs(gen);
675 emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst)
681 int v0 = get_src_vec(gen, inst, 0, chan);
682 int v1 = get_src_vec(gen, inst, 1, chan);
683 int v2 = get_src_vec(gen, inst, 2, chan);
684 int v3 = get_dst_vec(gen, inst, chan);
689 ppc_vmaddfp(gen->f, v3, v0, v1, v2); /* v3 = v0 * v1 + v2 */
692 ppc_vsubfp(gen->f, v3, v1, v2); /* v3 = v1 - v2 */
693 ppc_vmaddfp(gen->f, v3, v0, v3, v2); /* v3 = v0 * v3 + v2 */
700 emit_store(gen, v3, inst, chan, TRUE);
703 release_src_vecs(gen);
711 emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst)
714 int one_vec = gen_one_vec(gen);
718 int v0 = get_src_vec(gen, inst, 0, chan);
719 int v1 = get_src_vec(gen, inst, 1, chan);
720 int v2 = get_dst_vec(gen, inst, chan);
728 ppc_vcmpeqfpx(gen->f, v2, v0, v1); /* v2 = v0 == v1 ? ~0 : 0 */
735 ppc_vcmpgtfpx(gen->f, v2, v1, v0); /* v2 = v1 > v0 ? ~0 : 0 */
742 ppc_vcmpgtfpx(gen->f, v2, v0, v1); /* v2 = v0 > v1 ? ~0 : 0 */
751 ppc_vandc(gen->f, v2, one_vec, v2); /* v2 = one_vec & ~v2 */
753 ppc_vand(gen->f, v2, one_vec, v2); /* v2 = one_vec & v2 */
756 emit_store(gen, v2, inst, chan, TRUE);
759 release_src_vecs(gen);
764 emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst)
769 v2 = ppc_allocate_vec_register(gen->f);
771 ppc_vzero(gen->f, v2); /* v2 = {0, 0, 0, 0} */
773 v0 = get_src_vec(gen, inst, 0, TGSI_CHAN_X); /* v0 = src0.XXXX */
774 v1 = get_src_vec(gen, inst, 1, TGSI_CHAN_X); /* v1 = src1.XXXX */
775 ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */
777 v0 = get_src_vec(gen, inst, 0, TGSI_CHAN_Y); /* v0 = src0.YYYY */
778 v1 = get_src_vec(gen, inst, 1, TGSI_CHAN_Y); /* v1 = src1.YYYY */
779 ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */
781 v0 = get_src_vec(gen, inst, 0, TGSI_CHAN_Z); /* v0 = src0.ZZZZ */
782 v1 = get_src_vec(gen, inst, 1, TGSI_CHAN_Z); /* v1 = src1.ZZZZ */
783 ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */
786 v0 = get_src_vec(gen, inst, 0, TGSI_CHAN_W); /* v0 = src0.WWWW */
787 v1 = get_src_vec(gen, inst, 1, TGSI_CHAN_W); /* v1 = src1.WWWW */
788 ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */
791 v1 = get_src_vec(gen, inst, 1, TGSI_CHAN_W); /* v1 = src1.WWWW */
792 ppc_vaddfp(gen->f, v2, v2, v1); /* v2 = v2 + v1 */
796 emit_store(gen, v2, inst, chan_index, FALSE); /* store v2, free v2 later */
799 release_src_vecs(gen);
801 ppc_release_vec_register(gen->f, v2);
825 emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst)
827 int one_vec = gen_one_vec(gen);
831 emit_store(gen, one_vec, inst, TGSI_CHAN_X, FALSE);
838 int zero_vec = ppc_allocate_vec_register(gen->f);
840 x_vec = get_src_vec(gen, inst, 0, TGSI_CHAN_X); /* x_vec = src[0].x */
842 ppc_vzero(gen->f, zero_vec); /* zero = {0,0,0,0} */
843 ppc_vmaxfp(gen->f, x_vec, x_vec, zero_vec); /* x_vec = max(x_vec, 0) */
846 emit_store(gen, x_vec, inst, TGSI_CHAN_Y, FALSE);
851 int z_vec = ppc_allocate_vec_register(gen->f);
852 int pow_vec = ppc_allocate_vec_register(gen->f);
853 int pos_vec = ppc_allocate_vec_register(gen->f);
854 int p128_vec = ppc_allocate_vec_register(gen->f);
855 int n128_vec = ppc_allocate_vec_register(gen->f);
857 y_vec = get_src_vec(gen, inst, 0, TGSI_CHAN_Y); /* y_vec = src[0].y */
858 ppc_vmaxfp(gen->f, y_vec, y_vec, zero_vec); /* y_vec = max(y_vec, 0) */
860 w_vec = get_src_vec(gen, inst, 0, TGSI_CHAN_W); /* w_vec = src[0].w */
863 load_constant_vec(gen, p128_vec, 128.0f);
864 load_constant_vec(gen, n128_vec, -128.0f);
865 ppc_vmaxfp(gen->f, w_vec, w_vec, n128_vec); /* w = max(w, -128) */
866 ppc_vminfp(gen->f, w_vec, w_vec, p128_vec); /* w = min(w, 128) */
873 ppc_vec_pow(gen->f, pow_vec, y_vec, w_vec); /* pow = pow(y, w) */
874 ppc_vcmpgtfpx(gen->f, pos_vec, x_vec, zero_vec); /* pos = x > 0 */
875 ppc_vand(gen->f, z_vec, pow_vec, pos_vec); /* z = pow & pos */
877 emit_store(gen, z_vec, inst, TGSI_CHAN_Z, FALSE);
879 ppc_release_vec_register(gen->f, z_vec);
880 ppc_release_vec_register(gen->f, pow_vec);
881 ppc_release_vec_register(gen->f, pos_vec);
882 ppc_release_vec_register(gen->f, p128_vec);
883 ppc_release_vec_register(gen->f, n128_vec);
886 ppc_release_vec_register(gen->f, zero_vec);
891 emit_store(gen, one_vec, inst, TGSI_CHAN_W, FALSE);
894 release_src_vecs(gen);
899 emit_exp(struct gen_context *gen, struct tgsi_full_instruction *inst)
901 const int one_vec = gen_one_vec(gen);
905 src_vec = get_src_vec(gen, inst, 0, TGSI_CHAN_X);
909 int dst_vec = get_dst_vec(gen, inst, TGSI_CHAN_X);
910 int tmp_vec = ppc_allocate_vec_register(gen->f);
911 ppc_vrfim(gen->f, tmp_vec, src_vec); /* tmp = floor(src); */
912 ppc_vexptefp(gen->f, dst_vec, tmp_vec); /* dst = 2 ^ tmp */
913 emit_store(gen, dst_vec, inst, TGSI_CHAN_X, TRUE);
914 ppc_release_vec_register(gen->f, tmp_vec);
919 int dst_vec = get_dst_vec(gen, inst, TGSI_CHAN_Y);
920 int tmp_vec = ppc_allocate_vec_register(gen->f);
921 ppc_vrfim(gen->f, tmp_vec, src_vec); /* tmp = floor(src); */
922 ppc_vsubfp(gen->f, dst_vec, src_vec, tmp_vec); /* dst = src - tmp */
923 emit_store(gen, dst_vec, inst, TGSI_CHAN_Y, TRUE);
924 ppc_release_vec_register(gen->f, tmp_vec);
929 int dst_vec = get_dst_vec(gen, inst, TGSI_CHAN_Z);
930 ppc_vexptefp(gen->f, dst_vec, src_vec); /* dst = 2 ^ src */
931 emit_store(gen, dst_vec, inst, TGSI_CHAN_Z, TRUE);
936 emit_store(gen, one_vec, inst, TGSI_CHAN_W, FALSE);
939 release_src_vecs(gen);
944 emit_log(struct gen_context *gen, struct tgsi_full_instruction *inst)
946 const int bit31_vec = gen_get_bit31_vec(gen);
947 const int one_vec = gen_one_vec(gen);
951 src_vec = get_src_vec(gen, inst, 0, TGSI_CHAN_X);
954 abs_vec = ppc_allocate_vec_register(gen->f);
955 ppc_vandc(gen->f, abs_vec, src_vec, bit31_vec); /* abs = src & ~bit31 */
961 int tmp_vec = ppc_allocate_vec_register(gen->f);
962 ppc_vlogefp(gen->f, tmp_vec, abs_vec); /* tmp = log2(abs) */
963 ppc_vrfim(gen->f, tmp_vec, tmp_vec); /* tmp = floor(tmp); */
967 emit_store(gen, tmp_vec, inst, TGSI_CHAN_X, FALSE);
972 const int zero_vec = ppc_allocate_vec_register(gen->f);
973 ppc_vzero(gen->f, zero_vec);
974 ppc_vexptefp(gen->f, tmp_vec, tmp_vec); /* tmp = 2 ^ tmp */
975 ppc_vrefp(gen->f, tmp_vec, tmp_vec); /* tmp = 1 / tmp */
977 ppc_vmaddfp(gen->f, tmp_vec, abs_vec, tmp_vec, zero_vec);
978 emit_store(gen, tmp_vec, inst, TGSI_CHAN_Y, FALSE);
979 ppc_release_vec_register(gen->f, zero_vec);
982 ppc_release_vec_register(gen->f, tmp_vec);
987 int dst_vec = get_dst_vec(gen, inst, TGSI_CHAN_Z);
988 ppc_vlogefp(gen->f, dst_vec, abs_vec); /* dst = log2(abs) */
989 emit_store(gen, dst_vec, inst, TGSI_CHAN_Z, TRUE);
994 emit_store(gen, one_vec, inst, TGSI_CHAN_W, FALSE);
997 ppc_release_vec_register(gen->f, abs_vec);
998 release_src_vecs(gen);
1003 emit_pow(struct gen_context *gen, struct tgsi_full_instruction *inst)
1005 int s0_vec = get_src_vec(gen, inst, 0, TGSI_CHAN_X);
1006 int s1_vec = get_src_vec(gen, inst, 1, TGSI_CHAN_X);
1007 int pow_vec = ppc_allocate_vec_register(gen->f);
1010 ppc_vec_pow(gen->f, pow_vec, s0_vec, s1_vec);
1013 emit_store(gen, pow_vec, inst, chan, FALSE);
1016 ppc_release_vec_register(gen->f, pow_vec);
1018 release_src_vecs(gen);
1023 emit_xpd(struct gen_context *gen, struct tgsi_full_instruction *inst)
1030 zero_vec = ppc_allocate_vec_register(gen->f);
1031 ppc_vzero(gen->f, zero_vec);
1033 tmp_vec = ppc_allocate_vec_register(gen->f);
1034 tmp2_vec = ppc_allocate_vec_register(gen->f);
1038 x0_vec = get_src_vec(gen, inst, 0, TGSI_CHAN_X);
1039 x1_vec = get_src_vec(gen, inst, 1, TGSI_CHAN_X);
1043 y0_vec = get_src_vec(gen, inst, 0, TGSI_CHAN_Y);
1044 y1_vec = get_src_vec(gen, inst, 1, TGSI_CHAN_Y);
1048 z0_vec = get_src_vec(gen, inst, 0, TGSI_CHAN_Z);
1049 z1_vec = get_src_vec(gen, inst, 1, TGSI_CHAN_Z);
1054 ppc_vmaddfp(gen->f, tmp_vec, y0_vec, z1_vec, zero_vec);
1056 ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, z0_vec, y1_vec);
1057 emit_store(gen, tmp_vec, inst, TGSI_CHAN_X, FALSE);
1061 gen->f, tmp_vec, z0_vec, x1_vec, zero_vec);
1063 ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, x0_vec, z1_vec);
1064 emit_store(gen, tmp_vec, inst, TGSI_CHAN_Y, FALSE);
1068 ppc_vmaddfp(gen->f, tmp_vec, x0_vec, y1_vec, zero_vec);
1070 ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, y0_vec, x1_vec);
1071 emit_store(gen, tmp_vec, inst, TGSI_CHAN_Z, FALSE);
1075 ppc_release_vec_register(gen->f, tmp_vec);
1076 ppc_release_vec_register(gen->f, zero_vec);
1077 release_src_vecs(gen);
1081 emit_instruction(struct gen_context *gen,
1100 emit_unaryop(gen, inst);
1104 emit_scalar_unaryop(gen, inst);
1111 emit_binop(gen, inst);
1119 emit_inequality(gen, inst);
1123 emit_triop(gen, inst);
1128 emit_dotprod(gen, inst);
1131 emit_lit(gen, inst);
1134 emit_log(gen, inst);
1137 emit_exp(gen, inst);
1140 emit_pow(gen, inst);
1143 emit_xpd(gen, inst);
1260 struct gen_context gen;
1277 init_gen_context(&gen, func);
1300 ok = emit_instruction(&gen, &parse.FullToken.FullInstruction);