Home | History | Annotate | Download | only in rtasm
      1 /**************************************************************************
      2  *
      3  * Copyright (C) 2008 Tungsten Graphics, Inc.   All Rights Reserved.
      4  * Copyright (C) 2009 VMware, Inc.  All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice shall be included
     14  * in all copies or substantial portions of the Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     19  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
     20  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
     21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  *
     23  **************************************************************************/
     24 
     25 /**
     26  * PPC code generation.
     27  * For reference, see http://www.power.org/resources/reading/PowerISA_V2.05.pdf
     28  * ABI info: http://www.cs.utsa.edu/~whaley/teach/cs6463FHPO/LEC/lec12_ho.pdf
     29  *
     30  * Other PPC refs:
     31  * http://www-01.ibm.com/chips/techlib/techlib.nsf/techdocs/852569B20050FF778525699600719DF2
     32  * http://www.ibm.com/developerworks/eserver/library/es-archguide-v2.html
     33  * http://www.freescale.com/files/product/doc/MPCFPE32B.pdf
     34  *
     35  * \author Brian Paul
     36  */
     37 
     38 
     39 #include <stdio.h>
     40 #include "util/u_memory.h"
     41 #include "util/u_debug.h"
     42 #include "rtasm_execmem.h"
     43 #include "rtasm_ppc.h"
     44 
     45 
     46 void
     47 ppc_init_func(struct ppc_function *p)
     48 {
     49    uint i;
     50 
     51    memset(p, 0, sizeof(*p));
     52 
     53    p->num_inst = 0;
     54    p->max_inst = 100; /* first guess at buffer size */
     55    p->store = rtasm_exec_malloc(p->max_inst * PPC_INST_SIZE);
     56    p->reg_used = 0x0;
     57    p->fp_used = 0x0;
     58    p->vec_used = 0x0;
     59 
     60    p->print = FALSE;
     61    p->indent = 0;
     62 
     63    /* only allow using gp registers 3..12 for now */
     64    for (i = 0; i < 3; i++)
     65       ppc_reserve_register(p, i);
     66    for (i = 12; i < PPC_NUM_REGS; i++)
     67       ppc_reserve_register(p, i);
     68 }
     69 
     70 
     71 void
     72 ppc_release_func(struct ppc_function *p)
     73 {
     74    assert(p->num_inst <= p->max_inst);
     75    if (p->store != NULL) {
     76       rtasm_exec_free(p->store);
     77    }
     78    p->store = NULL;
     79 }
     80 
     81 
     82 uint
     83 ppc_num_instructions(const struct ppc_function *p)
     84 {
     85    return p->num_inst;
     86 }
     87 
     88 
     89 void (*ppc_get_func(struct ppc_function *p))(void)
     90 {
     91 #if 0
     92    DUMP_END();
     93    if (DISASSEM && p->store)
     94       debug_printf("disassemble %p %p\n", p->store, p->csr);
     95 
     96    if (p->store == p->error_overflow)
     97       return (void (*)(void)) NULL;
     98    else
     99 #endif
    100       return (void (*)(void)) pointer_to_func(p->store);
    101 }
    102 
    103 
    104 void
    105 ppc_dump_func(const struct ppc_function *p)
    106 {
    107    uint i;
    108    for (i = 0; i < p->num_inst; i++) {
    109       debug_printf("%3u: 0x%08x\n", i, p->store[i]);
    110    }
    111 }
    112 
    113 
    114 void
    115 ppc_print_code(struct ppc_function *p, boolean enable)
    116 {
    117    p->print = enable;
    118 }
    119 
    120 
    121 void
    122 ppc_indent(struct ppc_function *p, int spaces)
    123 {
    124    p->indent += spaces;
    125 }
    126 
    127 
    128 static void
    129 indent(const struct ppc_function *p)
    130 {
    131    int i;
    132    for (i = 0; i < p->indent; i++) {
    133       putchar(' ');
    134    }
    135 }
    136 
    137 
    138 void
    139 ppc_comment(struct ppc_function *p, int rel_indent, const char *s)
    140 {
    141    if (p->print) {
    142       p->indent += rel_indent;
    143       indent(p);
    144       p->indent -= rel_indent;
    145       printf("# %s\n", s);
    146    }
    147 }
    148 
    149 
    150 /**
    151  * Mark a register as being unavailable.
    152  */
    153 int
    154 ppc_reserve_register(struct ppc_function *p, int reg)
    155 {
    156    assert(reg < PPC_NUM_REGS);
    157    p->reg_used |= (1 << reg);
    158    return reg;
    159 }
    160 
    161 
    162 /**
    163  * Allocate a general purpose register.
    164  * \return register index or -1 if none left.
    165  */
    166 int
    167 ppc_allocate_register(struct ppc_function *p)
    168 {
    169    unsigned i;
    170    for (i = 0; i < PPC_NUM_REGS; i++) {
    171       const uint32_t mask = 1 << i;
    172       if ((p->reg_used & mask) == 0) {
    173          p->reg_used |= mask;
    174          return i;
    175       }
    176    }
    177    printf("OUT OF PPC registers!\n");
    178    return -1;
    179 }
    180 
    181 
    182 /**
    183  * Mark the given general purpose register as "unallocated".
    184  */
    185 void
    186 ppc_release_register(struct ppc_function *p, int reg)
    187 {
    188    assert(reg < PPC_NUM_REGS);
    189    assert(p->reg_used & (1 << reg));
    190    p->reg_used &= ~(1 << reg);
    191 }
    192 
    193 
    194 /**
    195  * Allocate a floating point register.
    196  * \return register index or -1 if none left.
    197  */
    198 int
    199 ppc_allocate_fp_register(struct ppc_function *p)
    200 {
    201    unsigned i;
    202    for (i = 0; i < PPC_NUM_FP_REGS; i++) {
    203       const uint32_t mask = 1 << i;
    204       if ((p->fp_used & mask) == 0) {
    205          p->fp_used |= mask;
    206          return i;
    207       }
    208    }
    209    printf("OUT OF PPC FP registers!\n");
    210    return -1;
    211 }
    212 
    213 
    214 /**
    215  * Mark the given floating point register as "unallocated".
    216  */
    217 void
    218 ppc_release_fp_register(struct ppc_function *p, int reg)
    219 {
    220    assert(reg < PPC_NUM_FP_REGS);
    221    assert(p->fp_used & (1 << reg));
    222    p->fp_used &= ~(1 << reg);
    223 }
    224 
    225 
    226 /**
    227  * Allocate a vector register.
    228  * \return register index or -1 if none left.
    229  */
    230 int
    231 ppc_allocate_vec_register(struct ppc_function *p)
    232 {
    233    unsigned i;
    234    for (i = 0; i < PPC_NUM_VEC_REGS; i++) {
    235       const uint32_t mask = 1 << i;
    236       if ((p->vec_used & mask) == 0) {
    237          p->vec_used |= mask;
    238          return i;
    239       }
    240    }
    241    printf("OUT OF PPC VEC registers!\n");
    242    return -1;
    243 }
    244 
    245 
    246 /**
    247  * Mark the given vector register as "unallocated".
    248  */
    249 void
    250 ppc_release_vec_register(struct ppc_function *p, int reg)
    251 {
    252    assert(reg < PPC_NUM_VEC_REGS);
    253    assert(p->vec_used & (1 << reg));
    254    p->vec_used &= ~(1 << reg);
    255 }
    256 
    257 
    258 /**
    259  * Append instruction to instruction buffer.  Grow buffer if out of room.
    260  */
    261 static void
    262 emit_instruction(struct ppc_function *p, uint32_t inst_bits)
    263 {
    264    if (!p->store)
    265       return;  /* out of memory, drop the instruction */
    266 
    267    if (p->num_inst == p->max_inst) {
    268       /* allocate larger buffer */
    269       uint32_t *newbuf;
    270       p->max_inst *= 2;  /* 2x larger */
    271       newbuf = rtasm_exec_malloc(p->max_inst * PPC_INST_SIZE);
    272       if (newbuf) {
    273          memcpy(newbuf, p->store, p->num_inst * PPC_INST_SIZE);
    274       }
    275       rtasm_exec_free(p->store);
    276       p->store = newbuf;
    277       if (!p->store) {
    278          /* out of memory */
    279          p->num_inst = 0;
    280          return;
    281       }
    282    }
    283 
    284    p->store[p->num_inst++] = inst_bits;
    285 }
    286 
    287 
    288 union vx_inst {
    289    uint32_t bits;
    290    struct {
    291       unsigned op:6;
    292       unsigned vD:5;
    293       unsigned vA:5;
    294       unsigned vB:5;
    295       unsigned op2:11;
    296    } inst;
    297 };
    298 
    299 static INLINE void
    300 emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB,
    301         const char *format, boolean transpose)
    302 {
    303    union vx_inst inst;
    304    inst.inst.op = 4;
    305    inst.inst.vD = vD;
    306    inst.inst.vA = vA;
    307    inst.inst.vB = vB;
    308    inst.inst.op2 = op2;
    309    emit_instruction(p, inst.bits);
    310    if (p->print) {
    311       indent(p);
    312       if (transpose)
    313          printf(format, vD, vB, vA);
    314       else
    315          printf(format, vD, vA, vB);
    316    }
    317 }
    318 
    319 
    320 union vxr_inst {
    321    uint32_t bits;
    322    struct {
    323       unsigned op:6;
    324       unsigned vD:5;
    325       unsigned vA:5;
    326       unsigned vB:5;
    327       unsigned rC:1;
    328       unsigned op2:10;
    329    } inst;
    330 };
    331 
    332 static INLINE void
    333 emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB,
    334          const char *format)
    335 {
    336    union vxr_inst inst;
    337    inst.inst.op = 4;
    338    inst.inst.vD = vD;
    339    inst.inst.vA = vA;
    340    inst.inst.vB = vB;
    341    inst.inst.rC = 0;
    342    inst.inst.op2 = op2;
    343    emit_instruction(p, inst.bits);
    344    if (p->print) {
    345       indent(p);
    346       printf(format, vD, vA, vB);
    347    }
    348 }
    349 
    350 
    351 union va_inst {
    352    uint32_t bits;
    353    struct {
    354       unsigned op:6;
    355       unsigned vD:5;
    356       unsigned vA:5;
    357       unsigned vB:5;
    358       unsigned vC:5;
    359       unsigned op2:6;
    360    } inst;
    361 };
    362 
    363 static INLINE void
    364 emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC,
    365         const char *format)
    366 {
    367    union va_inst inst;
    368    inst.inst.op = 4;
    369    inst.inst.vD = vD;
    370    inst.inst.vA = vA;
    371    inst.inst.vB = vB;
    372    inst.inst.vC = vC;
    373    inst.inst.op2 = op2;
    374    emit_instruction(p, inst.bits);
    375    if (p->print) {
    376       indent(p);
    377       printf(format, vD, vA, vB, vC);
    378    }
    379 }
    380 
    381 
    382 union i_inst {
    383    uint32_t bits;
    384    struct {
    385       unsigned op:6;
    386       unsigned li:24;
    387       unsigned aa:1;
    388       unsigned lk:1;
    389    } inst;
    390 };
    391 
    392 static INLINE void
    393 emit_i(struct ppc_function *p, uint op, uint li, uint aa, uint lk)
    394 {
    395    union i_inst inst;
    396    inst.inst.op = op;
    397    inst.inst.li = li;
    398    inst.inst.aa = aa;
    399    inst.inst.lk = lk;
    400    emit_instruction(p, inst.bits);
    401 }
    402 
    403 
    404 union xl_inst {
    405    uint32_t bits;
    406    struct {
    407       unsigned op:6;
    408       unsigned bo:5;
    409       unsigned bi:5;
    410       unsigned unused:3;
    411       unsigned bh:2;
    412       unsigned op2:10;
    413       unsigned lk:1;
    414    } inst;
    415 };
    416 
    417 static INLINE void
    418 emit_xl(struct ppc_function *p, uint op, uint bo, uint bi, uint bh,
    419         uint op2, uint lk)
    420 {
    421    union xl_inst inst;
    422    inst.inst.op = op;
    423    inst.inst.bo = bo;
    424    inst.inst.bi = bi;
    425    inst.inst.unused = 0x0;
    426    inst.inst.bh = bh;
    427    inst.inst.op2 = op2;
    428    inst.inst.lk = lk;
    429    emit_instruction(p, inst.bits);
    430 }
    431 
    432 static INLINE void
    433 dump_xl(const char *name, uint inst)
    434 {
    435    union xl_inst i;
    436 
    437    i.bits = inst;
    438    debug_printf("%s = 0x%08x\n", name, inst);
    439    debug_printf(" op: %d 0x%x\n", i.inst.op, i.inst.op);
    440    debug_printf(" bo: %d 0x%x\n", i.inst.bo, i.inst.bo);
    441    debug_printf(" bi: %d 0x%x\n", i.inst.bi, i.inst.bi);
    442    debug_printf(" unused: %d 0x%x\n", i.inst.unused, i.inst.unused);
    443    debug_printf(" bh: %d 0x%x\n", i.inst.bh, i.inst.bh);
    444    debug_printf(" op2: %d 0x%x\n", i.inst.op2, i.inst.op2);
    445    debug_printf(" lk: %d 0x%x\n", i.inst.lk, i.inst.lk);
    446 }
    447 
    448 
    449 union x_inst {
    450    uint32_t bits;
    451    struct {
    452       unsigned op:6;
    453       unsigned vrs:5;
    454       unsigned ra:5;
    455       unsigned rb:5;
    456       unsigned op2:10;
    457       unsigned unused:1;
    458    } inst;
    459 };
    460 
    461 static INLINE void
    462 emit_x(struct ppc_function *p, uint op, uint vrs, uint ra, uint rb, uint op2,
    463        const char *format)
    464 {
    465    union x_inst inst;
    466    inst.inst.op = op;
    467    inst.inst.vrs = vrs;
    468    inst.inst.ra = ra;
    469    inst.inst.rb = rb;
    470    inst.inst.op2 = op2;
    471    inst.inst.unused = 0x0;
    472    emit_instruction(p, inst.bits);
    473    if (p->print) {
    474       indent(p);
    475       printf(format, vrs, ra, rb);
    476    }
    477 }
    478 
    479 
    480 union d_inst {
    481    uint32_t bits;
    482    struct {
    483       unsigned op:6;
    484       unsigned rt:5;
    485       unsigned ra:5;
    486       unsigned si:16;
    487    } inst;
    488 };
    489 
    490 static INLINE void
    491 emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si,
    492        const char *format, boolean transpose)
    493 {
    494    union d_inst inst;
    495    assert(si >= -32768);
    496    assert(si <= 32767);
    497    inst.inst.op = op;
    498    inst.inst.rt = rt;
    499    inst.inst.ra = ra;
    500    inst.inst.si = (unsigned) (si & 0xffff);
    501    emit_instruction(p, inst.bits);
    502    if (p->print) {
    503       indent(p);
    504       if (transpose)
    505          printf(format, rt, si, ra);
    506       else
    507          printf(format, rt, ra, si);
    508    }
    509 }
    510 
    511 
    512 union a_inst {
    513    uint32_t bits;
    514    struct {
    515       unsigned op:6;
    516       unsigned frt:5;
    517       unsigned fra:5;
    518       unsigned frb:5;
    519       unsigned unused:5;
    520       unsigned op2:5;
    521       unsigned rc:1;
    522    } inst;
    523 };
    524 
    525 static INLINE void
    526 emit_a(struct ppc_function *p, uint op, uint frt, uint fra, uint frb, uint op2,
    527        uint rc, const char *format)
    528 {
    529    union a_inst inst;
    530    inst.inst.op = op;
    531    inst.inst.frt = frt;
    532    inst.inst.fra = fra;
    533    inst.inst.frb = frb;
    534    inst.inst.unused = 0x0;
    535    inst.inst.op2 = op2;
    536    inst.inst.rc = rc;
    537    emit_instruction(p, inst.bits);
    538    if (p->print) {
    539       indent(p);
    540       printf(format, frt, fra, frb);
    541    }
    542 }
    543 
    544 
    545 union xo_inst {
    546    uint32_t bits;
    547    struct {
    548       unsigned op:6;
    549       unsigned rt:5;
    550       unsigned ra:5;
    551       unsigned rb:5;
    552       unsigned oe:1;
    553       unsigned op2:9;
    554       unsigned rc:1;
    555    } inst;
    556 };
    557 
    558 static INLINE void
    559 emit_xo(struct ppc_function *p, uint op, uint rt, uint ra, uint rb, uint oe,
    560         uint op2, uint rc, const char *format)
    561 {
    562    union xo_inst inst;
    563    inst.inst.op = op;
    564    inst.inst.rt = rt;
    565    inst.inst.ra = ra;
    566    inst.inst.rb = rb;
    567    inst.inst.oe = oe;
    568    inst.inst.op2 = op2;
    569    inst.inst.rc = rc;
    570    emit_instruction(p, inst.bits);
    571    if (p->print) {
    572       indent(p);
    573       printf(format, rt, ra, rb);
    574    }
    575 }
    576 
    577 
    578 
    579 
    580 
    581 /**
    582  ** float vector arithmetic
    583  **/
    584 
    585 /** vector float add */
    586 void
    587 ppc_vaddfp(struct ppc_function *p, uint vD, uint vA, uint vB)
    588 {
    589    emit_vx(p, 10, vD, vA, vB, "vaddfp\t%u, v%u, v%u\n", FALSE);
    590 }
    591 
    592 /** vector float substract */
    593 void
    594 ppc_vsubfp(struct ppc_function *p, uint vD, uint vA, uint vB)
    595 {
    596    emit_vx(p, 74, vD, vA, vB, "vsubfp\tv%u, v%u, v%u\n", FALSE);
    597 }
    598 
    599 /** vector float min */
    600 void
    601 ppc_vminfp(struct ppc_function *p, uint vD, uint vA, uint vB)
    602 {
    603    emit_vx(p, 1098, vD, vA, vB, "vminfp\tv%u, v%u, v%u\n", FALSE);
    604 }
    605 
    606 /** vector float max */
    607 void
    608 ppc_vmaxfp(struct ppc_function *p, uint vD, uint vA, uint vB)
    609 {
    610    emit_vx(p, 1034, vD, vA, vB, "vmaxfp\tv%u, v%u, v%u\n", FALSE);
    611 }
    612 
    613 /** vector float mult add: vD = vA * vB + vC */
    614 void
    615 ppc_vmaddfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
    616 {
    617    /* note arg order */
    618    emit_va(p, 46, vD, vA, vC, vB, "vmaddfp\tv%u, v%u, v%u, v%u\n");
    619 }
    620 
    621 /** vector float negative mult subtract: vD = vA - vB * vC */
    622 void
    623 ppc_vnmsubfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
    624 {
    625    /* note arg order */
    626    emit_va(p, 47, vD, vB, vA, vC, "vnmsubfp\tv%u, v%u, v%u, v%u\n");
    627 }
    628 
    629 /** vector float compare greater than */
    630 void
    631 ppc_vcmpgtfpx(struct ppc_function *p, uint vD, uint vA, uint vB)
    632 {
    633    emit_vxr(p, 710, vD, vA, vB, "vcmpgtfpx\tv%u, v%u, v%u");
    634 }
    635 
    636 /** vector float compare greater than or equal to */
    637 void
    638 ppc_vcmpgefpx(struct ppc_function *p, uint vD, uint vA, uint vB)
    639 {
    640    emit_vxr(p, 454, vD, vA, vB, "vcmpgefpx\tv%u, v%u, v%u");
    641 }
    642 
    643 /** vector float compare equal */
    644 void
    645 ppc_vcmpeqfpx(struct ppc_function *p, uint vD, uint vA, uint vB)
    646 {
    647    emit_vxr(p, 198, vD, vA, vB, "vcmpeqfpx\tv%u, v%u, v%u");
    648 }
    649 
    650 /** vector float 2^x */
    651 void
    652 ppc_vexptefp(struct ppc_function *p, uint vD, uint vB)
    653 {
    654    emit_vx(p, 394, vD, 0, vB, "vexptefp\tv%u, 0%u, v%u\n", FALSE);
    655 }
    656 
    657 /** vector float log2(x) */
    658 void
    659 ppc_vlogefp(struct ppc_function *p, uint vD, uint vB)
    660 {
    661    emit_vx(p, 458, vD, 0, vB, "vlogefp\tv%u, 0%u, v%u\n", FALSE);
    662 }
    663 
    664 /** vector float reciprocol */
    665 void
    666 ppc_vrefp(struct ppc_function *p, uint vD, uint vB)
    667 {
    668    emit_vx(p, 266, vD, 0, vB, "vrefp\tv%u, 0%u, v%u\n", FALSE);
    669 }
    670 
    671 /** vector float reciprocol sqrt estimate */
    672 void
    673 ppc_vrsqrtefp(struct ppc_function *p, uint vD, uint vB)
    674 {
    675    emit_vx(p, 330, vD, 0, vB, "vrsqrtefp\tv%u, 0%u, v%u\n", FALSE);
    676 }
    677 
    678 /** vector float round to negative infinity */
    679 void
    680 ppc_vrfim(struct ppc_function *p, uint vD, uint vB)
    681 {
    682    emit_vx(p, 714, vD, 0, vB, "vrfim\tv%u, 0%u, v%u\n", FALSE);
    683 }
    684 
    685 /** vector float round to positive infinity */
    686 void
    687 ppc_vrfip(struct ppc_function *p, uint vD, uint vB)
    688 {
    689    emit_vx(p, 650, vD, 0, vB, "vrfip\tv%u, 0%u, v%u\n", FALSE);
    690 }
    691 
    692 /** vector float round to nearest int */
    693 void
    694 ppc_vrfin(struct ppc_function *p, uint vD, uint vB)
    695 {
    696    emit_vx(p, 522, vD, 0, vB, "vrfin\tv%u, 0%u, v%u\n", FALSE);
    697 }
    698 
    699 /** vector float round to int toward zero */
    700 void
    701 ppc_vrfiz(struct ppc_function *p, uint vD, uint vB)
    702 {
    703    emit_vx(p, 586, vD, 0, vB, "vrfiz\tv%u, 0%u, v%u\n", FALSE);
    704 }
    705 
    706 /** vector store: store vR at mem[rA+rB] */
    707 void
    708 ppc_stvx(struct ppc_function *p, uint vR, uint rA, uint rB)
    709 {
    710    emit_x(p, 31, vR, rA, rB, 231, "stvx\tv%u, r%u, r%u\n");
    711 }
    712 
    713 /** vector load: vR = mem[rA+rB] */
    714 void
    715 ppc_lvx(struct ppc_function *p, uint vR, uint rA, uint rB)
    716 {
    717    emit_x(p, 31, vR, rA, rB, 103, "lvx\tv%u, r%u, r%u\n");
    718 }
    719 
    720 /** load vector element word: vR = mem_word[ra+rb] */
    721 void
    722 ppc_lvewx(struct ppc_function *p, uint vR, uint rA, uint rB)
    723 {
    724    emit_x(p, 31, vR, rA, rB, 71, "lvewx\tv%u, r%u, r%u\n");
    725 }
    726 
    727 
    728 
    729 
    730 /**
    731  ** vector bitwise operations
    732  **/
    733 
    734 /** vector and */
    735 void
    736 ppc_vand(struct ppc_function *p, uint vD, uint vA, uint vB)
    737 {
    738    emit_vx(p, 1028, vD, vA, vB, "vand\tv%u, v%u, v%u\n", FALSE);
    739 }
    740 
    741 /** vector and complement */
    742 void
    743 ppc_vandc(struct ppc_function *p, uint vD, uint vA, uint vB)
    744 {
    745    emit_vx(p, 1092, vD, vA, vB, "vandc\tv%u, v%u, v%u\n", FALSE);
    746 }
    747 
    748 /** vector or */
    749 void
    750 ppc_vor(struct ppc_function *p, uint vD, uint vA, uint vB)
    751 {
    752    emit_vx(p, 1156, vD, vA, vB, "vor\tv%u, v%u, v%u\n", FALSE);
    753 }
    754 
    755 /** vector nor */
    756 void
    757 ppc_vnor(struct ppc_function *p, uint vD, uint vA, uint vB)
    758 {
    759    emit_vx(p, 1284, vD, vA, vB, "vnor\tv%u, v%u, v%u\n", FALSE);
    760 }
    761 
    762 /** vector xor */
    763 void
    764 ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB)
    765 {
    766    emit_vx(p, 1220, vD, vA, vB, "vxor\tv%u, v%u, v%u\n", FALSE);
    767 }
    768 
    769 /** Pseudo-instruction: vector move */
    770 void
    771 ppc_vmove(struct ppc_function *p, uint vD, uint vA)
    772 {
    773    boolean print = p->print;
    774    p->print = FALSE;
    775    ppc_vor(p, vD, vA, vA);
    776    if (print) {
    777       indent(p);
    778       printf("vor\tv%u, v%u, v%u \t# v%u = v%u\n", vD, vA, vA, vD, vA);
    779    }
    780    p->print = print;
    781 }
    782 
    783 /** Set vector register to {0,0,0,0} */
    784 void
    785 ppc_vzero(struct ppc_function *p, uint vr)
    786 {
    787    boolean print = p->print;
    788    p->print = FALSE;
    789    ppc_vxor(p, vr, vr, vr);
    790    if (print) {
    791       indent(p);
    792       printf("vxor\tv%u, v%u, v%u \t# v%u = {0,0,0,0}\n", vr, vr, vr, vr);
    793    }
    794    p->print = print;
    795 }
    796 
    797 
    798 
    799 
    800 /**
    801  ** Vector shuffle / select / splat / etc
    802  **/
    803 
    804 /** vector permute */
    805 void
    806 ppc_vperm(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
    807 {
    808    emit_va(p, 43, vD, vA, vB, vC, "vperm\tr%u, r%u, r%u, r%u");
    809 }
    810 
    811 /** vector select */
    812 void
    813 ppc_vsel(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
    814 {
    815    emit_va(p, 42, vD, vA, vB, vC, "vsel\tr%u, r%u, r%u, r%u");
    816 }
    817 
    818 /** vector splat byte */
    819 void
    820 ppc_vspltb(struct ppc_function *p, uint vD, uint vB, uint imm)
    821 {
    822    emit_vx(p, 42, vD, imm, vB, "vspltb\tv%u, v%u, %u\n", TRUE);
    823 }
    824 
    825 /** vector splat half word */
    826 void
    827 ppc_vsplthw(struct ppc_function *p, uint vD, uint vB, uint imm)
    828 {
    829    emit_vx(p, 588, vD, imm, vB, "vsplthw\tv%u, v%u, %u\n", TRUE);
    830 }
    831 
    832 /** vector splat word */
    833 void
    834 ppc_vspltw(struct ppc_function *p, uint vD, uint vB, uint imm)
    835 {
    836    emit_vx(p, 652, vD, imm, vB, "vspltw\tv%u, v%u, %u\n", TRUE);
    837 }
    838 
    839 /** vector splat signed immediate word */
    840 void
    841 ppc_vspltisw(struct ppc_function *p, uint vD, int imm)
    842 {
    843    assert(imm >= -16);
    844    assert(imm < 15);
    845    emit_vx(p, 908, vD, imm, 0, "vspltisw\tv%u, %d, %u\n", FALSE);
    846 }
    847 
    848 /** vector shift left word: vD[word] = vA[word] << (vB[word] & 0x1f) */
    849 void
    850 ppc_vslw(struct ppc_function *p, uint vD, uint vA, uint vB)
    851 {
    852    emit_vx(p, 388, vD, vA, vB, "vslw\tv%u, v%u, v%u\n", FALSE);
    853 }
    854 
    855 
    856 
    857 
    858 /**
    859  ** integer arithmetic
    860  **/
    861 
    862 /** rt = ra + imm */
    863 void
    864 ppc_addi(struct ppc_function *p, uint rt, uint ra, int imm)
    865 {
    866    emit_d(p, 14, rt, ra, imm, "addi\tr%u, r%u, %d\n", FALSE);
    867 }
    868 
    869 /** rt = ra + (imm << 16) */
    870 void
    871 ppc_addis(struct ppc_function *p, uint rt, uint ra, int imm)
    872 {
    873    emit_d(p, 15, rt, ra, imm, "addis\tr%u, r%u, %d\n", FALSE);
    874 }
    875 
    876 /** rt = ra + rb */
    877 void
    878 ppc_add(struct ppc_function *p, uint rt, uint ra, uint rb)
    879 {
    880    emit_xo(p, 31, rt, ra, rb, 0, 266, 0, "add\tr%u, r%u, r%u\n");
    881 }
    882 
    883 /** rt = ra AND ra */
    884 void
    885 ppc_and(struct ppc_function *p, uint rt, uint ra, uint rb)
    886 {
    887    emit_x(p, 31, ra, rt, rb, 28, "and\tr%u, r%u, r%u\n");  /* note argument order */
    888 }
    889 
    890 /** rt = ra AND imm */
    891 void
    892 ppc_andi(struct ppc_function *p, uint rt, uint ra, int imm)
    893 {
    894    /* note argument order */
    895    emit_d(p, 28, ra, rt, imm, "andi\tr%u, r%u, %d\n", FALSE);
    896 }
    897 
    898 /** rt = ra OR ra */
    899 void
    900 ppc_or(struct ppc_function *p, uint rt, uint ra, uint rb)
    901 {
    902    emit_x(p, 31, ra, rt, rb, 444, "or\tr%u, r%u, r%u\n");  /* note argument order */
    903 }
    904 
    905 /** rt = ra OR imm */
    906 void
    907 ppc_ori(struct ppc_function *p, uint rt, uint ra, int imm)
    908 {
    909    /* note argument order */
    910    emit_d(p, 24, ra, rt, imm, "ori\tr%u, r%u, %d\n", FALSE);
    911 }
    912 
    913 /** rt = ra XOR ra */
    914 void
    915 ppc_xor(struct ppc_function *p, uint rt, uint ra, uint rb)
    916 {
    917    emit_x(p, 31, ra, rt, rb, 316, "xor\tr%u, r%u, r%u\n");  /* note argument order */
    918 }
    919 
    920 /** rt = ra XOR imm */
    921 void
    922 ppc_xori(struct ppc_function *p, uint rt, uint ra, int imm)
    923 {
    924    /* note argument order */
    925    emit_d(p, 26, ra, rt, imm, "xori\tr%u, r%u, %d\n", FALSE);
    926 }
    927 
    928 /** pseudo instruction: move: rt = ra */
    929 void
    930 ppc_mr(struct ppc_function *p, uint rt, uint ra)
    931 {
    932    ppc_or(p, rt, ra, ra);
    933 }
    934 
    935 /** pseudo instruction: load immediate: rt = imm */
    936 void
    937 ppc_li(struct ppc_function *p, uint rt, int imm)
    938 {
    939    boolean print = p->print;
    940    p->print = FALSE;
    941    ppc_addi(p, rt, 0, imm);
    942    if (print) {
    943       indent(p);
    944       printf("addi\tr%u, r0, %d \t# r%u = %d\n", rt, imm, rt, imm);
    945    }
    946    p->print = print;
    947 }
    948 
    949 /** rt = imm << 16 */
    950 void
    951 ppc_lis(struct ppc_function *p, uint rt, int imm)
    952 {
    953    ppc_addis(p, rt, 0, imm);
    954 }
    955 
    956 /** rt = imm */
    957 void
    958 ppc_load_int(struct ppc_function *p, uint rt, int imm)
    959 {
    960    ppc_lis(p, rt, (imm >> 16));          /* rt = imm >> 16 */
    961    ppc_ori(p, rt, rt, (imm & 0xffff));   /* rt = rt | (imm & 0xffff) */
    962 }
    963 
    964 
    965 
    966 
    967 /**
    968  ** integer load/store
    969  **/
    970 
    971 /** store rs at memory[(ra)+d],
    972  * then update ra = (ra)+d
    973  */
    974 void
    975 ppc_stwu(struct ppc_function *p, uint rs, uint ra, int d)
    976 {
    977    emit_d(p, 37, rs, ra, d, "stwu\tr%u, %d(r%u)\n", TRUE);
    978 }
    979 
    980 /** store rs at memory[(ra)+d] */
    981 void
    982 ppc_stw(struct ppc_function *p, uint rs, uint ra, int d)
    983 {
    984    emit_d(p, 36, rs, ra, d, "stw\tr%u, %d(r%u)\n", TRUE);
    985 }
    986 
    987 /** Load rt = mem[(ra)+d];  then zero set high 32 bits to zero. */
    988 void
    989 ppc_lwz(struct ppc_function *p, uint rt, uint ra, int d)
    990 {
    991    emit_d(p, 32, rt, ra, d, "lwz\tr%u, %d(r%u)\n", TRUE);
    992 }
    993 
    994 
    995 
    996 /**
    997  ** Float (non-vector) arithmetic
    998  **/
    999 
   1000 /** add: frt = fra + frb */
   1001 void
   1002 ppc_fadd(struct ppc_function *p, uint frt, uint fra, uint frb)
   1003 {
   1004    emit_a(p, 63, frt, fra, frb, 21, 0, "fadd\tf%u, f%u, f%u\n");
   1005 }
   1006 
   1007 /** sub: frt = fra - frb */
   1008 void
   1009 ppc_fsub(struct ppc_function *p, uint frt, uint fra, uint frb)
   1010 {
   1011    emit_a(p, 63, frt, fra, frb, 20, 0, "fsub\tf%u, f%u, f%u\n");
   1012 }
   1013 
   1014 /** convert to int: rt = (int) ra */
   1015 void
   1016 ppc_fctiwz(struct ppc_function *p, uint rt, uint fra)
   1017 {
   1018    emit_x(p, 63, rt, 0, fra, 15, "fctiwz\tr%u, r%u, r%u\n");
   1019 }
   1020 
   1021 /** store frs at mem[(ra)+offset] */
   1022 void
   1023 ppc_stfs(struct ppc_function *p, uint frs, uint ra, int offset)
   1024 {
   1025    emit_d(p, 52, frs, ra, offset, "stfs\tr%u, %d(r%u)\n", TRUE);
   1026 }
   1027 
   1028 /** store frs at mem[(ra)+(rb)] */
   1029 void
   1030 ppc_stfiwx(struct ppc_function *p, uint frs, uint ra, uint rb)
   1031 {
   1032    emit_x(p, 31, frs, ra, rb, 983, "stfiwx\tr%u, r%u, r%u\n");
   1033 }
   1034 
   1035 /** load frt = mem[(ra)+offset] */
   1036 void
   1037 ppc_lfs(struct ppc_function *p, uint frt, uint ra, int offset)
   1038 {
   1039    emit_d(p, 48, frt, ra, offset, "stfs\tr%u, %d(r%u)\n", TRUE);
   1040 }
   1041 
   1042 
   1043 
   1044 
   1045 
   1046 /**
   1047  ** branch instructions
   1048  **/
   1049 
   1050 /** BLR: Branch to link register (p. 35) */
   1051 void
   1052 ppc_blr(struct ppc_function *p)
   1053 {
   1054    emit_i(p, 18, 0, 0, 1);
   1055    if (p->print) {
   1056       indent(p);
   1057       printf("blr\n");
   1058    }
   1059 }
   1060 
   1061 /** Branch Conditional to Link Register (p. 36) */
   1062 void
   1063 ppc_bclr(struct ppc_function *p, uint condOp, uint branchHint, uint condReg)
   1064 {
   1065    emit_xl(p, 19, condOp, condReg, branchHint, 16, 0);
   1066    if (p->print) {
   1067       indent(p);
   1068       printf("bclr\t%u %u %u\n", condOp, branchHint, condReg);
   1069    }
   1070 }
   1071 
   1072 /** Pseudo instruction: return from subroutine */
   1073 void
   1074 ppc_return(struct ppc_function *p)
   1075 {
   1076    ppc_bclr(p, BRANCH_COND_ALWAYS, BRANCH_HINT_SUB_RETURN, 0);
   1077 }
   1078