Home | History | Annotate | Download | only in state_tracker
      1 /*
 * Copyright © 2016 Red Hat
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     21  * SOFTWARE.
     22  */
     23 
     24 #include <stdbool.h>
     25 
     26 #include "st_tgsi_lower_yuv.h"
     27 #include "tgsi/tgsi_transform.h"
     28 #include "tgsi/tgsi_scan.h"
     29 #include "tgsi/tgsi_dump.h"
     30 #include "util/u_debug.h"
     31 
     32 #include "util/bitscan.h"
     33 
     34 struct tgsi_yuv_transform {
     35    struct tgsi_transform_context base;
     36    struct tgsi_shader_info info;
     37    struct tgsi_full_src_register imm[4];
     38    struct {
     39       struct tgsi_full_src_register src;
     40       struct tgsi_full_dst_register dst;
     41    } tmp[2];
     42 #define A 0
     43 #define B 1
     44 
     45    /* Maps a primary sampler (used for Y) to the U or UV sampler.  In
     46     * case of 3-plane YUV format, the V plane is next sampler after U.
     47     */
     48    unsigned char sampler_map[PIPE_MAX_SAMPLERS][2];
     49 
     50    bool first_instruction_emitted;
     51    unsigned free_slots;
     52    unsigned lower_nv12;
     53    unsigned lower_iyuv;
     54 };
     55 
     56 static inline struct tgsi_yuv_transform *
     57 tgsi_yuv_transform(struct tgsi_transform_context *tctx)
     58 {
     59    return (struct tgsi_yuv_transform *)tctx;
     60 }
     61 
     62 static void
     63 reg_dst(struct tgsi_full_dst_register *dst,
     64         const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
     65 {
     66    *dst = *orig_dst;
     67    dst->Register.WriteMask &= wrmask;
     68    assert(dst->Register.WriteMask);
     69 }
     70 
     71 static inline void
     72 get_swiz(unsigned *swiz, const struct tgsi_src_register *src)
     73 {
     74    swiz[0] = src->SwizzleX;
     75    swiz[1] = src->SwizzleY;
     76    swiz[2] = src->SwizzleZ;
     77    swiz[3] = src->SwizzleW;
     78 }
     79 
     80 static void
     81 reg_src(struct tgsi_full_src_register *src,
     82         const struct tgsi_full_src_register *orig_src,
     83         unsigned sx, unsigned sy, unsigned sz, unsigned sw)
     84 {
     85    unsigned swiz[4];
     86    get_swiz(swiz, &orig_src->Register);
     87    *src = *orig_src;
     88    src->Register.SwizzleX = swiz[sx];
     89    src->Register.SwizzleY = swiz[sy];
     90    src->Register.SwizzleZ = swiz[sz];
     91    src->Register.SwizzleW = swiz[sw];
     92 }
     93 
     94 #define TGSI_SWIZZLE__ TGSI_SWIZZLE_X  /* don't-care value! */
     95 #define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y,   \
     96       TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w
     97 
     98 static inline struct tgsi_full_instruction
     99 tex_instruction(unsigned samp)
    100 {
    101    struct tgsi_full_instruction inst;
    102 
    103    inst = tgsi_default_full_instruction();
    104    inst.Instruction.Opcode = TGSI_OPCODE_TEX;
    105    inst.Instruction.Texture = 1;
    106    inst.Texture.Texture = TGSI_TEXTURE_2D;
    107    inst.Instruction.NumDstRegs = 1;
    108    inst.Instruction.NumSrcRegs = 2;
    109    inst.Src[1].Register.File  = TGSI_FILE_SAMPLER;
    110    inst.Src[1].Register.Index = samp;
    111 
    112    return inst;
    113 }
    114 
    115 static inline struct tgsi_full_instruction
    116 mov_instruction(void)
    117 {
    118    struct tgsi_full_instruction inst;
    119 
    120    inst = tgsi_default_full_instruction();
    121    inst.Instruction.Opcode = TGSI_OPCODE_MOV;
    122    inst.Instruction.Saturate = 0;
    123    inst.Instruction.NumDstRegs = 1;
    124    inst.Instruction.NumSrcRegs = 1;
    125 
    126    return inst;
    127 }
    128 
    129 static inline struct tgsi_full_instruction
    130 dp3_instruction(void)
    131 {
    132    struct tgsi_full_instruction inst;
    133 
    134    inst = tgsi_default_full_instruction();
    135    inst.Instruction.Opcode = TGSI_OPCODE_DP3;
    136    inst.Instruction.NumDstRegs = 1;
    137    inst.Instruction.NumSrcRegs = 2;
    138 
    139    return inst;
    140 }
    141 
    142 
    143 
    144 static void
    145 emit_immed(struct tgsi_transform_context *tctx, int idx,
    146            float x, float y, float z, float w)
    147 {
    148    struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
    149    struct tgsi_shader_info *info = &ctx->info;
    150    struct tgsi_full_immediate immed;
    151 
    152    immed = tgsi_default_full_immediate();
    153    immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
    154    immed.u[0].Float = x;
    155    immed.u[1].Float = y;
    156    immed.u[2].Float = z;
    157    immed.u[3].Float = w;
    158    tctx->emit_immediate(tctx, &immed);
    159 
    160    ctx->imm[idx].Register.File = TGSI_FILE_IMMEDIATE;
    161    ctx->imm[idx].Register.Index = info->immediate_count + idx;
    162    ctx->imm[idx].Register.SwizzleX = TGSI_SWIZZLE_X;
    163    ctx->imm[idx].Register.SwizzleY = TGSI_SWIZZLE_Y;
    164    ctx->imm[idx].Register.SwizzleZ = TGSI_SWIZZLE_Z;
    165    ctx->imm[idx].Register.SwizzleW = TGSI_SWIZZLE_W;
    166 }
    167 
    168 static void
    169 emit_samp(struct tgsi_transform_context *tctx, unsigned samp)
    170 {
    171    tgsi_transform_sampler_decl(tctx, samp);
    172    tgsi_transform_sampler_view_decl(tctx, samp, PIPE_TEXTURE_2D,
    173                                     TGSI_RETURN_TYPE_FLOAT);
    174 }
    175 
    176 /* Emit extra declarations we need:
    177  *  + 2 TEMP to hold intermediate results
    178  *  + 1 (for 2-plane YUV) or 2 (for 3-plane YUV) extra samplers per
    179  *    lowered YUV sampler
    180  *  + extra immediates for doing CSC
    181  */
    182 static void
    183 emit_decls(struct tgsi_transform_context *tctx)
    184 {
    185    struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
    186    struct tgsi_shader_info *info = &ctx->info;
    187    unsigned mask, tempbase, i;
    188    struct tgsi_full_declaration decl;
    189 
    190    /*
    191     * Declare immediates for CSC conversion:
    192     */
    193 
    194    /* ITU-R BT.601 conversion */
    195    emit_immed(tctx, 0, 1.164,  0.000,  1.596,  0.0);
    196    emit_immed(tctx, 1, 1.164, -0.392, -0.813,  0.0);
    197    emit_immed(tctx, 2, 1.164,  2.017,  0.000,  0.0);
    198    emit_immed(tctx, 3, 0.0625, 0.500,  0.500,  1.0);
    199 
    200    /*
    201     * Declare extra samplers / sampler-views:
    202     */
    203 
    204    mask = ctx->lower_nv12 | ctx->lower_iyuv;
    205    while (mask) {
    206       unsigned extra, y_samp = u_bit_scan(&mask);
    207 
    208       extra = u_bit_scan(&ctx->free_slots);
    209       ctx->sampler_map[y_samp][0] = extra;
    210       emit_samp(tctx, extra);
    211 
    212       if (ctx->lower_iyuv & (1 << y_samp)) {
    213          extra = u_bit_scan(&ctx->free_slots);
    214          ctx->sampler_map[y_samp][1] = extra;
    215          emit_samp(tctx, extra);
    216       }
    217    }
    218 
    219    /*
    220     * Declare extra temp:
    221     */
    222 
    223    tempbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
    224 
    225    for (i = 0; i < 2; i++) {
    226       decl = tgsi_default_full_declaration();
    227       decl.Declaration.File = TGSI_FILE_TEMPORARY;
    228       decl.Range.First = decl.Range.Last = tempbase + i;
    229       tctx->emit_declaration(tctx, &decl);
    230 
    231       ctx->tmp[i].src.Register.File  = TGSI_FILE_TEMPORARY;
    232       ctx->tmp[i].src.Register.Index = tempbase + i;
    233       ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
    234       ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
    235       ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
    236       ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
    237 
    238       ctx->tmp[i].dst.Register.File  = TGSI_FILE_TEMPORARY;
    239       ctx->tmp[i].dst.Register.Index = tempbase + i;
    240       ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
    241    }
    242 }
    243 
    244 /* call with YUV in tmpA.xyz */
    245 static void
    246 yuv_to_rgb(struct tgsi_transform_context *tctx,
    247            struct tgsi_full_dst_register *dst)
    248 {
    249    struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
    250    struct tgsi_full_instruction inst;
    251 
    252    /*
    253     * IMM[0] FLT32 { 1.164,  0.000,  1.596,  0.0 }
    254     * IMM[1] FLT32 { 1.164, -0.392, -0.813,  0.0 }
    255     * IMM[2] FLT32 { 1.164,  2.017,  0.000,  0.0 }
    256     * IMM[3] FLT32 { 0.0625, 0.500,  0.500,  1.0 }
    257     */
    258 
    259    /* SUB tmpA.xyz, tmpA, imm[3] */
    260    inst = tgsi_default_full_instruction();
    261    inst.Instruction.Opcode = TGSI_OPCODE_ADD;
    262    inst.Instruction.Saturate = 0;
    263    inst.Instruction.NumDstRegs = 1;
    264    inst.Instruction.NumSrcRegs = 2;
    265    reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ);
    266    reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, _));
    267    reg_src(&inst.Src[1], &ctx->imm[3], SWIZ(X, Y, Z, _));
    268    inst.Src[1].Register.Negate = 1;
    269    tctx->emit_instruction(tctx, &inst);
    270 
    271    /* DP3 dst.x, tmpA, imm[0] */
    272    inst = dp3_instruction();
    273    reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_X);
    274    reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
    275    reg_src(&inst.Src[1], &ctx->imm[0], SWIZ(X, Y, Z, W));
    276    tctx->emit_instruction(tctx, &inst);
    277 
    278    /* DP3 dst.y, tmpA, imm[1] */
    279    inst = dp3_instruction();
    280    reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Y);
    281    reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
    282    reg_src(&inst.Src[1], &ctx->imm[1], SWIZ(X, Y, Z, W));
    283    tctx->emit_instruction(tctx, &inst);
    284 
    285    /* DP3 dst.z, tmpA, imm[2] */
    286    inst = dp3_instruction();
    287    reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Z);
    288    reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
    289    reg_src(&inst.Src[1], &ctx->imm[2], SWIZ(X, Y, Z, W));
    290    tctx->emit_instruction(tctx, &inst);
    291 
    292    /* MOV dst.w, imm[0].x */
    293    inst = mov_instruction();
    294    reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_W);
    295    reg_src(&inst.Src[0], &ctx->imm[3], SWIZ(_, _, _, W));
    296    tctx->emit_instruction(tctx, &inst);
    297 }
    298 
    299 static void
    300 lower_nv12(struct tgsi_transform_context *tctx,
    301            struct tgsi_full_instruction *originst)
    302 {
    303    struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
    304    struct tgsi_full_instruction inst;
    305    struct tgsi_full_src_register *coord = &originst->Src[0];
    306    unsigned samp = originst->Src[1].Register.Index;
    307 
    308    /* sample Y:
    309     *    TEX tempA.x, coord, texture[samp], 2D;
    310     */
    311    inst = tex_instruction(samp);
    312    reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
    313    reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
    314    tctx->emit_instruction(tctx, &inst);
    315 
    316    /* sample UV:
    317     *    TEX tempB.xy, coord, texture[sampler_map[samp][0]], 2D;
    318     *    MOV tempA.yz, tempB._xy_
    319     */
    320    inst = tex_instruction(ctx->sampler_map[samp][0]);
    321    reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XY);
    322    reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
    323    tctx->emit_instruction(tctx, &inst);
    324 
    325    inst = mov_instruction();
    326    reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_YZ);
    327    reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, X, Y, _));
    328    tctx->emit_instruction(tctx, &inst);
    329 
    330    /* At this point, we have YUV in tempA.xyz, rest is common: */
    331    yuv_to_rgb(tctx, &originst->Dst[0]);
    332 }
    333 
    334 static void
    335 lower_iyuv(struct tgsi_transform_context *tctx,
    336            struct tgsi_full_instruction *originst)
    337 {
    338    struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
    339    struct tgsi_full_instruction inst;
    340    struct tgsi_full_src_register *coord = &originst->Src[0];
    341    unsigned samp = originst->Src[1].Register.Index;
    342 
    343    /* sample Y:
    344     *    TEX tempA.x, coord, texture[samp], 2D;
    345     */
    346    inst = tex_instruction(samp);
    347    reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
    348    reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
    349    tctx->emit_instruction(tctx, &inst);
    350 
    351    /* sample U:
    352     *    TEX tempB.x, coord, texture[sampler_map[samp][0]], 2D;
    353     *    MOV tempA.y, tempB._x__
    354     */
    355    inst = tex_instruction(ctx->sampler_map[samp][0]);
    356    reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
    357    reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
    358    tctx->emit_instruction(tctx, &inst);
    359 
    360    inst = mov_instruction();
    361    reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
    362    reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, X, _, _));
    363    tctx->emit_instruction(tctx, &inst);
    364 
    365    /* sample V:
    366     *    TEX tempB.x, coord, texture[sampler_map[samp][1]], 2D;
    367     *    MOV tempA.z, tempB.__x_
    368     */
    369    inst = tex_instruction(ctx->sampler_map[samp][1]);
    370    reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
    371    reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
    372    tctx->emit_instruction(tctx, &inst);
    373 
    374    inst = mov_instruction();
    375    reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
    376    reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, _, X, _));
    377    tctx->emit_instruction(tctx, &inst);
    378 
    379    /* At this point, we have YUV in tempA.xyz, rest is common: */
    380    yuv_to_rgb(tctx, &originst->Dst[0]);
    381 }
    382 
    383 static void
    384 transform_instr(struct tgsi_transform_context *tctx,
    385                 struct tgsi_full_instruction *inst)
    386 {
    387    struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
    388 
    389    if (!ctx->first_instruction_emitted) {
    390       emit_decls(tctx);
    391       ctx->first_instruction_emitted = true;
    392    }
    393 
    394    switch (inst->Instruction.Opcode) {
    395    /* TODO what other tex opcode's can be used w/ external eglimgs? */
    396    case TGSI_OPCODE_TEX: {
    397       unsigned samp = inst->Src[1].Register.Index;
    398       if (ctx->lower_nv12 & (1 << samp)) {
    399          lower_nv12(tctx, inst);
    400       } else if (ctx->lower_iyuv & (1 << samp)) {
    401          lower_iyuv(tctx, inst);
    402       } else {
    403          goto skip;
    404       }
    405       break;
    406    }
    407    default:
    408    skip:
    409       tctx->emit_instruction(tctx, inst);
    410       return;
    411    }
    412 }
    413 
    414 extern const struct tgsi_token *
    415 st_tgsi_lower_yuv(const struct tgsi_token *tokens, unsigned free_slots,
    416                   unsigned lower_nv12, unsigned lower_iyuv)
    417 {
    418    struct tgsi_yuv_transform ctx;
    419    struct tgsi_token *newtoks;
    420    int newlen;
    421 
    422    assert(!(lower_nv12 & lower_iyuv)); /* bitmasks should be mutually exclusive */
    423 
    424 //   tgsi_dump(tokens, 0);
    425 //   debug_printf("\n");
    426 
    427    memset(&ctx, 0, sizeof(ctx));
    428    ctx.base.transform_instruction = transform_instr;
    429    ctx.free_slots = free_slots;
    430    ctx.lower_nv12 = lower_nv12;
    431    ctx.lower_iyuv = lower_iyuv;
    432    tgsi_scan_shader(tokens, &ctx.info);
    433 
    434    /* TODO better job of figuring out how many extra tokens we need..
    435     * this is a pain about tgsi_transform :-/
    436     */
    437    newlen = tgsi_num_tokens(tokens) + 120;
    438    newtoks = tgsi_alloc_tokens(newlen);
    439    if (!newtoks)
    440       return NULL;
    441 
    442    tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
    443 
    444 //   tgsi_dump(newtoks, 0);
    445 //   debug_printf("\n");
    446 
    447    return newtoks;
    448 }
    449