/****************************************************************************
 * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 ***************************************************************************/
     23 
     24 // llvm redefines DEBUG
     25 #pragma push_macro("DEBUG")
     26 #undef DEBUG
     27 #include "JitManager.h"
     28 #include "llvm-c/Core.h"
     29 #include "llvm/Support/CBindingWrapping.h"
     30 #pragma pop_macro("DEBUG")
     31 
     32 #include "state.h"
     33 #include "state_llvm.h"
     34 #include "builder.h"
     35 
     36 #include "tgsi/tgsi_strings.h"
     37 #include "util/u_format.h"
     38 #include "gallivm/lp_bld_init.h"
     39 #include "gallivm/lp_bld_flow.h"
     40 #include "gallivm/lp_bld_struct.h"
     41 #include "gallivm/lp_bld_tgsi.h"
     42 
     43 #include "swr_context.h"
     44 #include "swr_context_llvm.h"
     45 #include "swr_resource.h"
     46 #include "swr_state.h"
     47 #include "swr_screen.h"
     48 
     49 using namespace SwrJit;
     50 
     51 static unsigned
     52 locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info);
     53 
     54 bool operator==(const swr_jit_fs_key &lhs, const swr_jit_fs_key &rhs)
     55 {
     56    return !memcmp(&lhs, &rhs, sizeof(lhs));
     57 }
     58 
     59 bool operator==(const swr_jit_vs_key &lhs, const swr_jit_vs_key &rhs)
     60 {
     61    return !memcmp(&lhs, &rhs, sizeof(lhs));
     62 }
     63 
     64 static void
     65 swr_generate_sampler_key(const struct lp_tgsi_info &info,
     66                          struct swr_context *ctx,
     67                          unsigned shader_type,
     68                          struct swr_jit_sampler_key &key)
     69 {
     70    key.nr_samplers = info.base.file_max[TGSI_FILE_SAMPLER] + 1;
     71 
     72    for (unsigned i = 0; i < key.nr_samplers; i++) {
     73       if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
     74          lp_sampler_static_sampler_state(
     75             &key.sampler[i].sampler_state,
     76             ctx->samplers[shader_type][i]);
     77       }
     78    }
     79 
     80    /*
     81     * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes
     82     * are dx10-style? Can't really have mixed opcodes, at least not
     83     * if we want to skip the holes here (without rescanning tgsi).
     84     */
     85    if (info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
     86       key.nr_sampler_views =
     87          info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
     88       for (unsigned i = 0; i < key.nr_sampler_views; i++) {
     89          if (info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1 << i)) {
     90             const struct pipe_sampler_view *view =
     91                ctx->sampler_views[shader_type][i];
     92             lp_sampler_static_texture_state(
     93                &key.sampler[i].texture_state, view);
     94             if (view) {
     95                struct swr_resource *swr_res = swr_resource(view->texture);
     96                const struct util_format_description *desc =
     97                   util_format_description(view->format);
     98                if (swr_res->has_depth && swr_res->has_stencil &&
     99                    !util_format_has_depth(desc))
    100                   key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT;
    101             }
    102          }
    103       }
    104    } else {
    105       key.nr_sampler_views = key.nr_samplers;
    106       for (unsigned i = 0; i < key.nr_sampler_views; i++) {
    107          if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
    108             const struct pipe_sampler_view *view =
    109                ctx->sampler_views[shader_type][i];
    110             lp_sampler_static_texture_state(
    111                &key.sampler[i].texture_state, view);
    112             if (view) {
    113                struct swr_resource *swr_res = swr_resource(view->texture);
    114                const struct util_format_description *desc =
    115                   util_format_description(view->format);
    116                if (swr_res->has_depth && swr_res->has_stencil &&
    117                    !util_format_has_depth(desc))
    118                   key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT;
    119             }
    120          }
    121       }
    122    }
    123 }
    124 
    125 void
    126 swr_generate_fs_key(struct swr_jit_fs_key &key,
    127                     struct swr_context *ctx,
    128                     swr_fragment_shader *swr_fs)
    129 {
    130    memset(&key, 0, sizeof(key));
    131 
    132    key.nr_cbufs = ctx->framebuffer.nr_cbufs;
    133    key.light_twoside = ctx->rasterizer->light_twoside;
    134    key.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable;
    135    memcpy(&key.vs_output_semantic_name,
    136           &ctx->vs->info.base.output_semantic_name,
    137           sizeof(key.vs_output_semantic_name));
    138    memcpy(&key.vs_output_semantic_idx,
    139           &ctx->vs->info.base.output_semantic_index,
    140           sizeof(key.vs_output_semantic_idx));
    141 
    142    swr_generate_sampler_key(swr_fs->info, ctx, PIPE_SHADER_FRAGMENT, key);
    143 }
    144 
    145 void
    146 swr_generate_vs_key(struct swr_jit_vs_key &key,
    147                     struct swr_context *ctx,
    148                     swr_vertex_shader *swr_vs)
    149 {
    150    memset(&key, 0, sizeof(key));
    151 
    152    key.clip_plane_mask =
    153       swr_vs->info.base.clipdist_writemask ?
    154       swr_vs->info.base.clipdist_writemask & ctx->rasterizer->clip_plane_enable :
    155       ctx->rasterizer->clip_plane_enable;
    156 
    157    swr_generate_sampler_key(swr_vs->info, ctx, PIPE_SHADER_VERTEX, key);
    158 }
    159 
    160 struct BuilderSWR : public Builder {
    161    BuilderSWR(JitManager *pJitMgr, const char *pName)
    162       : Builder(pJitMgr)
    163    {
    164       pJitMgr->SetupNewModule();
    165       gallivm = gallivm_create(pName, wrap(&JM()->mContext));
    166       pJitMgr->mpCurrentModule = unwrap(gallivm->module);
    167    }
    168 
    169    ~BuilderSWR() {
    170       gallivm_free_ir(gallivm);
    171    }
    172 
    173    struct gallivm_state *gallivm;
    174    PFN_VERTEX_FUNC CompileVS(struct swr_context *ctx, swr_jit_vs_key &key);
    175    PFN_PIXEL_KERNEL CompileFS(struct swr_context *ctx, swr_jit_fs_key &key);
    176 };
    177 
    178 PFN_VERTEX_FUNC
    179 BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
    180 {
    181    struct swr_vertex_shader *swr_vs = ctx->vs;
    182 
    183    LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
    184    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
    185 
    186    memset(outputs, 0, sizeof(outputs));
    187 
    188    AttrBuilder attrBuilder;
    189    attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
    190    AttributeSet attrSet = AttributeSet::get(
    191       JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
    192 
    193    std::vector<Type *> vsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
    194                               PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)};
    195    FunctionType *vsFuncType =
    196       FunctionType::get(Type::getVoidTy(JM()->mContext), vsArgs, false);
    197 
    198    // create new vertex shader function
    199    auto pFunction = Function::Create(vsFuncType,
    200                                      GlobalValue::ExternalLinkage,
    201                                      "VS",
    202                                      JM()->mpCurrentModule);
    203    pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
    204 
    205    BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
    206    IRB()->SetInsertPoint(block);
    207    LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
    208 
    209    auto argitr = pFunction->arg_begin();
    210    Value *hPrivateData = &*argitr++;
    211    hPrivateData->setName("hPrivateData");
    212    Value *pVsCtx = &*argitr++;
    213    pVsCtx->setName("vsCtx");
    214 
    215    Value *consts_ptr = GEP(hPrivateData, {C(0), C(swr_draw_context_constantVS)});
    216 
    217    consts_ptr->setName("vs_constants");
    218    Value *const_sizes_ptr =
    219       GEP(hPrivateData, {0, swr_draw_context_num_constantsVS});
    220    const_sizes_ptr->setName("num_vs_constants");
    221 
    222    Value *vtxInput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVin});
    223 
    224    for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) {
    225       const unsigned mask = swr_vs->info.base.input_usage_mask[attrib];
    226       for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
    227          if (mask & (1 << channel)) {
    228             inputs[attrib][channel] =
    229                wrap(LOAD(vtxInput, {0, 0, attrib, channel}));
    230          }
    231       }
    232    }
    233 
    234    struct lp_build_sampler_soa *sampler =
    235       swr_sampler_soa_create(key.sampler, PIPE_SHADER_VERTEX);
    236 
    237    struct lp_bld_tgsi_system_values system_values;
    238    memset(&system_values, 0, sizeof(system_values));
    239    system_values.instance_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_InstanceID}));
    240    system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID}));
    241 
    242    lp_build_tgsi_soa(gallivm,
    243                      swr_vs->pipe.tokens,
    244                      lp_type_float_vec(32, 32 * 8),
    245                      NULL, // mask
    246                      wrap(consts_ptr),
    247                      wrap(const_sizes_ptr),
    248                      &system_values,
    249                      inputs,
    250                      outputs,
    251                      wrap(hPrivateData), // (sampler context)
    252                      NULL, // thread data
    253                      sampler, // sampler
    254                      &swr_vs->info.base,
    255                      NULL); // geometry shader face
    256 
    257    sampler->destroy(sampler);
    258 
    259    IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
    260 
    261    Value *vtxOutput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVout});
    262 
    263    for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
    264       for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_OUTPUTS; attrib++) {
    265          if (!outputs[attrib][channel])
    266             continue;
    267 
    268          Value *val = LOAD(unwrap(outputs[attrib][channel]));
    269 
    270          uint32_t outSlot = attrib;
    271          if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE)
    272             outSlot = VERTEX_POINT_SIZE_SLOT;
    273          STORE(val, vtxOutput, {0, 0, outSlot, channel});
    274       }
    275    }
    276 
    277    if (ctx->rasterizer->clip_plane_enable ||
    278        swr_vs->info.base.culldist_writemask) {
    279       unsigned clip_mask = ctx->rasterizer->clip_plane_enable;
    280 
    281       unsigned cv = 0;
    282       if (swr_vs->info.base.writes_clipvertex) {
    283          cv = 1 + locate_linkage(TGSI_SEMANTIC_CLIPVERTEX, 0,
    284                                  &swr_vs->info.base);
    285       } else {
    286          for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
    287             if (swr_vs->info.base.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
    288                 swr_vs->info.base.output_semantic_index[i] == 0) {
    289                cv = i;
    290                break;
    291             }
    292          }
    293       }
    294       LLVMValueRef cx = LLVMBuildLoad(gallivm->builder, outputs[cv][0], "");
    295       LLVMValueRef cy = LLVMBuildLoad(gallivm->builder, outputs[cv][1], "");
    296       LLVMValueRef cz = LLVMBuildLoad(gallivm->builder, outputs[cv][2], "");
    297       LLVMValueRef cw = LLVMBuildLoad(gallivm->builder, outputs[cv][3], "");
    298 
    299       for (unsigned val = 0; val < PIPE_MAX_CLIP_PLANES; val++) {
    300          // clip distance overrides user clip planes
    301          if ((swr_vs->info.base.clipdist_writemask & clip_mask & (1 << val)) ||
    302              ((swr_vs->info.base.culldist_writemask << swr_vs->info.base.num_written_clipdistance) & (1 << val))) {
    303             unsigned cv = 1 + locate_linkage(TGSI_SEMANTIC_CLIPDIST, val < 4 ? 0 : 1,
    304                                              &swr_vs->info.base);
    305             if (val < 4) {
    306                LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val], "");
    307                STORE(unwrap(dist), vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_LO_SLOT, val});
    308             } else {
    309                LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val - 4], "");
    310                STORE(unwrap(dist), vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4});
    311             }
    312             continue;
    313          }
    314 
    315          if (!(clip_mask & (1 << val)))
    316             continue;
    317 
    318          Value *px = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 0}));
    319          Value *py = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 1}));
    320          Value *pz = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 2}));
    321          Value *pw = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 3}));
    322          Value *dist = FADD(FMUL(unwrap(cx), VBROADCAST(px)),
    323                             FADD(FMUL(unwrap(cy), VBROADCAST(py)),
    324                                  FADD(FMUL(unwrap(cz), VBROADCAST(pz)),
    325                                       FMUL(unwrap(cw), VBROADCAST(pw)))));
    326 
    327          if (val < 4)
    328             STORE(dist, vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_LO_SLOT, val});
    329          else
    330             STORE(dist, vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4});
    331       }
    332    }
    333 
    334    RET_VOID();
    335 
    336    gallivm_verify_function(gallivm, wrap(pFunction));
    337    gallivm_compile_module(gallivm);
    338 
    339    //   lp_debug_dump_value(func);
    340 
    341    PFN_VERTEX_FUNC pFunc =
    342       (PFN_VERTEX_FUNC)gallivm_jit_function(gallivm, wrap(pFunction));
    343 
    344    debug_printf("vert shader  %p\n", pFunc);
    345    assert(pFunc && "Error: VertShader = NULL");
    346 
    347    JM()->mIsModuleFinalized = true;
    348 
    349    return pFunc;
    350 }
    351 
    352 PFN_VERTEX_FUNC
    353 swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key)
    354 {
    355    BuilderSWR builder(
    356       reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
    357       "VS");
    358    PFN_VERTEX_FUNC func = builder.CompileVS(ctx, key);
    359 
    360    ctx->vs->map.insert(std::make_pair(key, make_unique<VariantVS>(builder.gallivm, func)));
    361    return func;
    362 }
    363 
    364 static unsigned
    365 locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info)
    366 {
    367    for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
    368       if ((info->output_semantic_name[i] == name)
    369           && (info->output_semantic_index[i] == index)) {
    370          return i - 1; // position is not part of the linkage
    371       }
    372    }
    373 
    374    return 0xFFFFFFFF;
    375 }
    376 
    377 PFN_PIXEL_KERNEL
    378 BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_fs_key &key)
    379 {
    380    struct swr_fragment_shader *swr_fs = ctx->fs;
    381 
    382    LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
    383    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
    384 
    385    memset(inputs, 0, sizeof(inputs));
    386    memset(outputs, 0, sizeof(outputs));
    387 
    388    struct lp_build_sampler_soa *sampler = NULL;
    389 
    390    AttrBuilder attrBuilder;
    391    attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
    392    AttributeSet attrSet = AttributeSet::get(
    393       JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
    394 
    395    std::vector<Type *> fsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
    396                               PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)};
    397    FunctionType *funcType =
    398       FunctionType::get(Type::getVoidTy(JM()->mContext), fsArgs, false);
    399 
    400    auto pFunction = Function::Create(funcType,
    401                                      GlobalValue::ExternalLinkage,
    402                                      "FS",
    403                                      JM()->mpCurrentModule);
    404    pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
    405 
    406    BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
    407    IRB()->SetInsertPoint(block);
    408    LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
    409 
    410    auto args = pFunction->arg_begin();
    411    Value *hPrivateData = &*args++;
    412    hPrivateData->setName("hPrivateData");
    413    Value *pPS = &*args++;
    414    pPS->setName("psCtx");
    415 
    416    Value *consts_ptr = GEP(hPrivateData, {0, swr_draw_context_constantFS});
    417    consts_ptr->setName("fs_constants");
    418    Value *const_sizes_ptr =
    419       GEP(hPrivateData, {0, swr_draw_context_num_constantsFS});
    420    const_sizes_ptr->setName("num_fs_constants");
    421 
    422    // load *pAttribs, *pPerspAttribs
    423    Value *pRawAttribs = LOAD(pPS, {0, SWR_PS_CONTEXT_pAttribs}, "pRawAttribs");
    424    Value *pPerspAttribs =
    425       LOAD(pPS, {0, SWR_PS_CONTEXT_pPerspAttribs}, "pPerspAttribs");
    426 
    427    swr_fs->constantMask = 0;
    428    swr_fs->flatConstantMask = 0;
    429    swr_fs->pointSpriteMask = 0;
    430 
    431    for (int attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) {
    432       const unsigned mask = swr_fs->info.base.input_usage_mask[attrib];
    433       const unsigned interpMode = swr_fs->info.base.input_interpolate[attrib];
    434       const unsigned interpLoc = swr_fs->info.base.input_interpolate_loc[attrib];
    435 
    436       if (!mask)
    437          continue;
    438 
    439       // load i,j
    440       Value *vi = nullptr, *vj = nullptr;
    441       switch (interpLoc) {
    442       case TGSI_INTERPOLATE_LOC_CENTER:
    443          vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_center}, "i");
    444          vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_center}, "j");
    445          break;
    446       case TGSI_INTERPOLATE_LOC_CENTROID:
    447          vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_centroid}, "i");
    448          vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_centroid}, "j");
    449          break;
    450       case TGSI_INTERPOLATE_LOC_SAMPLE:
    451          vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_sample}, "i");
    452          vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_sample}, "j");
    453          break;
    454       }
    455 
    456       // load/compute w
    457       Value *vw = nullptr, *pAttribs;
    458       if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE ||
    459           interpMode == TGSI_INTERPOLATE_COLOR) {
    460          pAttribs = pPerspAttribs;
    461          switch (interpLoc) {
    462          case TGSI_INTERPOLATE_LOC_CENTER:
    463             vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center}));
    464             break;
    465          case TGSI_INTERPOLATE_LOC_CENTROID:
    466             vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_centroid}));
    467             break;
    468          case TGSI_INTERPOLATE_LOC_SAMPLE:
    469             vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_sample}));
    470             break;
    471          }
    472       } else {
    473          pAttribs = pRawAttribs;
    474          vw = VIMMED1(1.f);
    475       }
    476 
    477       vw->setName("w");
    478 
    479       ubyte semantic_name = swr_fs->info.base.input_semantic_name[attrib];
    480       ubyte semantic_idx = swr_fs->info.base.input_semantic_index[attrib];
    481 
    482       if (semantic_name == TGSI_SEMANTIC_FACE) {
    483          Value *ff =
    484             UI_TO_FP(LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), mFP32Ty);
    485          ff = FSUB(FMUL(ff, C(2.0f)), C(1.0f));
    486          ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vFrontFace");
    487 
    488          inputs[attrib][0] = wrap(ff);
    489          inputs[attrib][1] = wrap(VIMMED1(0.0f));
    490          inputs[attrib][2] = wrap(VIMMED1(0.0f));
    491          inputs[attrib][3] = wrap(VIMMED1(1.0f));
    492          continue;
    493       } else if (semantic_name == TGSI_SEMANTIC_POSITION) { // gl_FragCoord
    494          if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] ==
    495              TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER) {
    496             inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_center}, "vX"));
    497             inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_center}, "vY"));
    498          } else {
    499             inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_UL}, "vX"));
    500             inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_UL}, "vY"));
    501          }
    502          inputs[attrib][2] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vZ}, "vZ"));
    503          inputs[attrib][3] =
    504             wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center}, "vOneOverW"));
    505          continue;
    506       } else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
    507          Value *primID = LOAD(pPS, {0, SWR_PS_CONTEXT_primID}, "primID");
    508          inputs[attrib][0] = wrap(VECTOR_SPLAT(JM()->mVWidth, primID));
    509          inputs[attrib][1] = wrap(VIMMED1(0));
    510          inputs[attrib][2] = wrap(VIMMED1(0));
    511          inputs[attrib][3] = wrap(VIMMED1(0));
    512          continue;
    513       }
    514 
    515       unsigned linkedAttrib =
    516          locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base);
    517       if (semantic_name == TGSI_SEMANTIC_GENERIC &&
    518           key.sprite_coord_enable & (1 << semantic_idx)) {
    519          /* we add an extra attrib to the backendState in swr_update_derived. */
    520          linkedAttrib = ctx->vs->info.base.num_outputs - 1;
    521          swr_fs->pointSpriteMask |= (1 << linkedAttrib);
    522       } else if (linkedAttrib == 0xFFFFFFFF) {
    523          inputs[attrib][0] = wrap(VIMMED1(0.0f));
    524          inputs[attrib][1] = wrap(VIMMED1(0.0f));
    525          inputs[attrib][2] = wrap(VIMMED1(0.0f));
    526          inputs[attrib][3] = wrap(VIMMED1(1.0f));
    527          /* If we're reading in color and 2-sided lighting is enabled, we have
    528           * to keep going.
    529           */
    530          if (semantic_name != TGSI_SEMANTIC_COLOR || !key.light_twoside)
    531             continue;
    532       } else {
    533          if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
    534             swr_fs->constantMask |= 1 << linkedAttrib;
    535          } else if (interpMode == TGSI_INTERPOLATE_COLOR) {
    536             swr_fs->flatConstantMask |= 1 << linkedAttrib;
    537          }
    538       }
    539 
    540       unsigned bcolorAttrib = 0xFFFFFFFF;
    541       Value *offset = NULL;
    542       if (semantic_name == TGSI_SEMANTIC_COLOR && key.light_twoside) {
    543          bcolorAttrib = locate_linkage(
    544                TGSI_SEMANTIC_BCOLOR, semantic_idx, &ctx->vs->info.base);
    545          /* Neither front nor back colors were available. Nothing to load. */
    546          if (bcolorAttrib == 0xFFFFFFFF && linkedAttrib == 0xFFFFFFFF)
    547             continue;
    548          /* If there is no front color, just always use the back color. */
    549          if (linkedAttrib == 0xFFFFFFFF)
    550             linkedAttrib = bcolorAttrib;
    551 
    552          if (bcolorAttrib != 0xFFFFFFFF) {
    553             if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
    554                swr_fs->constantMask |= 1 << bcolorAttrib;
    555             } else if (interpMode == TGSI_INTERPOLATE_COLOR) {
    556                swr_fs->flatConstantMask |= 1 << bcolorAttrib;
    557             }
    558 
    559             unsigned diff = 12 * (bcolorAttrib - linkedAttrib);
    560 
    561             if (diff) {
    562                Value *back =
    563                   XOR(C(1), LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), "backFace");
    564 
    565                offset = MUL(back, C(diff));
    566                offset->setName("offset");
    567             }
    568          }
    569       }
    570 
    571       for (int channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
    572          if (mask & (1 << channel)) {
    573             Value *indexA = C(linkedAttrib * 12 + channel);
    574             Value *indexB = C(linkedAttrib * 12 + channel + 4);
    575             Value *indexC = C(linkedAttrib * 12 + channel + 8);
    576 
    577             if (offset) {
    578                indexA = ADD(indexA, offset);
    579                indexB = ADD(indexB, offset);
    580                indexC = ADD(indexC, offset);
    581             }
    582 
    583             Value *va = VBROADCAST(LOAD(GEP(pAttribs, indexA)));
    584             Value *vb = VBROADCAST(LOAD(GEP(pAttribs, indexB)));
    585             Value *vc = VBROADCAST(LOAD(GEP(pAttribs, indexC)));
    586 
    587             if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
    588                inputs[attrib][channel] = wrap(va);
    589             } else {
    590                Value *vk = FSUB(FSUB(VIMMED1(1.0f), vi), vj);
    591 
    592                vc = FMUL(vk, vc);
    593 
    594                Value *interp = FMUL(va, vi);
    595                Value *interp1 = FMUL(vb, vj);
    596                interp = FADD(interp, interp1);
    597                interp = FADD(interp, vc);
    598                if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE ||
    599                    interpMode == TGSI_INTERPOLATE_COLOR)
    600                   interp = FMUL(interp, vw);
    601                inputs[attrib][channel] = wrap(interp);
    602             }
    603          }
    604       }
    605    }
    606 
    607    sampler = swr_sampler_soa_create(key.sampler, PIPE_SHADER_FRAGMENT);
    608 
    609    struct lp_bld_tgsi_system_values system_values;
    610    memset(&system_values, 0, sizeof(system_values));
    611 
    612    struct lp_build_mask_context mask;
    613 
    614    if (swr_fs->info.base.uses_kill) {
    615       Value *mask_val = LOAD(pPS, {0, SWR_PS_CONTEXT_activeMask}, "activeMask");
    616       lp_build_mask_begin(
    617          &mask, gallivm, lp_type_float_vec(32, 32 * 8), wrap(mask_val));
    618    }
    619 
    620    lp_build_tgsi_soa(gallivm,
    621                      swr_fs->pipe.tokens,
    622                      lp_type_float_vec(32, 32 * 8),
    623                      swr_fs->info.base.uses_kill ? &mask : NULL, // mask
    624                      wrap(consts_ptr),
    625                      wrap(const_sizes_ptr),
    626                      &system_values,
    627                      inputs,
    628                      outputs,
    629                      wrap(hPrivateData),
    630                      NULL, // thread data
    631                      sampler, // sampler
    632                      &swr_fs->info.base,
    633                      NULL); // geometry shader face
    634 
    635    sampler->destroy(sampler);
    636 
    637    IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
    638 
    639    for (uint32_t attrib = 0; attrib < swr_fs->info.base.num_outputs;
    640         attrib++) {
    641       switch (swr_fs->info.base.output_semantic_name[attrib]) {
    642       case TGSI_SEMANTIC_POSITION: {
    643          // write z
    644          LLVMValueRef outZ =
    645             LLVMBuildLoad(gallivm->builder, outputs[attrib][2], "");
    646          STORE(unwrap(outZ), pPS, {0, SWR_PS_CONTEXT_vZ});
    647          break;
    648       }
    649       case TGSI_SEMANTIC_COLOR: {
    650          for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
    651             if (!outputs[attrib][channel])
    652                continue;
    653 
    654             LLVMValueRef out =
    655                LLVMBuildLoad(gallivm->builder, outputs[attrib][channel], "");
    656             if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
    657                 swr_fs->info.base.output_semantic_index[attrib] == 0) {
    658                for (uint32_t rt = 0; rt < key.nr_cbufs; rt++) {
    659                   STORE(unwrap(out),
    660                         pPS,
    661                         {0, SWR_PS_CONTEXT_shaded, rt, channel});
    662                }
    663             } else {
    664                STORE(unwrap(out),
    665                      pPS,
    666                      {0,
    667                            SWR_PS_CONTEXT_shaded,
    668                            swr_fs->info.base.output_semantic_index[attrib],
    669                            channel});
    670             }
    671          }
    672          break;
    673       }
    674       default: {
    675          fprintf(stderr,
    676                  "unknown output from FS %s[%d]\n",
    677                  tgsi_semantic_names[swr_fs->info.base
    678                                         .output_semantic_name[attrib]],
    679                  swr_fs->info.base.output_semantic_index[attrib]);
    680          break;
    681       }
    682       }
    683    }
    684 
    685    LLVMValueRef mask_result = 0;
    686    if (swr_fs->info.base.uses_kill) {
    687       mask_result = lp_build_mask_end(&mask);
    688    }
    689 
    690    IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
    691 
    692    if (swr_fs->info.base.uses_kill) {
    693       STORE(unwrap(mask_result), pPS, {0, SWR_PS_CONTEXT_activeMask});
    694    }
    695 
    696    RET_VOID();
    697 
    698    gallivm_verify_function(gallivm, wrap(pFunction));
    699 
    700    gallivm_compile_module(gallivm);
    701 
    702    PFN_PIXEL_KERNEL kernel =
    703       (PFN_PIXEL_KERNEL)gallivm_jit_function(gallivm, wrap(pFunction));
    704    debug_printf("frag shader  %p\n", kernel);
    705    assert(kernel && "Error: FragShader = NULL");
    706 
    707    JM()->mIsModuleFinalized = true;
    708 
    709    return kernel;
    710 }
    711 
    712 PFN_PIXEL_KERNEL
    713 swr_compile_fs(struct swr_context *ctx, swr_jit_fs_key &key)
    714 {
    715    BuilderSWR builder(
    716       reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
    717       "FS");
    718    PFN_PIXEL_KERNEL func = builder.CompileFS(ctx, key);
    719 
    720    ctx->fs->map.insert(std::make_pair(key, make_unique<VariantFS>(builder.gallivm, func)));
    721    return func;
    722 }
    723