Home | History | Annotate | Download | only in llvmpipe
      1 /**************************************************************************
      2  *
      3  * Copyright 2009 VMware, Inc.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial portions
     16  * of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
     22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  **************************************************************************/
     27 
     28 
     29 /**
     30  * @file
     31  * Unit tests for blend LLVM IR generation
     32  *
     33  * @author Jose Fonseca <jfonseca (at) vmware.com>
     34  *
     35  * Blend computation code derived from code written by
     36  * @author Brian Paul <brian (at) vmware.com>
     37  */
     38 
     39 #include "util/u_memory.h"
     40 
     41 #include "gallivm/lp_bld_init.h"
     42 #include "gallivm/lp_bld_type.h"
     43 #include "gallivm/lp_bld_debug.h"
     44 #include "lp_bld_blend.h"
     45 #include "lp_test.h"
     46 
     47 
/**
 * Operand layout exercised by a blend test.
 *
 * The numeric values matter: elsewhere in this file the mode is tested for
 * truth, iterated from 0 to 1, and chosen with rand() & 1.
 */
enum vector_mode
{
   AoS = 0,   /* each operand is a single vector */
   SoA = 1    /* each operand is four vectors, one per R/G/B/A channel */
};
     53 
     54 
/**
 * Signature of the JIT-compiled blend test function: three read-only operand
 * buffers (source, destination, constant colour) and one result buffer.
 */
typedef void (*blend_test_ptr_t)(const void *src, const void *dst, const void *con, void *res);
     56 
     57 
     58 void
     59 write_tsv_header(FILE *fp)
     60 {
     61    fprintf(fp,
     62            "result\t"
     63            "cycles_per_channel\t"
     64            "mode\t"
     65            "type\t"
     66            "sep_func\t"
     67            "sep_src_factor\t"
     68            "sep_dst_factor\t"
     69            "rgb_func\t"
     70            "rgb_src_factor\t"
     71            "rgb_dst_factor\t"
     72            "alpha_func\t"
     73            "alpha_src_factor\t"
     74            "alpha_dst_factor\n");
     75 
     76    fflush(fp);
     77 }
     78 
     79 
     80 static void
     81 write_tsv_row(FILE *fp,
     82               const struct pipe_blend_state *blend,
     83               enum vector_mode mode,
     84               struct lp_type type,
     85               double cycles,
     86               boolean success)
     87 {
     88    fprintf(fp, "%s\t", success ? "pass" : "fail");
     89 
     90    if (mode == AoS) {
     91       fprintf(fp, "%.1f\t", cycles / type.length);
     92       fprintf(fp, "aos\t");
     93    }
     94 
     95    if (mode == SoA) {
     96       fprintf(fp, "%.1f\t", cycles / (4 * type.length));
     97       fprintf(fp, "soa\t");
     98    }
     99 
    100    fprintf(fp, "%s%u%sx%u\t",
    101            type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
    102            type.width,
    103            type.norm ? "n" : "",
    104            type.length);
    105 
    106    fprintf(fp,
    107            "%s\t%s\t%s\t",
    108            blend->rt[0].rgb_func != blend->rt[0].alpha_func ? "true" : "false",
    109            blend->rt[0].rgb_src_factor != blend->rt[0].alpha_src_factor ? "true" : "false",
    110            blend->rt[0].rgb_dst_factor != blend->rt[0].alpha_dst_factor ? "true" : "false");
    111 
    112    fprintf(fp,
    113            "%s\t%s\t%s\t%s\t%s\t%s\n",
    114            util_dump_blend_func(blend->rt[0].rgb_func, TRUE),
    115            util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
    116            util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
    117            util_dump_blend_func(blend->rt[0].alpha_func, TRUE),
    118            util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
    119            util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
    120 
    121    fflush(fp);
    122 }
    123 
    124 
    125 static void
    126 dump_blend_type(FILE *fp,
    127                 const struct pipe_blend_state *blend,
    128                 enum vector_mode mode,
    129                 struct lp_type type)
    130 {
    131    fprintf(fp, "%s", mode ? "soa" : "aos");
    132 
    133    fprintf(fp, " type=%s%u%sx%u",
    134            type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
    135            type.width,
    136            type.norm ? "n" : "",
    137            type.length);
    138 
    139    fprintf(fp,
    140            " %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s",
    141            "rgb_func",         util_dump_blend_func(blend->rt[0].rgb_func, TRUE),
    142            "rgb_src_factor",   util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
    143            "rgb_dst_factor",   util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
    144            "alpha_func",       util_dump_blend_func(blend->rt[0].alpha_func, TRUE),
    145            "alpha_src_factor", util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
    146            "alpha_dst_factor", util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
    147 
    148    fprintf(fp, " ...\n");
    149    fflush(fp);
    150 }
    151 
    152 
    153 static LLVMValueRef
    154 add_blend_test(struct gallivm_state *gallivm,
    155                const struct pipe_blend_state *blend,
    156                enum vector_mode mode,
    157                struct lp_type type)
    158 {
    159    LLVMModuleRef module = gallivm->module;
    160    LLVMContextRef context = gallivm->context;
    161    LLVMTypeRef vec_type;
    162    LLVMTypeRef args[4];
    163    LLVMValueRef func;
    164    LLVMValueRef src_ptr;
    165    LLVMValueRef dst_ptr;
    166    LLVMValueRef const_ptr;
    167    LLVMValueRef res_ptr;
    168    LLVMBasicBlockRef block;
    169    LLVMBuilderRef builder;
    170    const enum pipe_format format = PIPE_FORMAT_R8G8B8A8_UNORM;
    171    const unsigned rt = 0;
    172    const unsigned char swizzle[4] = { 0, 1, 2, 3 };
    173 
    174    vec_type = lp_build_vec_type(gallivm, type);
    175 
    176    args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0);
    177    func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidTypeInContext(context), args, 4, 0));
    178    LLVMSetFunctionCallConv(func, LLVMCCallConv);
    179    src_ptr = LLVMGetParam(func, 0);
    180    dst_ptr = LLVMGetParam(func, 1);
    181    const_ptr = LLVMGetParam(func, 2);
    182    res_ptr = LLVMGetParam(func, 3);
    183 
    184    block = LLVMAppendBasicBlockInContext(context, func, "entry");
    185    builder = gallivm->builder;
    186    LLVMPositionBuilderAtEnd(builder, block);
    187 
    188    if (mode == AoS) {
    189       LLVMValueRef src;
    190       LLVMValueRef dst;
    191       LLVMValueRef con;
    192       LLVMValueRef res;
    193 
    194       src = LLVMBuildLoad(builder, src_ptr, "src");
    195       dst = LLVMBuildLoad(builder, dst_ptr, "dst");
    196       con = LLVMBuildLoad(builder, const_ptr, "const");
    197 
    198       res = lp_build_blend_aos(gallivm, blend, &format, type, rt, src, dst, NULL, con, swizzle);
    199 
    200       lp_build_name(res, "res");
    201 
    202       LLVMBuildStore(builder, res, res_ptr);
    203    }
    204 
    205    if (mode == SoA) {
    206       LLVMValueRef src[4];
    207       LLVMValueRef dst[4];
    208       LLVMValueRef con[4];
    209       LLVMValueRef res[4];
    210       unsigned i;
    211 
    212       for(i = 0; i < 4; ++i) {
    213          LLVMValueRef index = LLVMConstInt(LLVMInt32TypeInContext(context), i, 0);
    214          src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), "");
    215          dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");
    216          con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
    217          lp_build_name(src[i], "src.%c", "rgba"[i]);
    218          lp_build_name(con[i], "con.%c", "rgba"[i]);
    219          lp_build_name(dst[i], "dst.%c", "rgba"[i]);
    220       }
    221 
    222       lp_build_blend_soa(gallivm, blend, type, rt, src, dst, con, res);
    223 
    224       for(i = 0; i < 4; ++i) {
    225          LLVMValueRef index = LLVMConstInt(LLVMInt32TypeInContext(context), i, 0);
    226          lp_build_name(res[i], "res.%c", "rgba"[i]);
    227          LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, ""));
    228       }
    229    }
    230 
    231    LLVMBuildRetVoid(builder);;
    232 
    233    return func;
    234 }
    235 
    236 
    237 static void
    238 compute_blend_ref_term(unsigned rgb_factor,
    239                        unsigned alpha_factor,
    240                        const double *factor,
    241                        const double *src,
    242                        const double *dst,
    243                        const double *con,
    244                        double *term)
    245 {
    246    double temp;
    247 
    248    switch (rgb_factor) {
    249    case PIPE_BLENDFACTOR_ONE:
    250       term[0] = factor[0]; /* R */
    251       term[1] = factor[1]; /* G */
    252       term[2] = factor[2]; /* B */
    253       break;
    254    case PIPE_BLENDFACTOR_SRC_COLOR:
    255       term[0] = factor[0] * src[0]; /* R */
    256       term[1] = factor[1] * src[1]; /* G */
    257       term[2] = factor[2] * src[2]; /* B */
    258       break;
    259    case PIPE_BLENDFACTOR_SRC_ALPHA:
    260       term[0] = factor[0] * src[3]; /* R */
    261       term[1] = factor[1] * src[3]; /* G */
    262       term[2] = factor[2] * src[3]; /* B */
    263       break;
    264    case PIPE_BLENDFACTOR_DST_COLOR:
    265       term[0] = factor[0] * dst[0]; /* R */
    266       term[1] = factor[1] * dst[1]; /* G */
    267       term[2] = factor[2] * dst[2]; /* B */
    268       break;
    269    case PIPE_BLENDFACTOR_DST_ALPHA:
    270       term[0] = factor[0] * dst[3]; /* R */
    271       term[1] = factor[1] * dst[3]; /* G */
    272       term[2] = factor[2] * dst[3]; /* B */
    273       break;
    274    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
    275       temp = MIN2(src[3], 1.0f - dst[3]);
    276       term[0] = factor[0] * temp; /* R */
    277       term[1] = factor[1] * temp; /* G */
    278       term[2] = factor[2] * temp; /* B */
    279       break;
    280    case PIPE_BLENDFACTOR_CONST_COLOR:
    281       term[0] = factor[0] * con[0]; /* R */
    282       term[1] = factor[1] * con[1]; /* G */
    283       term[2] = factor[2] * con[2]; /* B */
    284       break;
    285    case PIPE_BLENDFACTOR_CONST_ALPHA:
    286       term[0] = factor[0] * con[3]; /* R */
    287       term[1] = factor[1] * con[3]; /* G */
    288       term[2] = factor[2] * con[3]; /* B */
    289       break;
    290    case PIPE_BLENDFACTOR_SRC1_COLOR:
    291       assert(0); /* to do */
    292       break;
    293    case PIPE_BLENDFACTOR_SRC1_ALPHA:
    294       assert(0); /* to do */
    295       break;
    296    case PIPE_BLENDFACTOR_ZERO:
    297       term[0] = 0.0f; /* R */
    298       term[1] = 0.0f; /* G */
    299       term[2] = 0.0f; /* B */
    300       break;
    301    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
    302       term[0] = factor[0] * (1.0f - src[0]); /* R */
    303       term[1] = factor[1] * (1.0f - src[1]); /* G */
    304       term[2] = factor[2] * (1.0f - src[2]); /* B */
    305       break;
    306    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
    307       term[0] = factor[0] * (1.0f - src[3]); /* R */
    308       term[1] = factor[1] * (1.0f - src[3]); /* G */
    309       term[2] = factor[2] * (1.0f - src[3]); /* B */
    310       break;
    311    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
    312       term[0] = factor[0] * (1.0f - dst[3]); /* R */
    313       term[1] = factor[1] * (1.0f - dst[3]); /* G */
    314       term[2] = factor[2] * (1.0f - dst[3]); /* B */
    315       break;
    316    case PIPE_BLENDFACTOR_INV_DST_COLOR:
    317       term[0] = factor[0] * (1.0f - dst[0]); /* R */
    318       term[1] = factor[1] * (1.0f - dst[1]); /* G */
    319       term[2] = factor[2] * (1.0f - dst[2]); /* B */
    320       break;
    321    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
    322       term[0] = factor[0] * (1.0f - con[0]); /* R */
    323       term[1] = factor[1] * (1.0f - con[1]); /* G */
    324       term[2] = factor[2] * (1.0f - con[2]); /* B */
    325       break;
    326    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
    327       term[0] = factor[0] * (1.0f - con[3]); /* R */
    328       term[1] = factor[1] * (1.0f - con[3]); /* G */
    329       term[2] = factor[2] * (1.0f - con[3]); /* B */
    330       break;
    331    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
    332       assert(0); /* to do */
    333       break;
    334    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
    335       assert(0); /* to do */
    336       break;
    337    default:
    338       assert(0);
    339    }
    340 
    341    /*
    342     * Compute src/first term A
    343     */
    344    switch (alpha_factor) {
    345    case PIPE_BLENDFACTOR_ONE:
    346       term[3] = factor[3]; /* A */
    347       break;
    348    case PIPE_BLENDFACTOR_SRC_COLOR:
    349    case PIPE_BLENDFACTOR_SRC_ALPHA:
    350       term[3] = factor[3] * src[3]; /* A */
    351       break;
    352    case PIPE_BLENDFACTOR_DST_COLOR:
    353    case PIPE_BLENDFACTOR_DST_ALPHA:
    354       term[3] = factor[3] * dst[3]; /* A */
    355       break;
    356    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
    357       term[3] = src[3]; /* A */
    358       break;
    359    case PIPE_BLENDFACTOR_CONST_COLOR:
    360    case PIPE_BLENDFACTOR_CONST_ALPHA:
    361       term[3] = factor[3] * con[3]; /* A */
    362       break;
    363    case PIPE_BLENDFACTOR_ZERO:
    364       term[3] = 0.0f; /* A */
    365       break;
    366    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
    367    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
    368       term[3] = factor[3] * (1.0f - src[3]); /* A */
    369       break;
    370    case PIPE_BLENDFACTOR_INV_DST_COLOR:
    371    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
    372       term[3] = factor[3] * (1.0f - dst[3]); /* A */
    373       break;
    374    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
    375    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
    376       term[3] = factor[3] * (1.0f - con[3]);
    377       break;
    378    default:
    379       assert(0);
    380    }
    381 }
    382 
    383 
    384 static void
    385 compute_blend_ref(const struct pipe_blend_state *blend,
    386                   const double *src,
    387                   const double *dst,
    388                   const double *con,
    389                   double *res)
    390 {
    391    double src_term[4];
    392    double dst_term[4];
    393 
    394    compute_blend_ref_term(blend->rt[0].rgb_src_factor, blend->rt[0].alpha_src_factor,
    395                           src, src, dst, con, src_term);
    396    compute_blend_ref_term(blend->rt[0].rgb_dst_factor, blend->rt[0].alpha_dst_factor,
    397                           dst, src, dst, con, dst_term);
    398 
    399    /*
    400     * Combine RGB terms
    401     */
    402    switch (blend->rt[0].rgb_func) {
    403    case PIPE_BLEND_ADD:
    404       res[0] = src_term[0] + dst_term[0]; /* R */
    405       res[1] = src_term[1] + dst_term[1]; /* G */
    406       res[2] = src_term[2] + dst_term[2]; /* B */
    407       break;
    408    case PIPE_BLEND_SUBTRACT:
    409       res[0] = src_term[0] - dst_term[0]; /* R */
    410       res[1] = src_term[1] - dst_term[1]; /* G */
    411       res[2] = src_term[2] - dst_term[2]; /* B */
    412       break;
    413    case PIPE_BLEND_REVERSE_SUBTRACT:
    414       res[0] = dst_term[0] - src_term[0]; /* R */
    415       res[1] = dst_term[1] - src_term[1]; /* G */
    416       res[2] = dst_term[2] - src_term[2]; /* B */
    417       break;
    418    case PIPE_BLEND_MIN:
    419       res[0] = MIN2(src_term[0], dst_term[0]); /* R */
    420       res[1] = MIN2(src_term[1], dst_term[1]); /* G */
    421       res[2] = MIN2(src_term[2], dst_term[2]); /* B */
    422       break;
    423    case PIPE_BLEND_MAX:
    424       res[0] = MAX2(src_term[0], dst_term[0]); /* R */
    425       res[1] = MAX2(src_term[1], dst_term[1]); /* G */
    426       res[2] = MAX2(src_term[2], dst_term[2]); /* B */
    427       break;
    428    default:
    429       assert(0);
    430    }
    431 
    432    /*
    433     * Combine A terms
    434     */
    435    switch (blend->rt[0].alpha_func) {
    436    case PIPE_BLEND_ADD:
    437       res[3] = src_term[3] + dst_term[3]; /* A */
    438       break;
    439    case PIPE_BLEND_SUBTRACT:
    440       res[3] = src_term[3] - dst_term[3]; /* A */
    441       break;
    442    case PIPE_BLEND_REVERSE_SUBTRACT:
    443       res[3] = dst_term[3] - src_term[3]; /* A */
    444       break;
    445    case PIPE_BLEND_MIN:
    446       res[3] = MIN2(src_term[3], dst_term[3]); /* A */
    447       break;
    448    case PIPE_BLEND_MAX:
    449       res[3] = MAX2(src_term[3], dst_term[3]); /* A */
    450       break;
    451    default:
    452       assert(0);
    453    }
    454 }
    455 
    456 
    457 PIPE_ALIGN_STACK
    458 static boolean
    459 test_one(unsigned verbose,
    460          FILE *fp,
    461          const struct pipe_blend_state *blend,
    462          enum vector_mode mode,
    463          struct lp_type type)
    464 {
    465    struct gallivm_state *gallivm;
    466    LLVMValueRef func = NULL;
    467    blend_test_ptr_t blend_test_ptr;
    468    boolean success;
    469    const unsigned n = LP_TEST_NUM_SAMPLES;
    470    int64_t cycles[LP_TEST_NUM_SAMPLES];
    471    double cycles_avg = 0.0;
    472    unsigned i, j;
    473    const unsigned stride = lp_type_width(type)/8;
    474 
    475    if(verbose >= 1)
    476       dump_blend_type(stdout, blend, mode, type);
    477 
    478    gallivm = gallivm_create();
    479 
    480    func = add_blend_test(gallivm, blend, mode, type);
    481 
    482    gallivm_compile_module(gallivm);
    483 
    484    blend_test_ptr = (blend_test_ptr_t)gallivm_jit_function(gallivm, func);
    485 
    486    success = TRUE;
    487    if(mode == AoS) {
    488       uint8_t *src, *dst, *con, *res, *ref;
    489       src = align_malloc(stride, stride);
    490       dst = align_malloc(stride, stride);
    491       con = align_malloc(stride, stride);
    492       res = align_malloc(stride, stride);
    493       ref = align_malloc(stride, stride);
    494 
    495       for(i = 0; i < n && success; ++i) {
    496          int64_t start_counter = 0;
    497          int64_t end_counter = 0;
    498 
    499          random_vec(type, src);
    500          random_vec(type, dst);
    501          random_vec(type, con);
    502 
    503          {
    504             double fsrc[LP_MAX_VECTOR_LENGTH];
    505             double fdst[LP_MAX_VECTOR_LENGTH];
    506             double fcon[LP_MAX_VECTOR_LENGTH];
    507             double fref[LP_MAX_VECTOR_LENGTH];
    508 
    509             read_vec(type, src, fsrc);
    510             read_vec(type, dst, fdst);
    511             read_vec(type, con, fcon);
    512 
    513             for(j = 0; j < type.length; j += 4)
    514                compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j);
    515 
    516             write_vec(type, ref, fref);
    517          }
    518 
    519          start_counter = rdtsc();
    520          blend_test_ptr(src, dst, con, res);
    521          end_counter = rdtsc();
    522 
    523          cycles[i] = end_counter - start_counter;
    524 
    525          if(!compare_vec(type, res, ref)) {
    526             success = FALSE;
    527 
    528             if(verbose < 1)
    529                dump_blend_type(stderr, blend, mode, type);
    530             fprintf(stderr, "MISMATCH\n");
    531 
    532             fprintf(stderr, "  Src: ");
    533             dump_vec(stderr, type, src);
    534             fprintf(stderr, "\n");
    535 
    536             fprintf(stderr, "  Dst: ");
    537             dump_vec(stderr, type, dst);
    538             fprintf(stderr, "\n");
    539 
    540             fprintf(stderr, "  Con: ");
    541             dump_vec(stderr, type, con);
    542             fprintf(stderr, "\n");
    543 
    544             fprintf(stderr, "  Res: ");
    545             dump_vec(stderr, type, res);
    546             fprintf(stderr, "\n");
    547 
    548             fprintf(stderr, "  Ref: ");
    549             dump_vec(stderr, type, ref);
    550             fprintf(stderr, "\n");
    551          }
    552       }
    553       align_free(src);
    554       align_free(dst);
    555       align_free(con);
    556       align_free(res);
    557       align_free(ref);
    558    }
    559    else if(mode == SoA) {
    560       uint8_t *src, *dst, *con, *res, *ref;
    561       src = align_malloc(4*stride, stride);
    562       dst = align_malloc(4*stride, stride);
    563       con = align_malloc(4*stride, stride);
    564       res = align_malloc(4*stride, stride);
    565       ref = align_malloc(4*stride, stride);
    566 
    567       for(i = 0; i < n && success; ++i) {
    568          int64_t start_counter = 0;
    569          int64_t end_counter = 0;
    570          boolean mismatch;
    571 
    572          for(j = 0; j < 4; ++j) {
    573             random_vec(type, src + j*stride);
    574             random_vec(type, dst + j*stride);
    575             random_vec(type, con + j*stride);
    576          }
    577 
    578          {
    579             double fsrc[4];
    580             double fdst[4];
    581             double fcon[4];
    582             double fref[4];
    583             unsigned k;
    584 
    585             for(k = 0; k < type.length; ++k) {
    586                for(j = 0; j < 4; ++j) {
    587                   fsrc[j] = read_elem(type, src + j*stride, k);
    588                   fdst[j] = read_elem(type, dst + j*stride, k);
    589                   fcon[j] = read_elem(type, con + j*stride, k);
    590                }
    591 
    592                compute_blend_ref(blend, fsrc, fdst, fcon, fref);
    593 
    594                for(j = 0; j < 4; ++j)
    595                   write_elem(type, ref + j*stride, k, fref[j]);
    596             }
    597          }
    598 
    599          start_counter = rdtsc();
    600          blend_test_ptr(src, dst, con, res);
    601          end_counter = rdtsc();
    602 
    603          cycles[i] = end_counter - start_counter;
    604 
    605          mismatch = FALSE;
    606          for (j = 0; j < 4; ++j)
    607             if(!compare_vec(type, res + j*stride, ref + j*stride))
    608                mismatch = TRUE;
    609 
    610          if (mismatch) {
    611             success = FALSE;
    612 
    613             if(verbose < 1)
    614                dump_blend_type(stderr, blend, mode, type);
    615             fprintf(stderr, "MISMATCH\n");
    616             for(j = 0; j < 4; ++j) {
    617                char channel = "RGBA"[j];
    618                fprintf(stderr, "  Src%c: ", channel);
    619                dump_vec(stderr, type, src + j*stride);
    620                fprintf(stderr, "\n");
    621 
    622                fprintf(stderr, "  Dst%c: ", channel);
    623                dump_vec(stderr, type, dst + j*stride);
    624                fprintf(stderr, "\n");
    625 
    626                fprintf(stderr, "  Con%c: ", channel);
    627                dump_vec(stderr, type, con + j*stride);
    628                fprintf(stderr, "\n");
    629 
    630                fprintf(stderr, "  Res%c: ", channel);
    631                dump_vec(stderr, type, res + j*stride);
    632                fprintf(stderr, "\n");
    633 
    634                fprintf(stderr, "  Ref%c: ", channel);
    635                dump_vec(stderr, type, ref + j*stride);
    636                fprintf(stderr, "\n");
    637 
    638                fprintf(stderr, "\n");
    639             }
    640          }
    641       }
    642       align_free(src);
    643       align_free(dst);
    644       align_free(con);
    645       align_free(res);
    646       align_free(ref);
    647    }
    648 
    649    /*
    650     * Unfortunately the output of cycle counter is not very reliable as it comes
    651     * -- sometimes we get outliers (due IRQs perhaps?) which are
    652     * better removed to avoid random or biased data.
    653     */
    654    {
    655       double sum = 0.0, sum2 = 0.0;
    656       double avg, std;
    657       unsigned m;
    658 
    659       for(i = 0; i < n; ++i) {
    660          sum += cycles[i];
    661          sum2 += cycles[i]*cycles[i];
    662       }
    663 
    664       avg = sum/n;
    665       std = sqrtf((sum2 - n*avg*avg)/n);
    666 
    667       m = 0;
    668       sum = 0.0;
    669       for(i = 0; i < n; ++i) {
    670          if(fabs(cycles[i] - avg) <= 4.0*std) {
    671             sum += cycles[i];
    672             ++m;
    673          }
    674       }
    675 
    676       cycles_avg = sum/m;
    677 
    678    }
    679 
    680    if(fp)
    681       write_tsv_row(fp, blend, mode, type, cycles_avg, success);
    682 
    683    gallivm_free_function(gallivm, func, blend_test_ptr);
    684 
    685    gallivm_destroy(gallivm);
    686 
    687    return success;
    688 }
    689 
    690 
/**
 * Blend factors exercised by the tests.
 *
 * The SRC1 variants are compiled out; the reference implementation in this
 * file asserts on them.
 */
const unsigned
blend_factors[] = {
   PIPE_BLENDFACTOR_ZERO,
   PIPE_BLENDFACTOR_ONE,
   PIPE_BLENDFACTOR_SRC_COLOR,
   PIPE_BLENDFACTOR_SRC_ALPHA,
   PIPE_BLENDFACTOR_DST_COLOR,
   PIPE_BLENDFACTOR_DST_ALPHA,
   PIPE_BLENDFACTOR_CONST_COLOR,
   PIPE_BLENDFACTOR_CONST_ALPHA,
#if 0
   PIPE_BLENDFACTOR_SRC1_COLOR,
   PIPE_BLENDFACTOR_SRC1_ALPHA,
#endif
   PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE,
   PIPE_BLENDFACTOR_INV_SRC_COLOR,
   PIPE_BLENDFACTOR_INV_SRC_ALPHA,
   PIPE_BLENDFACTOR_INV_DST_COLOR,
   PIPE_BLENDFACTOR_INV_DST_ALPHA,
   PIPE_BLENDFACTOR_INV_CONST_COLOR,
   PIPE_BLENDFACTOR_INV_CONST_ALPHA,
#if 0
   PIPE_BLENDFACTOR_INV_SRC1_COLOR,
   PIPE_BLENDFACTOR_INV_SRC1_ALPHA,
#endif
};
    717 
    718 
/**
 * Blend equations exercised by the tests, for both the RGB and the alpha
 * function.
 */
const unsigned
blend_funcs[] = {
   PIPE_BLEND_ADD,
   PIPE_BLEND_SUBTRACT,
   PIPE_BLEND_REVERSE_SUBTRACT,
   PIPE_BLEND_MIN,
   PIPE_BLEND_MAX
};
    727 
    728 
/**
 * Vector types exercised by the tests.
 *
 * The initialisers are positional; the column comment below gives the
 * intended field order.
 */
const struct lp_type blend_types[] = {
   /* float, fixed,  sign,  norm, width, len */
   {   TRUE, FALSE,  TRUE, FALSE,    32,   4 }, /* f32 x 4 */
   {  FALSE, FALSE, FALSE,  TRUE,     8,  16 }, /* u8n x 16 */
};
    734 
    735 
    736 const unsigned num_funcs = sizeof(blend_funcs)/sizeof(blend_funcs[0]);
    737 const unsigned num_factors = sizeof(blend_factors)/sizeof(blend_factors[0]);
    738 const unsigned num_types = sizeof(blend_types)/sizeof(blend_types[0]);
    739 
    740 
    741 boolean
    742 test_all(unsigned verbose, FILE *fp)
    743 {
    744    const unsigned *rgb_func;
    745    const unsigned *rgb_src_factor;
    746    const unsigned *rgb_dst_factor;
    747    const unsigned *alpha_func;
    748    const unsigned *alpha_src_factor;
    749    const unsigned *alpha_dst_factor;
    750    struct pipe_blend_state blend;
    751    enum vector_mode mode;
    752    const struct lp_type *type;
    753    boolean success = TRUE;
    754 
    755    for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) {
    756       for(alpha_func = blend_funcs; alpha_func < &blend_funcs[num_funcs]; ++alpha_func) {
    757          for(rgb_src_factor = blend_factors; rgb_src_factor < &blend_factors[num_factors]; ++rgb_src_factor) {
    758             for(rgb_dst_factor = blend_factors; rgb_dst_factor <= rgb_src_factor; ++rgb_dst_factor) {
    759                for(alpha_src_factor = blend_factors; alpha_src_factor < &blend_factors[num_factors]; ++alpha_src_factor) {
    760                   for(alpha_dst_factor = blend_factors; alpha_dst_factor <= alpha_src_factor; ++alpha_dst_factor) {
    761                      for(mode = 0; mode < 2; ++mode) {
    762                         for(type = blend_types; type < &blend_types[num_types]; ++type) {
    763 
    764                            if(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
    765                               *alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
    766                               continue;
    767 
    768                            memset(&blend, 0, sizeof blend);
    769                            blend.rt[0].blend_enable      = 1;
    770                            blend.rt[0].rgb_func          = *rgb_func;
    771                            blend.rt[0].rgb_src_factor    = *rgb_src_factor;
    772                            blend.rt[0].rgb_dst_factor    = *rgb_dst_factor;
    773                            blend.rt[0].alpha_func        = *alpha_func;
    774                            blend.rt[0].alpha_src_factor  = *alpha_src_factor;
    775                            blend.rt[0].alpha_dst_factor  = *alpha_dst_factor;
    776                            blend.rt[0].colormask         = PIPE_MASK_RGBA;
    777 
    778                            if(!test_one(verbose, fp, &blend, mode, *type))
    779                              success = FALSE;
    780 
    781                         }
    782                      }
    783                   }
    784                }
    785             }
    786          }
    787       }
    788    }
    789 
    790    return success;
    791 }
    792 
    793 
    794 boolean
    795 test_some(unsigned verbose, FILE *fp,
    796           unsigned long n)
    797 {
    798    const unsigned *rgb_func;
    799    const unsigned *rgb_src_factor;
    800    const unsigned *rgb_dst_factor;
    801    const unsigned *alpha_func;
    802    const unsigned *alpha_src_factor;
    803    const unsigned *alpha_dst_factor;
    804    struct pipe_blend_state blend;
    805    enum vector_mode mode;
    806    const struct lp_type *type;
    807    unsigned long i;
    808    boolean success = TRUE;
    809 
    810    for(i = 0; i < n; ++i) {
    811       rgb_func = &blend_funcs[rand() % num_funcs];
    812       alpha_func = &blend_funcs[rand() % num_funcs];
    813       rgb_src_factor = &blend_factors[rand() % num_factors];
    814       alpha_src_factor = &blend_factors[rand() % num_factors];
    815 
    816       do {
    817          rgb_dst_factor = &blend_factors[rand() % num_factors];
    818       } while(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
    819 
    820       do {
    821          alpha_dst_factor = &blend_factors[rand() % num_factors];
    822       } while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
    823 
    824       mode = rand() & 1;
    825 
    826       type = &blend_types[rand() % num_types];
    827 
    828       memset(&blend, 0, sizeof blend);
    829       blend.rt[0].blend_enable      = 1;
    830       blend.rt[0].rgb_func          = *rgb_func;
    831       blend.rt[0].rgb_src_factor    = *rgb_src_factor;
    832       blend.rt[0].rgb_dst_factor    = *rgb_dst_factor;
    833       blend.rt[0].alpha_func        = *alpha_func;
    834       blend.rt[0].alpha_src_factor  = *alpha_src_factor;
    835       blend.rt[0].alpha_dst_factor  = *alpha_dst_factor;
    836       blend.rt[0].colormask         = PIPE_MASK_RGBA;
    837 
    838       if(!test_one(verbose, fp, &blend, mode, *type))
    839         success = FALSE;
    840    }
    841 
    842    return success;
    843 }
    844 
    845 
    846 boolean
    847 test_single(unsigned verbose, FILE *fp)
    848 {
    849    printf("no test_single()");
    850    return TRUE;
    851 }
    852