Home | History | Annotate | Download | only in softpipe
      1 /**************************************************************************
      2  *
      3  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
      4  * All Rights Reserved.
      5  * Copyright 2008-2010 VMware, Inc.  All rights reserved.
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining a
      8  * copy of this software and associated documentation files (the
      9  * "Software"), to deal in the Software without restriction, including
     10  * without limitation the rights to use, copy, modify, merge, publish,
     11  * distribute, sub license, and/or sell copies of the Software, and to
     12  * permit persons to whom the Software is furnished to do so, subject to
     13  * the following conditions:
     14  *
     15  * The above copyright notice and this permission notice (including the
     16  * next paragraph) shall be included in all copies or substantial portions
     17  * of the Software.
     18  *
     19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     22  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
     23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     26  *
     27  **************************************************************************/
     28 
     29 /**
     30  * Texture sampling
     31  *
     32  * Authors:
     33  *   Brian Paul
     34  *   Keith Whitwell
     35  */
     36 
     37 #include "pipe/p_context.h"
     38 #include "pipe/p_defines.h"
     39 #include "pipe/p_shader_tokens.h"
     40 #include "util/u_math.h"
     41 #include "util/u_memory.h"
     42 #include "sp_quad.h"   /* only for #define QUAD_* tokens */
     43 #include "sp_tex_sample.h"
     44 #include "sp_tex_tile_cache.h"
     45 
     46 
     47 /** Set to one to help debug texture sampling */
     48 #define DEBUG_TEX 0
     49 
     50 
     51 /*
     52  * Return fractional part of 'f'.  Used for computing interpolation weights.
     53  * Need to be careful with negative values.
     54  * Note, if this function isn't perfect you'll sometimes see 1-pixel bands
     55  * of improperly weighted linear-filtered textures.
     56  * The tests/texwrap.c demo is a good test.
     57  */
     58 static INLINE float
     59 frac(float f)
     60 {
     61    return f - floorf(f);
     62 }
     63 
     64 
     65 
     66 /**
     67  * Linear interpolation macro
     68  */
     69 static INLINE float
     70 lerp(float a, float v0, float v1)
     71 {
     72    return v0 + a * (v1 - v0);
     73 }
     74 
     75 
     76 /**
     77  * Do 2D/bilinear interpolation of float values.
     78  * v00, v10, v01 and v11 are typically four texture samples in a square/box.
     79  * a and b are the horizontal and vertical interpolants.
     80  * It's important that this function is inlined when compiled with
     81  * optimization!  If we find that's not true on some systems, convert
     82  * to a macro.
     83  */
     84 static INLINE float
     85 lerp_2d(float a, float b,
     86         float v00, float v10, float v01, float v11)
     87 {
     88    const float temp0 = lerp(a, v00, v10);
     89    const float temp1 = lerp(a, v01, v11);
     90    return lerp(b, temp0, temp1);
     91 }
     92 
     93 
     94 /**
     95  * As above, but 3D interpolation of 8 values.
     96  */
     97 static INLINE float
     98 lerp_3d(float a, float b, float c,
     99         float v000, float v100, float v010, float v110,
    100         float v001, float v101, float v011, float v111)
    101 {
    102    const float temp0 = lerp_2d(a, b, v000, v100, v010, v110);
    103    const float temp1 = lerp_2d(a, b, v001, v101, v011, v111);
    104    return lerp(c, temp0, temp1);
    105 }
    106 
    107 
    108 
    109 /**
    110  * Compute coord % size for repeat wrap modes.
    111  * Note that if coord is negative, coord % size doesn't give the right
    112  * value.  To avoid that problem we add a large multiple of the size
    113  * (rather than using a conditional).
    114  */
    115 static INLINE int
    116 repeat(int coord, unsigned size)
    117 {
    118    return (coord + size * 1024) % size;
    119 }
    120 
    121 
    122 /**
    123  * Apply texture coord wrapping mode and return integer texture indexes
    124  * for a vector of four texcoords (S or T or P).
    125  * \param wrapMode  PIPE_TEX_WRAP_x
    126  * \param s  the incoming texcoords
    127  * \param size  the texture image size
    128  * \param icoord  returns the integer texcoords
    129  * \return  integer texture index
    130  */
    131 static void
    132 wrap_nearest_repeat(float s, unsigned size, int *icoord)
    133 {
    134    /* s limited to [0,1) */
    135    /* i limited to [0,size-1] */
    136    int i = util_ifloor(s * size);
    137    *icoord = repeat(i, size);
    138 }
    139 
    140 
    141 static void
    142 wrap_nearest_clamp(float s, unsigned size, int *icoord)
    143 {
    144    /* s limited to [0,1] */
    145    /* i limited to [0,size-1] */
    146    if (s <= 0.0F)
    147       *icoord = 0;
    148    else if (s >= 1.0F)
    149       *icoord = size - 1;
    150    else
    151       *icoord = util_ifloor(s * size);
    152 }
    153 
    154 
    155 static void
    156 wrap_nearest_clamp_to_edge(float s, unsigned size, int *icoord)
    157 {
    158    /* s limited to [min,max] */
    159    /* i limited to [0, size-1] */
    160    const float min = 1.0F / (2.0F * size);
    161    const float max = 1.0F - min;
    162    if (s < min)
    163       *icoord = 0;
    164    else if (s > max)
    165       *icoord = size - 1;
    166    else
    167       *icoord = util_ifloor(s * size);
    168 }
    169 
    170 
    171 static void
    172 wrap_nearest_clamp_to_border(float s, unsigned size, int *icoord)
    173 {
    174    /* s limited to [min,max] */
    175    /* i limited to [-1, size] */
    176    const float min = -1.0F / (2.0F * size);
    177    const float max = 1.0F - min;
    178    if (s <= min)
    179       *icoord = -1;
    180    else if (s >= max)
    181       *icoord = size;
    182    else
    183       *icoord = util_ifloor(s * size);
    184 }
    185 
    186 
    187 static void
    188 wrap_nearest_mirror_repeat(float s, unsigned size, int *icoord)
    189 {
    190    const float min = 1.0F / (2.0F * size);
    191    const float max = 1.0F - min;
    192    const int flr = util_ifloor(s);
    193    float u = frac(s);
    194    if (flr & 1)
    195       u = 1.0F - u;
    196    if (u < min)
    197       *icoord = 0;
    198    else if (u > max)
    199       *icoord = size - 1;
    200    else
    201       *icoord = util_ifloor(u * size);
    202 }
    203 
    204 
    205 static void
    206 wrap_nearest_mirror_clamp(float s, unsigned size, int *icoord)
    207 {
    208    /* s limited to [0,1] */
    209    /* i limited to [0,size-1] */
    210    const float u = fabsf(s);
    211    if (u <= 0.0F)
    212       *icoord = 0;
    213    else if (u >= 1.0F)
    214       *icoord = size - 1;
    215    else
    216       *icoord = util_ifloor(u * size);
    217 }
    218 
    219 
    220 static void
    221 wrap_nearest_mirror_clamp_to_edge(float s, unsigned size, int *icoord)
    222 {
    223    /* s limited to [min,max] */
    224    /* i limited to [0, size-1] */
    225    const float min = 1.0F / (2.0F * size);
    226    const float max = 1.0F - min;
    227    const float u = fabsf(s);
    228    if (u < min)
    229       *icoord = 0;
    230    else if (u > max)
    231       *icoord = size - 1;
    232    else
    233       *icoord = util_ifloor(u * size);
    234 }
    235 
    236 
    237 static void
    238 wrap_nearest_mirror_clamp_to_border(float s, unsigned size, int *icoord)
    239 {
    240    /* s limited to [min,max] */
    241    /* i limited to [0, size-1] */
    242    const float min = -1.0F / (2.0F * size);
    243    const float max = 1.0F - min;
    244    const float u = fabsf(s);
    245    if (u < min)
    246       *icoord = -1;
    247    else if (u > max)
    248       *icoord = size;
    249    else
    250       *icoord = util_ifloor(u * size);
    251 }
    252 
    253 
    254 /**
    255  * Used to compute texel locations for linear sampling
    256  * \param wrapMode  PIPE_TEX_WRAP_x
    257  * \param s  the texcoord
    258  * \param size  the texture image size
    259  * \param icoord0  returns first texture index
    260  * \param icoord1  returns second texture index (usually icoord0 + 1)
    261  * \param w  returns blend factor/weight between texture indices
    262  * \param icoord  returns the computed integer texture coord
    263  */
    264 static void
    265 wrap_linear_repeat(float s, unsigned size,
    266                    int *icoord0, int *icoord1, float *w)
    267 {
    268    float u = s * size - 0.5F;
    269    *icoord0 = repeat(util_ifloor(u), size);
    270    *icoord1 = repeat(*icoord0 + 1, size);
    271    *w = frac(u);
    272 }
    273 
    274 
    275 static void
    276 wrap_linear_clamp(float s, unsigned size,
    277                   int *icoord0, int *icoord1, float *w)
    278 {
    279    float u = CLAMP(s, 0.0F, 1.0F);
    280    u = u * size - 0.5f;
    281    *icoord0 = util_ifloor(u);
    282    *icoord1 = *icoord0 + 1;
    283    *w = frac(u);
    284 }
    285 
    286 
    287 static void
    288 wrap_linear_clamp_to_edge(float s, unsigned size,
    289                           int *icoord0, int *icoord1, float *w)
    290 {
    291    float u = CLAMP(s, 0.0F, 1.0F);
    292    u = u * size - 0.5f;
    293    *icoord0 = util_ifloor(u);
    294    *icoord1 = *icoord0 + 1;
    295    if (*icoord0 < 0)
    296       *icoord0 = 0;
    297    if (*icoord1 >= (int) size)
    298       *icoord1 = size - 1;
    299    *w = frac(u);
    300 }
    301 
    302 
    303 static void
    304 wrap_linear_clamp_to_border(float s, unsigned size,
    305                             int *icoord0, int *icoord1, float *w)
    306 {
    307    const float min = -1.0F / (2.0F * size);
    308    const float max = 1.0F - min;
    309    float u = CLAMP(s, min, max);
    310    u = u * size - 0.5f;
    311    *icoord0 = util_ifloor(u);
    312    *icoord1 = *icoord0 + 1;
    313    *w = frac(u);
    314 }
    315 
    316 
    317 static void
    318 wrap_linear_mirror_repeat(float s, unsigned size,
    319                           int *icoord0, int *icoord1, float *w)
    320 {
    321    const int flr = util_ifloor(s);
    322    float u = frac(s);
    323    if (flr & 1)
    324       u = 1.0F - u;
    325    u = u * size - 0.5F;
    326    *icoord0 = util_ifloor(u);
    327    *icoord1 = *icoord0 + 1;
    328    if (*icoord0 < 0)
    329       *icoord0 = 0;
    330    if (*icoord1 >= (int) size)
    331       *icoord1 = size - 1;
    332    *w = frac(u);
    333 }
    334 
    335 
    336 static void
    337 wrap_linear_mirror_clamp(float s, unsigned size,
    338                          int *icoord0, int *icoord1, float *w)
    339 {
    340    float u = fabsf(s);
    341    if (u >= 1.0F)
    342       u = (float) size;
    343    else
    344       u *= size;
    345    u -= 0.5F;
    346    *icoord0 = util_ifloor(u);
    347    *icoord1 = *icoord0 + 1;
    348    *w = frac(u);
    349 }
    350 
    351 
    352 static void
    353 wrap_linear_mirror_clamp_to_edge(float s, unsigned size,
    354                                  int *icoord0, int *icoord1, float *w)
    355 {
    356    float u = fabsf(s);
    357    if (u >= 1.0F)
    358       u = (float) size;
    359    else
    360       u *= size;
    361    u -= 0.5F;
    362    *icoord0 = util_ifloor(u);
    363    *icoord1 = *icoord0 + 1;
    364    if (*icoord0 < 0)
    365       *icoord0 = 0;
    366    if (*icoord1 >= (int) size)
    367       *icoord1 = size - 1;
    368    *w = frac(u);
    369 }
    370 
    371 
    372 static void
    373 wrap_linear_mirror_clamp_to_border(float s, unsigned size,
    374                                    int *icoord0, int *icoord1, float *w)
    375 {
    376    const float min = -1.0F / (2.0F * size);
    377    const float max = 1.0F - min;
    378    float u = fabsf(s);
    379    if (u <= min)
    380       u = min * size;
    381    else if (u >= max)
    382       u = max * size;
    383    else
    384       u *= size;
    385    u -= 0.5F;
    386    *icoord0 = util_ifloor(u);
    387    *icoord1 = *icoord0 + 1;
    388    *w = frac(u);
    389 }
    390 
    391 
    392 /**
    393  * PIPE_TEX_WRAP_CLAMP for nearest sampling, unnormalized coords.
    394  */
    395 static void
    396 wrap_nearest_unorm_clamp(float s, unsigned size, int *icoord)
    397 {
    398    int i = util_ifloor(s);
    399    *icoord = CLAMP(i, 0, (int) size-1);
    400 }
    401 
    402 
    403 /**
    404  * PIPE_TEX_WRAP_CLAMP_TO_BORDER for nearest sampling, unnormalized coords.
    405  */
    406 static void
    407 wrap_nearest_unorm_clamp_to_border(float s, unsigned size, int *icoord)
    408 {
    409    *icoord = util_ifloor( CLAMP(s, -0.5F, (float) size + 0.5F) );
    410 }
    411 
    412 
    413 /**
    414  * PIPE_TEX_WRAP_CLAMP_TO_EDGE for nearest sampling, unnormalized coords.
    415  */
    416 static void
    417 wrap_nearest_unorm_clamp_to_edge(float s, unsigned size, int *icoord)
    418 {
    419    *icoord = util_ifloor( CLAMP(s, 0.5F, (float) size - 0.5F) );
    420 }
    421 
    422 
    423 /**
    424  * PIPE_TEX_WRAP_CLAMP for linear sampling, unnormalized coords.
    425  */
    426 static void
    427 wrap_linear_unorm_clamp(float s, unsigned size,
    428                         int *icoord0, int *icoord1, float *w)
    429 {
    430    /* Not exactly what the spec says, but it matches NVIDIA output */
    431    float u = CLAMP(s - 0.5F, 0.0f, (float) size - 1.0f);
    432    *icoord0 = util_ifloor(u);
    433    *icoord1 = *icoord0 + 1;
    434    *w = frac(u);
    435 }
    436 
    437 
    438 /**
    439  * PIPE_TEX_WRAP_CLAMP_TO_BORDER for linear sampling, unnormalized coords.
    440  */
    441 static void
    442 wrap_linear_unorm_clamp_to_border(float s, unsigned size,
    443                                   int *icoord0, int *icoord1, float *w)
    444 {
    445    float u = CLAMP(s, -0.5F, (float) size + 0.5F);
    446    u -= 0.5F;
    447    *icoord0 = util_ifloor(u);
    448    *icoord1 = *icoord0 + 1;
    449    if (*icoord1 > (int) size - 1)
    450       *icoord1 = size - 1;
    451    *w = frac(u);
    452 }
    453 
    454 
    455 /**
    456  * PIPE_TEX_WRAP_CLAMP_TO_EDGE for linear sampling, unnormalized coords.
    457  */
    458 static void
    459 wrap_linear_unorm_clamp_to_edge(float s, unsigned size,
    460                                 int *icoord0, int *icoord1, float *w)
    461 {
    462    float u = CLAMP(s, +0.5F, (float) size - 0.5F);
    463    u -= 0.5F;
    464    *icoord0 = util_ifloor(u);
    465    *icoord1 = *icoord0 + 1;
    466    if (*icoord1 > (int) size - 1)
    467       *icoord1 = size - 1;
    468    *w = frac(u);
    469 }
    470 
    471 
    472 /**
    473  * Do coordinate to array index conversion.  For array textures.
    474  */
    475 static INLINE void
    476 wrap_array_layer(float coord, unsigned size, int *layer)
    477 {
    478    int c = util_ifloor(coord + 0.5F);
    479    *layer = CLAMP(c, 0, size - 1);
    480 }
    481 
    482 
    483 /**
    484  * Examine the quad's texture coordinates to compute the partial
    485  * derivatives w.r.t X and Y, then compute lambda (level of detail).
    486  */
    487 static float
    488 compute_lambda_1d(const struct sp_sampler_variant *samp,
    489                   const float s[TGSI_QUAD_SIZE],
    490                   const float t[TGSI_QUAD_SIZE],
    491                   const float p[TGSI_QUAD_SIZE])
    492 {
    493    const struct pipe_resource *texture = samp->view->texture;
    494    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
    495    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
    496    float rho = MAX2(dsdx, dsdy) * u_minify(texture->width0, samp->view->u.tex.first_level);
    497 
    498    return util_fast_log2(rho);
    499 }
    500 
    501 
    502 static float
    503 compute_lambda_2d(const struct sp_sampler_variant *samp,
    504                   const float s[TGSI_QUAD_SIZE],
    505                   const float t[TGSI_QUAD_SIZE],
    506                   const float p[TGSI_QUAD_SIZE])
    507 {
    508    const struct pipe_resource *texture = samp->view->texture;
    509    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
    510    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
    511    float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
    512    float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
    513    float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, samp->view->u.tex.first_level);
    514    float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, samp->view->u.tex.first_level);
    515    float rho  = MAX2(maxx, maxy);
    516 
    517    return util_fast_log2(rho);
    518 }
    519 
    520 
    521 static float
    522 compute_lambda_3d(const struct sp_sampler_variant *samp,
    523                   const float s[TGSI_QUAD_SIZE],
    524                   const float t[TGSI_QUAD_SIZE],
    525                   const float p[TGSI_QUAD_SIZE])
    526 {
    527    const struct pipe_resource *texture = samp->view->texture;
    528    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
    529    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
    530    float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
    531    float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
    532    float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]);
    533    float dpdy = fabsf(p[QUAD_TOP_LEFT]     - p[QUAD_BOTTOM_LEFT]);
    534    float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, samp->view->u.tex.first_level);
    535    float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, samp->view->u.tex.first_level);
    536    float maxz = MAX2(dpdx, dpdy) * u_minify(texture->depth0, samp->view->u.tex.first_level);
    537    float rho;
    538 
    539    rho = MAX2(maxx, maxy);
    540    rho = MAX2(rho, maxz);
    541 
    542    return util_fast_log2(rho);
    543 }
    544 
    545 
    546 /**
    547  * Compute lambda for a vertex texture sampler.
    548  * Since there aren't derivatives to use, just return 0.
    549  */
    550 static float
    551 compute_lambda_vert(const struct sp_sampler_variant *samp,
    552                     const float s[TGSI_QUAD_SIZE],
    553                     const float t[TGSI_QUAD_SIZE],
    554                     const float p[TGSI_QUAD_SIZE])
    555 {
    556    return 0.0f;
    557 }
    558 
    559 
    560 
    561 /**
    562  * Get a texel from a texture, using the texture tile cache.
    563  *
    564  * \param addr  the template tex address containing cube, z, face info.
    565  * \param x  the x coord of texel within 2D image
    566  * \param y  the y coord of texel within 2D image
    567  * \param rgba  the quad to put the texel/color into
    568  *
    569  * XXX maybe move this into sp_tex_tile_cache.c and merge with the
    570  * sp_get_cached_tile_tex() function.
    571  */
    572 
    573 
    574 
    575 
    576 static INLINE const float *
    577 get_texel_2d_no_border(const struct sp_sampler_variant *samp,
    578 		       union tex_tile_address addr, int x, int y)
    579 {
    580    const struct softpipe_tex_cached_tile *tile;
    581 
    582    addr.bits.x = x / TILE_SIZE;
    583    addr.bits.y = y / TILE_SIZE;
    584    y %= TILE_SIZE;
    585    x %= TILE_SIZE;
    586 
    587    tile = sp_get_cached_tile_tex(samp->cache, addr);
    588 
    589    return &tile->data.color[y][x][0];
    590 }
    591 
    592 
    593 static INLINE const float *
    594 get_texel_2d(const struct sp_sampler_variant *samp,
    595 	     union tex_tile_address addr, int x, int y)
    596 {
    597    const struct pipe_resource *texture = samp->view->texture;
    598    unsigned level = addr.bits.level;
    599 
    600    if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
    601        y < 0 || y >= (int) u_minify(texture->height0, level)) {
    602       return samp->sampler->border_color.f;
    603    }
    604    else {
    605       return get_texel_2d_no_border( samp, addr, x, y );
    606    }
    607 }
    608 
    609 
    610 /* Gather a quad of adjacent texels within a tile:
    611  */
    612 static INLINE void
    613 get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_variant *samp,
    614 					union tex_tile_address addr,
    615 					unsigned x, unsigned y,
    616 					const float *out[4])
    617 {
    618    const struct softpipe_tex_cached_tile *tile;
    619 
    620    addr.bits.x = x / TILE_SIZE;
    621    addr.bits.y = y / TILE_SIZE;
    622    y %= TILE_SIZE;
    623    x %= TILE_SIZE;
    624 
    625    tile = sp_get_cached_tile_tex(samp->cache, addr);
    626 
    627    out[0] = &tile->data.color[y  ][x  ][0];
    628    out[1] = &tile->data.color[y  ][x+1][0];
    629    out[2] = &tile->data.color[y+1][x  ][0];
    630    out[3] = &tile->data.color[y+1][x+1][0];
    631 }
    632 
    633 
    634 /* Gather a quad of potentially non-adjacent texels:
    635  */
    636 static INLINE void
    637 get_texel_quad_2d_no_border(const struct sp_sampler_variant *samp,
    638 			    union tex_tile_address addr,
    639 			    int x0, int y0,
    640 			    int x1, int y1,
    641 			    const float *out[4])
    642 {
    643    out[0] = get_texel_2d_no_border( samp, addr, x0, y0 );
    644    out[1] = get_texel_2d_no_border( samp, addr, x1, y0 );
    645    out[2] = get_texel_2d_no_border( samp, addr, x0, y1 );
    646    out[3] = get_texel_2d_no_border( samp, addr, x1, y1 );
    647 }
    648 
    649 /* Can involve a lot of unnecessary checks for border color:
    650  */
    651 static INLINE void
    652 get_texel_quad_2d(const struct sp_sampler_variant *samp,
    653 		  union tex_tile_address addr,
    654 		  int x0, int y0,
    655 		  int x1, int y1,
    656 		  const float *out[4])
    657 {
    658    out[0] = get_texel_2d( samp, addr, x0, y0 );
    659    out[1] = get_texel_2d( samp, addr, x1, y0 );
    660    out[3] = get_texel_2d( samp, addr, x1, y1 );
    661    out[2] = get_texel_2d( samp, addr, x0, y1 );
    662 }
    663 
    664 
    665 
    666 /* 3d variants:
    667  */
    668 static INLINE const float *
    669 get_texel_3d_no_border(const struct sp_sampler_variant *samp,
    670                        union tex_tile_address addr, int x, int y, int z)
    671 {
    672    const struct softpipe_tex_cached_tile *tile;
    673 
    674    addr.bits.x = x / TILE_SIZE;
    675    addr.bits.y = y / TILE_SIZE;
    676    addr.bits.z = z;
    677    y %= TILE_SIZE;
    678    x %= TILE_SIZE;
    679 
    680    tile = sp_get_cached_tile_tex(samp->cache, addr);
    681 
    682    return &tile->data.color[y][x][0];
    683 }
    684 
    685 
    686 static INLINE const float *
    687 get_texel_3d(const struct sp_sampler_variant *samp,
    688 	     union tex_tile_address addr, int x, int y, int z)
    689 {
    690    const struct pipe_resource *texture = samp->view->texture;
    691    unsigned level = addr.bits.level;
    692 
    693    if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
    694        y < 0 || y >= (int) u_minify(texture->height0, level) ||
    695        z < 0 || z >= (int) u_minify(texture->depth0, level)) {
    696       return samp->sampler->border_color.f;
    697    }
    698    else {
    699       return get_texel_3d_no_border( samp, addr, x, y, z );
    700    }
    701 }
    702 
    703 
    704 /* Get texel pointer for 1D array texture */
    705 static INLINE const float *
    706 get_texel_1d_array(const struct sp_sampler_variant *samp,
    707                    union tex_tile_address addr, int x, int y)
    708 {
    709    const struct pipe_resource *texture = samp->view->texture;
    710    unsigned level = addr.bits.level;
    711 
    712    if (x < 0 || x >= (int) u_minify(texture->width0, level)) {
    713       return samp->sampler->border_color.f;
    714    }
    715    else {
    716       return get_texel_2d_no_border(samp, addr, x, y);
    717    }
    718 }
    719 
    720 
    721 /* Get texel pointer for 2D array texture */
    722 static INLINE const float *
    723 get_texel_2d_array(const struct sp_sampler_variant *samp,
    724                    union tex_tile_address addr, int x, int y, int layer)
    725 {
    726    const struct pipe_resource *texture = samp->view->texture;
    727    unsigned level = addr.bits.level;
    728 
    729    assert(layer < (int) texture->array_size);
    730    assert(layer >= 0);
    731 
    732    if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
    733        y < 0 || y >= (int) u_minify(texture->height0, level)) {
    734       return samp->sampler->border_color.f;
    735    }
    736    else {
    737       return get_texel_3d_no_border(samp, addr, x, y, layer);
    738    }
    739 }
    740 
    741 
    742 /**
    743  * Given the logbase2 of a mipmap's base level size and a mipmap level,
    744  * return the size (in texels) of that mipmap level.
    745  * For example, if level[0].width = 256 then base_pot will be 8.
    746  * If level = 2, then we'll return 64 (the width at level=2).
    747  * Return 1 if level > base_pot.
    748  */
    749 static INLINE unsigned
    750 pot_level_size(unsigned base_pot, unsigned level)
    751 {
    752    return (base_pot >= level) ? (1 << (base_pot - level)) : 1;
    753 }
    754 
    755 
    756 static void
    757 print_sample(const char *function, const float *rgba)
    758 {
    759    debug_printf("%s %g %g %g %g\n",
    760                 function,
    761                 rgba[0], rgba[TGSI_NUM_CHANNELS], rgba[2*TGSI_NUM_CHANNELS], rgba[3*TGSI_NUM_CHANNELS]);
    762 }
    763 
    764 
    765 static void
    766 print_sample_4(const char *function, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
    767 {
    768    debug_printf("%s %g %g %g %g, %g %g %g %g, %g %g %g %g, %g %g %g %g\n",
    769                 function,
    770                 rgba[0][0], rgba[1][0], rgba[2][0], rgba[3][0],
    771                 rgba[0][1], rgba[1][1], rgba[2][1], rgba[3][1],
    772                 rgba[0][2], rgba[1][2], rgba[2][2], rgba[3][2],
    773                 rgba[0][3], rgba[1][3], rgba[2][3], rgba[3][3]);
    774 }
    775 
    776 /* Some image-filter fastpaths:
    777  */
    778 static INLINE void
    779 img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
    780                                 float s,
    781                                 float t,
    782                                 float p,
    783 				unsigned level,
    784                                 unsigned face_id,
    785                                 enum tgsi_sampler_control control,
    786                                 float *rgba)
    787 {
    788    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
    789    unsigned xpot = pot_level_size(samp->xpot, level);
    790    unsigned ypot = pot_level_size(samp->ypot, level);
    791    unsigned xmax = (xpot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, xpot) - 1; */
    792    unsigned ymax = (ypot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, ypot) - 1; */
    793    union tex_tile_address addr;
    794    int c;
    795 
    796 
    797 
    798    float u = s * xpot - 0.5F;
    799    float v = t * ypot - 0.5F;
    800 
    801    int uflr = util_ifloor(u);
    802    int vflr = util_ifloor(v);
    803 
    804    float xw = u - (float)uflr;
    805    float yw = v - (float)vflr;
    806 
    807    int x0 = uflr & (xpot - 1);
    808    int y0 = vflr & (ypot - 1);
    809 
    810    const float *tx[4];
    811 
    812    addr.value = 0;
    813    addr.bits.level = level;
    814 
    815    /* Can we fetch all four at once:
    816     */
    817    if (x0 < xmax && y0 < ymax) {
    818       get_texel_quad_2d_no_border_single_tile(samp, addr, x0, y0, tx);
    819    }
    820    else {
    821       unsigned x1 = (x0 + 1) & (xpot - 1);
    822       unsigned y1 = (y0 + 1) & (ypot - 1);
    823       get_texel_quad_2d_no_border(samp, addr, x0, y0, x1, y1, tx);
    824    }
    825 
    826    /* interpolate R, G, B, A */
    827    for (c = 0; c < TGSI_QUAD_SIZE; c++) {
    828       rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
    829                                        tx[0][c], tx[1][c],
    830                                        tx[2][c], tx[3][c]);
    831    }
    832 
    833    if (DEBUG_TEX) {
    834       print_sample(__FUNCTION__, rgba);
    835    }
    836 }
    837 
    838 
    839 static INLINE void
    840 img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler,
    841                                  float s,
    842                                  float t,
    843                                  float p,
    844                                  unsigned level,
    845                                  unsigned face_id,
    846                                  enum tgsi_sampler_control control,
    847                                  float rgba[TGSI_QUAD_SIZE])
    848 {
    849    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
    850    unsigned xpot = pot_level_size(samp->xpot, level);
    851    unsigned ypot = pot_level_size(samp->ypot, level);
    852    const float *out;
    853    union tex_tile_address addr;
    854    int c;
    855 
    856    float u = s * xpot;
    857    float v = t * ypot;
    858 
    859    int uflr = util_ifloor(u);
    860    int vflr = util_ifloor(v);
    861 
    862    int x0 = uflr & (xpot - 1);
    863    int y0 = vflr & (ypot - 1);
    864 
    865    addr.value = 0;
    866    addr.bits.level = level;
    867 
    868    out = get_texel_2d_no_border(samp, addr, x0, y0);
    869    for (c = 0; c < TGSI_QUAD_SIZE; c++)
    870       rgba[TGSI_NUM_CHANNELS*c] = out[c];
    871 
    872    if (DEBUG_TEX) {
    873       print_sample(__FUNCTION__, rgba);
    874    }
    875 }
    876 
    877 
    878 static INLINE void
    879 img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler,
    880                                 float s,
    881                                 float t,
    882                                 float p,
    883                                 unsigned level,
    884                                 unsigned face_id,
    885                                 enum tgsi_sampler_control control,
    886                                 float rgba[TGSI_QUAD_SIZE])
    887 {
    888    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
    889    unsigned xpot = pot_level_size(samp->xpot, level);
    890    unsigned ypot = pot_level_size(samp->ypot, level);
    891    union tex_tile_address addr;
    892    int c;
    893 
    894    float u = s * xpot;
    895    float v = t * ypot;
    896 
    897    int x0, y0;
    898    const float *out;
    899 
    900    addr.value = 0;
    901    addr.bits.level = level;
    902 
    903    x0 = util_ifloor(u);
    904    if (x0 < 0)
    905       x0 = 0;
    906    else if (x0 > xpot - 1)
    907       x0 = xpot - 1;
    908 
    909    y0 = util_ifloor(v);
    910    if (y0 < 0)
    911       y0 = 0;
    912    else if (y0 > ypot - 1)
    913       y0 = ypot - 1;
    914 
    915    out = get_texel_2d_no_border(samp, addr, x0, y0);
    916    for (c = 0; c < TGSI_QUAD_SIZE; c++)
    917       rgba[TGSI_NUM_CHANNELS*c] = out[c];
    918 
    919    if (DEBUG_TEX) {
    920       print_sample(__FUNCTION__, rgba);
    921    }
    922 }
    923 
    924 
    925 static void
    926 img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler,
    927                       float s,
    928                       float t,
    929                       float p,
    930                       unsigned level,
    931                       unsigned face_id,
    932                       enum tgsi_sampler_control control,
    933                       float rgba[TGSI_QUAD_SIZE])
    934 {
    935    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
    936    const struct pipe_resource *texture = samp->view->texture;
    937    int width;
    938    int x;
    939    union tex_tile_address addr;
    940    const float *out;
    941    int c;
    942 
    943    width = u_minify(texture->width0, level);
    944 
    945    assert(width > 0);
    946 
    947    addr.value = 0;
    948    addr.bits.level = level;
    949 
    950    samp->nearest_texcoord_s(s, width, &x);
    951 
    952    out = get_texel_2d(samp, addr, x, 0);
    953    for (c = 0; c < TGSI_QUAD_SIZE; c++)
    954       rgba[TGSI_NUM_CHANNELS*c] = out[c];
    955 
    956    if (DEBUG_TEX) {
    957       print_sample(__FUNCTION__, rgba);
    958    }
    959 }
    960 
    961 
    962 static void
    963 img_filter_1d_array_nearest(struct tgsi_sampler *tgsi_sampler,
    964                             float s,
    965                             float t,
    966                             float p,
    967                             unsigned level,
    968                             unsigned face_id,
    969                             enum tgsi_sampler_control control,
    970                             float *rgba)
    971 {
    972    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
    973    const struct pipe_resource *texture = samp->view->texture;
    974    int width;
    975    int x, layer;
    976    union tex_tile_address addr;
    977    const float *out;
    978    int c;
    979 
    980    width = u_minify(texture->width0, level);
    981 
    982    assert(width > 0);
    983 
    984    addr.value = 0;
    985    addr.bits.level = level;
    986 
    987    samp->nearest_texcoord_s(s, width, &x);
    988    wrap_array_layer(t, texture->array_size, &layer);
    989 
    990    out = get_texel_1d_array(samp, addr, x, layer);
    991    for (c = 0; c < TGSI_QUAD_SIZE; c++)
    992       rgba[TGSI_NUM_CHANNELS*c] = out[c];
    993 
    994    if (DEBUG_TEX) {
    995       print_sample(__FUNCTION__, rgba);
    996    }
    997 }
    998 
    999 
   1000 static void
   1001 img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler,
   1002                       float s,
   1003                       float t,
   1004                       float p,
   1005                       unsigned level,
   1006                       unsigned face_id,
   1007                       enum tgsi_sampler_control control,
   1008                       float *rgba)
   1009 {
   1010    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
   1011    const struct pipe_resource *texture = samp->view->texture;
   1012    int width, height;
   1013    int x, y;
   1014    union tex_tile_address addr;
   1015    const float *out;
   1016    int c;
   1017 
   1018    width = u_minify(texture->width0, level);
   1019    height = u_minify(texture->height0, level);
   1020 
   1021    assert(width > 0);
   1022    assert(height > 0);
   1023 
   1024    addr.value = 0;
   1025    addr.bits.level = level;
   1026 
   1027    samp->nearest_texcoord_s(s, width, &x);
   1028    samp->nearest_texcoord_t(t, height, &y);
   1029 
   1030    out = get_texel_2d(samp, addr, x, y);
   1031    for (c = 0; c < TGSI_QUAD_SIZE; c++)
   1032       rgba[TGSI_NUM_CHANNELS*c] = out[c];
   1033 
   1034    if (DEBUG_TEX) {
   1035       print_sample(__FUNCTION__, rgba);
   1036    }
   1037 }
   1038 
   1039 
   1040 static void
   1041 img_filter_2d_array_nearest(struct tgsi_sampler *tgsi_sampler,
   1042                             float s,
   1043                             float t,
   1044                             float p,
   1045                             unsigned level,
   1046                             unsigned face_id,
   1047                             enum tgsi_sampler_control control,
   1048                             float *rgba)
   1049 {
   1050    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
   1051    const struct pipe_resource *texture = samp->view->texture;
   1052    int width, height;
   1053    int x, y, layer;
   1054    union tex_tile_address addr;
   1055    const float *out;
   1056    int c;
   1057 
   1058    width = u_minify(texture->width0, level);
   1059    height = u_minify(texture->height0, level);
   1060 
   1061    assert(width > 0);
   1062    assert(height > 0);
   1063 
   1064    addr.value = 0;
   1065    addr.bits.level = level;
   1066 
   1067    samp->nearest_texcoord_s(s, width, &x);
   1068    samp->nearest_texcoord_t(t, height, &y);
   1069    wrap_array_layer(p, texture->array_size, &layer);
   1070 
   1071    out = get_texel_2d_array(samp, addr, x, y, layer);
   1072    for (c = 0; c < TGSI_QUAD_SIZE; c++)
   1073       rgba[TGSI_NUM_CHANNELS*c] = out[c];
   1074 
   1075    if (DEBUG_TEX) {
   1076       print_sample(__FUNCTION__, rgba);
   1077    }
   1078 }
   1079 
   1080 
   1081 static INLINE union tex_tile_address
   1082 face(union tex_tile_address addr, unsigned face )
   1083 {
   1084    addr.bits.face = face;
   1085    return addr;
   1086 }
   1087 
   1088 
   1089 static void
   1090 img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler,
   1091                         float s,
   1092                         float t,
   1093                         float p,
   1094                         unsigned level,
   1095                         unsigned face_id,
   1096                         enum tgsi_sampler_control control,
   1097                         float *rgba)
   1098 {
   1099    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
   1100    const struct pipe_resource *texture = samp->view->texture;
   1101    int width, height;
   1102    int x, y;
   1103    union tex_tile_address addr;
   1104    const float *out;
   1105    int c;
   1106 
   1107    width = u_minify(texture->width0, level);
   1108    height = u_minify(texture->height0, level);
   1109 
   1110    assert(width > 0);
   1111    assert(height > 0);
   1112 
   1113    addr.value = 0;
   1114    addr.bits.level = level;
   1115 
   1116    samp->nearest_texcoord_s(s, width, &x);
   1117    samp->nearest_texcoord_t(t, height, &y);
   1118 
   1119    out = get_texel_2d(samp, face(addr, face_id), x, y);
   1120    for (c = 0; c < TGSI_QUAD_SIZE; c++)
   1121       rgba[TGSI_NUM_CHANNELS*c] = out[c];
   1122 
   1123    if (DEBUG_TEX) {
   1124       print_sample(__FUNCTION__, rgba);
   1125    }
   1126 }
   1127 
   1128 
   1129 static void
   1130 img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler,
   1131                       float s,
   1132                       float t,
   1133                       float p,
   1134                       unsigned level,
   1135                       unsigned face_id,
   1136                       enum tgsi_sampler_control control,
   1137                       float *rgba)
   1138 {
   1139    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
   1140    const struct pipe_resource *texture = samp->view->texture;
   1141    int width, height, depth;
   1142    int x, y, z;
   1143    union tex_tile_address addr;
   1144    const float *out;
   1145    int c;
   1146 
   1147    width = u_minify(texture->width0, level);
   1148    height = u_minify(texture->height0, level);
   1149    depth = u_minify(texture->depth0, level);
   1150 
   1151    assert(width > 0);
   1152    assert(height > 0);
   1153    assert(depth > 0);
   1154 
   1155    samp->nearest_texcoord_s(s, width,  &x);
   1156    samp->nearest_texcoord_t(t, height, &y);
   1157    samp->nearest_texcoord_p(p, depth,  &z);
   1158 
   1159    addr.value = 0;
   1160    addr.bits.level = level;
   1161 
   1162    out = get_texel_3d(samp, addr, x, y, z);
   1163    for (c = 0; c < TGSI_QUAD_SIZE; c++)
   1164       rgba[TGSI_NUM_CHANNELS*c] = out[c];
   1165 }
   1166 
   1167 
   1168 static void
   1169 img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler,
   1170                      float s,
   1171                      float t,
   1172                      float p,
   1173                      unsigned level,
   1174                      unsigned face_id,
   1175                      enum tgsi_sampler_control control,
   1176                      float *rgba)
   1177 {
   1178    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
   1179    const struct pipe_resource *texture = samp->view->texture;
   1180    int width;
   1181    int x0, x1;
   1182    float xw; /* weights */
   1183    union tex_tile_address addr;
   1184    const float *tx0, *tx1;
   1185    int c;
   1186 
   1187    width = u_minify(texture->width0, level);
   1188 
   1189    assert(width > 0);
   1190 
   1191    addr.value = 0;
   1192    addr.bits.level = level;
   1193 
   1194    samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
   1195 
   1196    tx0 = get_texel_2d(samp, addr, x0, 0);
   1197    tx1 = get_texel_2d(samp, addr, x1, 0);
   1198 
   1199    /* interpolate R, G, B, A */
   1200    for (c = 0; c < TGSI_QUAD_SIZE; c++)
   1201       rgba[TGSI_NUM_CHANNELS*c] = lerp(xw, tx0[c], tx1[c]);
   1202 }
   1203 
   1204 
   1205 static void
   1206 img_filter_1d_array_linear(struct tgsi_sampler *tgsi_sampler,
   1207                            float s,
   1208                            float t,
   1209                            float p,
   1210                            unsigned level,
   1211                            unsigned face_id,
   1212                            enum tgsi_sampler_control control,
   1213                            float *rgba)
   1214 {
   1215    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
   1216    const struct pipe_resource *texture = samp->view->texture;
   1217    int width;
   1218    int x0, x1, layer;
   1219    float xw; /* weights */
   1220    union tex_tile_address addr;
   1221    const float *tx0, *tx1;
   1222    int c;
   1223 
   1224    width = u_minify(texture->width0, level);
   1225 
   1226    assert(width > 0);
   1227 
   1228    addr.value = 0;
   1229    addr.bits.level = level;
   1230 
   1231    samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
   1232    wrap_array_layer(t, texture->array_size, &layer);
   1233 
   1234    tx0 = get_texel_1d_array(samp, addr, x0, layer);
   1235    tx1 = get_texel_1d_array(samp, addr, x1, layer);
   1236 
   1237    /* interpolate R, G, B, A */
   1238    for (c = 0; c < TGSI_QUAD_SIZE; c++)
   1239       rgba[TGSI_NUM_CHANNELS*c] = lerp(xw, tx0[c], tx1[c]);
   1240 }
   1241 
   1242 
   1243 static void
   1244 img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler,
   1245                      float s,
   1246                      float t,
   1247                      float p,
   1248                      unsigned level,
   1249                      unsigned face_id,
   1250                      enum tgsi_sampler_control control,
   1251                      float *rgba)
   1252 {
   1253    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
   1254    const struct pipe_resource *texture = samp->view->texture;
   1255    int width, height;
   1256    int x0, y0, x1, y1;
   1257    float xw, yw; /* weights */
   1258    union tex_tile_address addr;
   1259    const float *tx0, *tx1, *tx2, *tx3;
   1260    int c;
   1261 
   1262    width = u_minify(texture->width0, level);
   1263    height = u_minify(texture->height0, level);
   1264 
   1265    assert(width > 0);
   1266    assert(height > 0);
   1267 
   1268    addr.value = 0;
   1269    addr.bits.level = level;
   1270 
   1271    samp->linear_texcoord_s(s, width,  &x0, &x1, &xw);
   1272    samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
   1273 
   1274    tx0 = get_texel_2d(samp, addr, x0, y0);
   1275    tx1 = get_texel_2d(samp, addr, x1, y0);
   1276    tx2 = get_texel_2d(samp, addr, x0, y1);
   1277    tx3 = get_texel_2d(samp, addr, x1, y1);
   1278 
   1279    /* interpolate R, G, B, A */
   1280    for (c = 0; c < TGSI_QUAD_SIZE; c++)
   1281       rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
   1282                                           tx0[c], tx1[c],
   1283                                           tx2[c], tx3[c]);
   1284 }
   1285 
   1286 
   1287 static void
   1288 img_filter_2d_array_linear(struct tgsi_sampler *tgsi_sampler,
   1289                            float s,
   1290                            float t,
   1291                            float p,
   1292                            unsigned level,
   1293                            unsigned face_id,
   1294                            enum tgsi_sampler_control control,
   1295                            float *rgba)
   1296 {
   1297    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
   1298    const struct pipe_resource *texture = samp->view->texture;
   1299    int width, height;
   1300    int x0, y0, x1, y1, layer;
   1301    float xw, yw; /* weights */
   1302    union tex_tile_address addr;
   1303    const float *tx0, *tx1, *tx2, *tx3;
   1304    int c;
   1305 
   1306    width = u_minify(texture->width0, level);
   1307    height = u_minify(texture->height0, level);
   1308 
   1309    assert(width > 0);
   1310    assert(height > 0);
   1311 
   1312    addr.value = 0;
   1313    addr.bits.level = level;
   1314 
   1315    samp->linear_texcoord_s(s, width,  &x0, &x1, &xw);
   1316    samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
   1317    wrap_array_layer(p, texture->array_size, &layer);
   1318 
   1319    tx0 = get_texel_2d_array(samp, addr, x0, y0, layer);
   1320    tx1 = get_texel_2d_array(samp, addr, x1, y0, layer);
   1321    tx2 = get_texel_2d_array(samp, addr, x0, y1, layer);
   1322    tx3 = get_texel_2d_array(samp, addr, x1, y1, layer);
   1323 
   1324    /* interpolate R, G, B, A */
   1325    for (c = 0; c < TGSI_QUAD_SIZE; c++)
   1326       rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
   1327                                           tx0[c], tx1[c],
   1328                                           tx2[c], tx3[c]);
   1329 }
   1330 
   1331 
   1332 static void
   1333 img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler,
   1334                        float s,
   1335                        float t,
   1336                        float p,
   1337                        unsigned level,
   1338                        unsigned face_id,
   1339                        enum tgsi_sampler_control control,
   1340                        float *rgba)
   1341 {
   1342    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
   1343    const struct pipe_resource *texture = samp->view->texture;
   1344    int width, height;
   1345    int x0, y0, x1, y1;
   1346    float xw, yw; /* weights */
   1347    union tex_tile_address addr, addrj;
   1348    const float *tx0, *tx1, *tx2, *tx3;
   1349    int c;
   1350 
   1351    width = u_minify(texture->width0, level);
   1352    height = u_minify(texture->height0, level);
   1353 
   1354    assert(width > 0);
   1355    assert(height > 0);
   1356 
   1357    addr.value = 0;
   1358    addr.bits.level = level;
   1359 
   1360    samp->linear_texcoord_s(s, width,  &x0, &x1, &xw);
   1361    samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
   1362 
   1363    addrj = face(addr, face_id);
   1364    tx0 = get_texel_2d(samp, addrj, x0, y0);
   1365    tx1 = get_texel_2d(samp, addrj, x1, y0);
   1366    tx2 = get_texel_2d(samp, addrj, x0, y1);
   1367    tx3 = get_texel_2d(samp, addrj, x1, y1);
   1368 
   1369    /* interpolate R, G, B, A */
   1370    for (c = 0; c < TGSI_QUAD_SIZE; c++)
   1371       rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
   1372                                           tx0[c], tx1[c],
   1373                                           tx2[c], tx3[c]);
   1374 }
   1375 
   1376 
   1377 static void
   1378 img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler,
   1379                      float s,
   1380                      float t,
   1381                      float p,
   1382                      unsigned level,
   1383                      unsigned face_id,
   1384                      enum tgsi_sampler_control control,
   1385                      float *rgba)
   1386 {
   1387    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
   1388    const struct pipe_resource *texture = samp->view->texture;
   1389    int width, height, depth;
   1390    int x0, x1, y0, y1, z0, z1;
   1391    float xw, yw, zw; /* interpolation weights */
   1392    union tex_tile_address addr;
   1393    const float *tx00, *tx01, *tx02, *tx03, *tx10, *tx11, *tx12, *tx13;
   1394    int c;
   1395 
   1396    width = u_minify(texture->width0, level);
   1397    height = u_minify(texture->height0, level);
   1398    depth = u_minify(texture->depth0, level);
   1399 
   1400    addr.value = 0;
   1401    addr.bits.level = level;
   1402 
   1403    assert(width > 0);
   1404    assert(height > 0);
   1405    assert(depth > 0);
   1406 
   1407    samp->linear_texcoord_s(s, width,  &x0, &x1, &xw);
   1408    samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
   1409    samp->linear_texcoord_p(p, depth,  &z0, &z1, &zw);
   1410 
   1411 
   1412    tx00 = get_texel_3d(samp, addr, x0, y0, z0);
   1413    tx01 = get_texel_3d(samp, addr, x1, y0, z0);
   1414    tx02 = get_texel_3d(samp, addr, x0, y1, z0);
   1415    tx03 = get_texel_3d(samp, addr, x1, y1, z0);
   1416 
   1417    tx10 = get_texel_3d(samp, addr, x0, y0, z1);
   1418    tx11 = get_texel_3d(samp, addr, x1, y0, z1);
   1419    tx12 = get_texel_3d(samp, addr, x0, y1, z1);
   1420    tx13 = get_texel_3d(samp, addr, x1, y1, z1);
   1421 
   1422       /* interpolate R, G, B, A */
   1423    for (c = 0; c < TGSI_QUAD_SIZE; c++)
   1424       rgba[TGSI_NUM_CHANNELS*c] =  lerp_3d(xw, yw, zw,
   1425                                            tx00[c], tx01[c],
   1426                                            tx02[c], tx03[c],
   1427                                            tx10[c], tx11[c],
   1428                                            tx12[c], tx13[c]);
   1429 }
   1430 
   1431 
   1432 /* Calculate level of detail for every fragment.
   1433  * Note that lambda has already been biased by global LOD bias.
   1434  */
   1435 static INLINE void
   1436 compute_lod(const struct pipe_sampler_state *sampler,
   1437             const float biased_lambda,
   1438             const float lodbias[TGSI_QUAD_SIZE],
   1439             float lod[TGSI_QUAD_SIZE])
   1440 {
   1441    uint i;
   1442 
   1443    for (i = 0; i < TGSI_QUAD_SIZE; i++) {
   1444       lod[i] = biased_lambda + lodbias[i];
   1445       lod[i] = CLAMP(lod[i], sampler->min_lod, sampler->max_lod);
   1446    }
   1447 }
   1448 
   1449 
   1450 static void
   1451 mip_filter_linear(struct tgsi_sampler *tgsi_sampler,
   1452                   const float s[TGSI_QUAD_SIZE],
   1453                   const float t[TGSI_QUAD_SIZE],
   1454                   const float p[TGSI_QUAD_SIZE],
   1455                   const float c0[TGSI_QUAD_SIZE],
   1456                   enum tgsi_sampler_control control,
   1457                   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
   1458 {
   1459    struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
   1460    const struct pipe_resource *texture = samp->view->texture;
   1461    int j;
   1462    float lod[TGSI_QUAD_SIZE];
   1463 
   1464    if (control == tgsi_sampler_lod_bias) {
   1465       float lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
   1466       compute_lod(samp->sampler, lambda, c0, lod);
   1467    } else {
   1468       assert(control == tgsi_sampler_lod_explicit);
   1469 
   1470       memcpy(lod, c0, sizeof(lod));
   1471    }
   1472 
   1473    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
   1474       int level0 = samp->view->u.tex.first_level + (int)lod[j];
   1475 
   1476       if (lod[j] < 0.0)
   1477          samp->mag_img_filter(tgsi_sampler, s[j], t[j], p[j], samp->view->u.tex.first_level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
   1478 
   1479       else if (level0 >= texture->last_level)
   1480          samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], texture->last_level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
   1481 
   1482       else {
   1483          float levelBlend = frac(lod[j]);
   1484          float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
   1485          int c;
   1486 
   1487          samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], level0,   samp->faces[j], tgsi_sampler_lod_bias, &rgbax[0][0]);
   1488          samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], level0+1, samp->faces[j], tgsi_sampler_lod_bias, &rgbax[0][1]);
   1489 
   1490          for (c = 0; c < 4; c++) {
   1491             rgba[c][j] = lerp(levelBlend, rgbax[c][0], rgbax[c][1]);
   1492          }
   1493       }
   1494    }
   1495 
   1496    if (DEBUG_TEX) {
   1497       print_sample_4(__FUNCTION__, rgba);
   1498    }
   1499 }
   1500 
   1501 
   1502 /**
   1503  * Compute nearest mipmap level from texcoords.
   1504  * Then sample the texture level for four elements of a quad.
   1505  * \param c0  the LOD bias factors, or absolute LODs (depending on control)
   1506  */
   1507 static void
   1508 mip_filter_nearest(struct tgsi_sampler *tgsi_sampler,
   1509                    const float s[TGSI_QUAD_SIZE],
   1510                    const float t[TGSI_QUAD_SIZE],
   1511                    const float p[TGSI_QUAD_SIZE],
   1512                    const float c0[TGSI_QUAD_SIZE],
   1513                    enum tgsi_sampler_control control,
   1514                    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
   1515 {
   1516    struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
   1517    const struct pipe_resource *texture = samp->view->texture;
   1518    float lod[TGSI_QUAD_SIZE];
   1519    int j;
   1520 
   1521    if (control == tgsi_sampler_lod_bias) {
   1522       float lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
   1523       compute_lod(samp->sampler, lambda, c0, lod);
   1524    } else {
   1525       assert(control == tgsi_sampler_lod_explicit);
   1526 
   1527       memcpy(lod, c0, sizeof(lod));
   1528    }
   1529 
   1530    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
   1531       if (lod[j] < 0.0)
   1532          samp->mag_img_filter(tgsi_sampler, s[j], t[j], p[j], samp->view->u.tex.first_level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
   1533       else {
   1534          float level = samp->view->u.tex.first_level + (int)(lod[j] + 0.5F) ;
   1535          level = MIN2(level, (int)texture->last_level);
   1536          samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
   1537       }
   1538    }
   1539 
   1540    if (DEBUG_TEX) {
   1541       print_sample_4(__FUNCTION__, rgba);
   1542    }
   1543 }
   1544 
   1545 
   1546 static void
   1547 mip_filter_none(struct tgsi_sampler *tgsi_sampler,
   1548                 const float s[TGSI_QUAD_SIZE],
   1549                 const float t[TGSI_QUAD_SIZE],
   1550                 const float p[TGSI_QUAD_SIZE],
   1551                 const float c0[TGSI_QUAD_SIZE],
   1552                 enum tgsi_sampler_control control,
   1553                 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
   1554 {
   1555    struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
   1556    float lod[TGSI_QUAD_SIZE];
   1557    int j;
   1558 
   1559    if (control == tgsi_sampler_lod_bias) {
   1560       float lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
   1561       compute_lod(samp->sampler, lambda, c0, lod);
   1562    } else {
   1563       assert(control == tgsi_sampler_lod_explicit);
   1564 
   1565       memcpy(lod, c0, sizeof(lod));
   1566    }
   1567 
   1568    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
   1569       if (lod[j] < 0.0) {
   1570          samp->mag_img_filter(tgsi_sampler, s[j], t[j], p[j], samp->view->u.tex.first_level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
   1571       }
   1572       else {
   1573          samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], samp->view->u.tex.first_level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
   1574       }
   1575    }
   1576 }
   1577 
   1578 
   1579 static void
   1580 mip_filter_none_no_filter_select(struct tgsi_sampler *tgsi_sampler,
   1581                                      const float s[TGSI_QUAD_SIZE],
   1582                                      const float t[TGSI_QUAD_SIZE],
   1583                                      const float p[TGSI_QUAD_SIZE],
   1584                                      const float c0[TGSI_QUAD_SIZE],
   1585                                      enum tgsi_sampler_control control,
   1586                                      float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
   1587 {
   1588    struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
   1589    int j;
   1590 
   1591    for (j = 0; j < TGSI_QUAD_SIZE; j++)
   1592       samp->mag_img_filter(tgsi_sampler, s[j], t[j], p[j], samp->view->u.tex.first_level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
   1593 }
   1594 
   1595 
   1596 /* For anisotropic filtering */
   1597 #define WEIGHT_LUT_SIZE 1024
   1598 
   1599 static float *weightLut = NULL;
   1600 
   1601 /**
   1602  * Creates the look-up table used to speed-up EWA sampling
   1603  */
   1604 static void
   1605 create_filter_table(void)
   1606 {
   1607    unsigned i;
   1608    if (!weightLut) {
   1609       weightLut = (float *) MALLOC(WEIGHT_LUT_SIZE * sizeof(float));
   1610 
   1611       for (i = 0; i < WEIGHT_LUT_SIZE; ++i) {
   1612          float alpha = 2;
   1613          float r2 = (float) i / (float) (WEIGHT_LUT_SIZE - 1);
   1614          float weight = (float) exp(-alpha * r2);
   1615          weightLut[i] = weight;
   1616       }
   1617    }
   1618 }
   1619 
   1620 
   1621 /**
   1622  * Elliptical weighted average (EWA) filter for producing high quality
   1623  * anisotropic filtered results.
   1624  * Based on the Higher Quality Elliptical Weighted Average Filter
   1625  * published by Paul S. Heckbert in his Master's Thesis
   1626  * "Fundamentals of Texture Mapping and Image Warping" (1989)
   1627  */
   1628 static void
   1629 img_filter_2d_ewa(struct tgsi_sampler *tgsi_sampler,
   1630                   const float s[TGSI_QUAD_SIZE],
   1631                   const float t[TGSI_QUAD_SIZE],
   1632                   const float p[TGSI_QUAD_SIZE],
   1633                   unsigned level,
   1634                   enum tgsi_sampler_control control,
   1635                   const float dudx, const float dvdx,
   1636                   const float dudy, const float dvdy,
   1637                   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
   1638 {
   1639    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
   1640    const struct pipe_resource *texture = samp->view->texture;
   1641 
   1642    // ??? Won't the image filters blow up if level is negative?
   1643    unsigned level0 = level > 0 ? level : 0;
   1644    float scaling = 1.0 / (1 << level0);
   1645    int width = u_minify(texture->width0, level0);
   1646    int height = u_minify(texture->height0, level0);
   1647 
   1648    float ux = dudx * scaling;
   1649    float vx = dvdx * scaling;
   1650    float uy = dudy * scaling;
   1651    float vy = dvdy * scaling;
   1652 
   1653    /* compute ellipse coefficients to bound the region:
   1654     * A*x*x + B*x*y + C*y*y = F.
   1655     */
   1656    float A = vx*vx+vy*vy+1;
   1657    float B = -2*(ux*vx+uy*vy);
   1658    float C = ux*ux+uy*uy+1;
   1659    float F = A*C-B*B/4.0;
   1660 
   1661    /* check if it is an ellipse */
   1662    /* ASSERT(F > 0.0); */
   1663 
   1664    /* Compute the ellipse's (u,v) bounding box in texture space */
   1665    float d = -B*B+4.0*C*A;
   1666    float box_u = 2.0 / d * sqrt(d*C*F); /* box_u -> half of bbox with   */
   1667    float box_v = 2.0 / d * sqrt(A*d*F); /* box_v -> half of bbox height */
   1668 
   1669    float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
   1670    float s_buffer[TGSI_QUAD_SIZE];
   1671    float t_buffer[TGSI_QUAD_SIZE];
   1672    float weight_buffer[TGSI_QUAD_SIZE];
   1673    unsigned buffer_next;
   1674    int j;
   1675    float den; /* = 0.0F; */
   1676    float ddq;
   1677    float U; /* = u0 - tex_u; */
   1678    int v;
   1679 
   1680    /* Scale ellipse formula to directly index the Filter Lookup Table.
   1681     * i.e. scale so that F = WEIGHT_LUT_SIZE-1
   1682     */
   1683    double formScale = (double) (WEIGHT_LUT_SIZE - 1) / F;
   1684    A *= formScale;
   1685    B *= formScale;
   1686    C *= formScale;
   1687    /* F *= formScale; */ /* no need to scale F as we don't use it below here */
   1688 
   1689    /* For each quad, the du and dx values are the same and so the ellipse is
   1690     * also the same. Note that texel/image access can only be performed using
   1691     * a quad, i.e. it is not possible to get the pixel value for a single
   1692     * tex coord. In order to have a better performance, the access is buffered
   1693     * using the s_buffer/t_buffer and weight_buffer. Only when the buffer is
   1694     * full, then the pixel values are read from the image.
   1695     */
   1696    ddq = 2 * A;
   1697 
   1698    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
   1699       /* Heckbert MS thesis, p. 59; scan over the bounding box of the ellipse
   1700        * and incrementally update the value of Ax^2+Bxy*Cy^2; when this
   1701        * value, q, is less than F, we're inside the ellipse
   1702        */
   1703       float tex_u = -0.5F + s[j] * texture->width0 * scaling;
   1704       float tex_v = -0.5F + t[j] * texture->height0 * scaling;
   1705 
   1706       int u0 = (int) floorf(tex_u - box_u);
   1707       int u1 = (int) ceilf(tex_u + box_u);
   1708       int v0 = (int) floorf(tex_v - box_v);
   1709       int v1 = (int) ceilf(tex_v + box_v);
   1710 
   1711       float num[4] = {0.0F, 0.0F, 0.0F, 0.0F};
   1712       buffer_next = 0;
   1713       den = 0;
   1714       U = u0 - tex_u;
   1715       for (v = v0; v <= v1; ++v) {
   1716          float V = v - tex_v;
   1717          float dq = A * (2 * U + 1) + B * V;
   1718          float q = (C * V + B * U) * V + A * U * U;
   1719 
   1720          int u;
   1721          for (u = u0; u <= u1; ++u) {
   1722             /* Note that the ellipse has been pre-scaled so F =
   1723              * WEIGHT_LUT_SIZE - 1
   1724              */
   1725             if (q < WEIGHT_LUT_SIZE) {
   1726                /* as a LUT is used, q must never be negative;
   1727                 * should not happen, though
   1728                 */
   1729                const int qClamped = q >= 0.0F ? q : 0;
   1730                float weight = weightLut[qClamped];
   1731 
   1732                weight_buffer[buffer_next] = weight;
   1733                s_buffer[buffer_next] = u / ((float) width);
   1734                t_buffer[buffer_next] = v / ((float) height);
   1735 
   1736                buffer_next++;
   1737                if (buffer_next == TGSI_QUAD_SIZE) {
   1738                   /* 4 texel coords are in the buffer -> read it now */
   1739                   unsigned jj;
   1740                   /* it is assumed that samp->min_img_filter is set to
   1741                    * img_filter_2d_nearest or one of the
   1742                    * accelerated img_filter_2d_nearest_XXX functions.
   1743                    */
   1744                   for (jj = 0; jj < buffer_next; jj++) {
   1745                      samp->min_img_filter(tgsi_sampler, s_buffer[jj], t_buffer[jj], p[jj], level, samp->faces[j],
   1746                                           tgsi_sampler_lod_bias, &rgba_temp[0][jj]);
   1747                      num[0] += weight_buffer[jj] * rgba_temp[0][jj];
   1748                      num[1] += weight_buffer[jj] * rgba_temp[1][jj];
   1749                      num[2] += weight_buffer[jj] * rgba_temp[2][jj];
   1750                      num[3] += weight_buffer[jj] * rgba_temp[3][jj];
   1751                   }
   1752 
   1753                   buffer_next = 0;
   1754                }
   1755 
   1756                den += weight;
   1757             }
   1758             q += dq;
   1759             dq += ddq;
   1760          }
   1761       }
   1762 
   1763       /* if the tex coord buffer contains unread values, we will read
   1764        * them now.
   1765        */
   1766       if (buffer_next > 0) {
   1767          unsigned jj;
   1768          /* it is assumed that samp->min_img_filter is set to
   1769           * img_filter_2d_nearest or one of the
   1770           * accelerated img_filter_2d_nearest_XXX functions.
   1771           */
   1772          for (jj = 0; jj < buffer_next; jj++) {
   1773             samp->min_img_filter(tgsi_sampler, s_buffer[jj], t_buffer[jj], p[jj], level, samp->faces[j],
   1774                                  tgsi_sampler_lod_bias, &rgba_temp[0][jj]);
   1775             num[0] += weight_buffer[jj] * rgba_temp[0][jj];
   1776             num[1] += weight_buffer[jj] * rgba_temp[1][jj];
   1777             num[2] += weight_buffer[jj] * rgba_temp[2][jj];
   1778             num[3] += weight_buffer[jj] * rgba_temp[3][jj];
   1779          }
   1780       }
   1781 
   1782       if (den <= 0.0F) {
   1783          /* Reaching this place would mean that no pixels intersected
   1784           * the ellipse.  This should never happen because the filter
   1785           * we use always intersects at least one pixel.
   1786           */
   1787 
   1788          /*rgba[0]=0;
   1789          rgba[1]=0;
   1790          rgba[2]=0;
   1791          rgba[3]=0;*/
   1792          /* not enough pixels in resampling, resort to direct interpolation */
   1793          samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], level, samp->faces[j],
   1794                               tgsi_sampler_lod_bias, &rgba_temp[0][j]);
   1795          den = 1;
   1796          num[0] = rgba_temp[0][j];
   1797          num[1] = rgba_temp[1][j];
   1798          num[2] = rgba_temp[2][j];
   1799          num[3] = rgba_temp[3][j];
   1800       }
   1801 
   1802       rgba[0][j] = num[0] / den;
   1803       rgba[1][j] = num[1] / den;
   1804       rgba[2][j] = num[2] / den;
   1805       rgba[3][j] = num[3] / den;
   1806    }
   1807 }
   1808 
   1809 
   1810 /**
   1811  * Sample 2D texture using an anisotropic filter.
   1812  */
   1813 static void
   1814 mip_filter_linear_aniso(struct tgsi_sampler *tgsi_sampler,
   1815                         const float s[TGSI_QUAD_SIZE],
   1816                         const float t[TGSI_QUAD_SIZE],
   1817                         const float p[TGSI_QUAD_SIZE],
   1818                         const float c0[TGSI_QUAD_SIZE],
   1819                         enum tgsi_sampler_control control,
   1820                         float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
   1821 {
   1822    struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
   1823    const struct pipe_resource *texture = samp->view->texture;
   1824    int level0;
   1825    float lambda;
   1826    float lod[TGSI_QUAD_SIZE];
   1827 
   1828    float s_to_u = u_minify(texture->width0, samp->view->u.tex.first_level);
   1829    float t_to_v = u_minify(texture->height0, samp->view->u.tex.first_level);
   1830    float dudx = (s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]) * s_to_u;
   1831    float dudy = (s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]) * s_to_u;
   1832    float dvdx = (t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]) * t_to_v;
   1833    float dvdy = (t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]) * t_to_v;
   1834 
   1835    if (control == tgsi_sampler_lod_bias) {
   1836       /* note: instead of working with Px and Py, we will use the
   1837        * squared length instead, to avoid sqrt.
   1838        */
   1839       float Px2 = dudx * dudx + dvdx * dvdx;
   1840       float Py2 = dudy * dudy + dvdy * dvdy;
   1841 
   1842       float Pmax2;
   1843       float Pmin2;
   1844       float e;
   1845       const float maxEccentricity = samp->sampler->max_anisotropy * samp->sampler->max_anisotropy;
   1846 
   1847       if (Px2 < Py2) {
   1848          Pmax2 = Py2;
   1849          Pmin2 = Px2;
   1850       }
   1851       else {
   1852          Pmax2 = Px2;
   1853          Pmin2 = Py2;
   1854       }
   1855 
   1856       /* if the eccentricity of the ellipse is too big, scale up the shorter
   1857        * of the two vectors to limit the maximum amount of work per pixel
   1858        */
   1859       e = Pmax2 / Pmin2;
   1860       if (e > maxEccentricity) {
   1861          /* float s=e / maxEccentricity;
   1862             minor[0] *= s;
   1863             minor[1] *= s;
   1864             Pmin2 *= s; */
   1865          Pmin2 = Pmax2 / maxEccentricity;
   1866       }
   1867 
   1868       /* note: we need to have Pmin=sqrt(Pmin2) here, but we can avoid
   1869        * this since 0.5*log(x) = log(sqrt(x))
   1870        */
   1871       lambda = 0.5F * util_fast_log2(Pmin2) + samp->sampler->lod_bias;
   1872       compute_lod(samp->sampler, lambda, c0, lod);
   1873    }
   1874    else {
   1875       assert(control == tgsi_sampler_lod_explicit);
   1876 
   1877       memcpy(lod, c0, sizeof(lod));
   1878    }
   1879 
   1880    /* XXX: Take into account all lod values.
   1881     */
   1882    lambda = lod[0];
   1883    level0 = samp->view->u.tex.first_level + (int)lambda;
   1884 
   1885    /* If the ellipse covers the whole image, we can
   1886     * simply return the average of the whole image.
   1887     */
   1888    if (level0 >= (int) texture->last_level) {
   1889       int j;
   1890       for (j = 0; j < TGSI_QUAD_SIZE; j++)
   1891          samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], texture->last_level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
   1892    }
   1893    else {
   1894       /* don't bother interpolating between multiple LODs; it doesn't
   1895        * seem to be worth the extra running time.
   1896        */
   1897       img_filter_2d_ewa(tgsi_sampler, s, t, p, level0, tgsi_sampler_lod_bias,
   1898                         dudx, dvdx, dudy, dvdy, rgba);
   1899    }
   1900 
   1901    if (DEBUG_TEX) {
   1902       print_sample_4(__FUNCTION__, rgba);
   1903    }
   1904 }
   1905 
   1906 
   1907 /**
   1908  * Specialized version of mip_filter_linear with hard-wired calls to
   1909  * 2d lambda calculation and 2d_linear_repeat_POT img filters.
   1910  */
   1911 static void
   1912 mip_filter_linear_2d_linear_repeat_POT(
   1913    struct tgsi_sampler *tgsi_sampler,
   1914    const float s[TGSI_QUAD_SIZE],
   1915    const float t[TGSI_QUAD_SIZE],
   1916    const float p[TGSI_QUAD_SIZE],
   1917    const float c0[TGSI_QUAD_SIZE],
   1918    enum tgsi_sampler_control control,
   1919    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
   1920 {
   1921    struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
   1922    const struct pipe_resource *texture = samp->view->texture;
   1923    int j;
   1924    float lambda;
   1925    float lod[TGSI_QUAD_SIZE];
   1926 
   1927    if (control == tgsi_sampler_lod_bias) {
   1928       lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
   1929       compute_lod(samp->sampler, lambda, c0, lod);
   1930    } else {
   1931       assert(control == tgsi_sampler_lod_explicit);
   1932 
   1933       memcpy(lod, c0, sizeof(lod));
   1934    }
   1935 
   1936    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
   1937       int level0 = samp->view->u.tex.first_level + (int)lod[j];
   1938 
   1939       /* Catches both negative and large values of level0:
   1940        */
   1941       if ((unsigned)level0 >= texture->last_level) {
   1942          if (level0 < 0)
   1943             img_filter_2d_linear_repeat_POT(tgsi_sampler, s[j], t[j], p[j], samp->view->u.tex.first_level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
   1944          else
   1945             img_filter_2d_linear_repeat_POT(tgsi_sampler, s[j], t[j], p[j], samp->view->texture->last_level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
   1946 
   1947       }
   1948       else {
   1949          float levelBlend = frac(lod[j]);
   1950          float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
   1951          int c;
   1952 
   1953          img_filter_2d_linear_repeat_POT(tgsi_sampler, s[j], t[j], p[j], level0,   samp->faces[j], tgsi_sampler_lod_bias, &rgbax[0][0]);
   1954          img_filter_2d_linear_repeat_POT(tgsi_sampler, s[j], t[j], p[j], level0+1, samp->faces[j], tgsi_sampler_lod_bias, &rgbax[0][1]);
   1955 
   1956          for (c = 0; c < TGSI_NUM_CHANNELS; c++)
   1957             rgba[c][j] = lerp(levelBlend, rgbax[c][0], rgbax[c][1]);
   1958       }
   1959    }
   1960 
   1961    if (DEBUG_TEX) {
   1962       print_sample_4(__FUNCTION__, rgba);
   1963    }
   1964 }
   1965 
   1966 
   1967 /**
   1968  * Do shadow/depth comparisons.
   1969  */
   1970 static void
   1971 sample_compare(struct tgsi_sampler *tgsi_sampler,
   1972                const float s[TGSI_QUAD_SIZE],
   1973                const float t[TGSI_QUAD_SIZE],
   1974                const float p[TGSI_QUAD_SIZE],
   1975                const float c0[TGSI_QUAD_SIZE],
   1976                enum tgsi_sampler_control control,
   1977                float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
   1978 {
   1979    struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
   1980    const struct pipe_sampler_state *sampler = samp->sampler;
   1981    int j, k0, k1, k2, k3;
   1982    float val;
   1983    float pc0, pc1, pc2, pc3;
   1984 
   1985    samp->mip_filter(tgsi_sampler, s, t, p, c0, control, rgba);
   1986 
   1987    /**
   1988     * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
   1989     * for 2D Array texture we need to use the 'c0' (aka Q).
   1990     * When we sampled the depth texture, the depth value was put into all
   1991     * RGBA channels.  We look at the red channel here.
   1992     */
   1993 
   1994    if (samp->view->texture->target == PIPE_TEXTURE_2D_ARRAY ||
   1995        samp->view->texture->target == PIPE_TEXTURE_CUBE) {
   1996       pc0 = CLAMP(c0[0], 0.0F, 1.0F);
   1997       pc1 = CLAMP(c0[1], 0.0F, 1.0F);
   1998       pc2 = CLAMP(c0[2], 0.0F, 1.0F);
   1999       pc3 = CLAMP(c0[3], 0.0F, 1.0F);
   2000    } else {
   2001       pc0 = CLAMP(p[0], 0.0F, 1.0F);
   2002       pc1 = CLAMP(p[1], 0.0F, 1.0F);
   2003       pc2 = CLAMP(p[2], 0.0F, 1.0F);
   2004       pc3 = CLAMP(p[3], 0.0F, 1.0F);
   2005    }
   2006    /* compare four texcoords vs. four texture samples */
   2007    switch (sampler->compare_func) {
   2008    case PIPE_FUNC_LESS:
   2009       k0 = pc0 < rgba[0][0];
   2010       k1 = pc1 < rgba[0][1];
   2011       k2 = pc2 < rgba[0][2];
   2012       k3 = pc3 < rgba[0][3];
   2013       break;
   2014    case PIPE_FUNC_LEQUAL:
   2015       k0 = pc0 <= rgba[0][0];
   2016       k1 = pc1 <= rgba[0][1];
   2017       k2 = pc2 <= rgba[0][2];
   2018       k3 = pc3 <= rgba[0][3];
   2019       break;
   2020    case PIPE_FUNC_GREATER:
   2021       k0 = pc0 > rgba[0][0];
   2022       k1 = pc1 > rgba[0][1];
   2023       k2 = pc2 > rgba[0][2];
   2024       k3 = pc3 > rgba[0][3];
   2025       break;
   2026    case PIPE_FUNC_GEQUAL:
   2027       k0 = pc0 >= rgba[0][0];
   2028       k1 = pc1 >= rgba[0][1];
   2029       k2 = pc2 >= rgba[0][2];
   2030       k3 = pc3 >= rgba[0][3];
   2031       break;
   2032    case PIPE_FUNC_EQUAL:
   2033       k0 = pc0 == rgba[0][0];
   2034       k1 = pc1 == rgba[0][1];
   2035       k2 = pc2 == rgba[0][2];
   2036       k3 = pc3 == rgba[0][3];
   2037       break;
   2038    case PIPE_FUNC_NOTEQUAL:
   2039       k0 = pc0 != rgba[0][0];
   2040       k1 = pc1 != rgba[0][1];
   2041       k2 = pc2 != rgba[0][2];
   2042       k3 = pc3 != rgba[0][3];
   2043       break;
   2044    case PIPE_FUNC_ALWAYS:
   2045       k0 = k1 = k2 = k3 = 1;
   2046       break;
   2047    case PIPE_FUNC_NEVER:
   2048       k0 = k1 = k2 = k3 = 0;
   2049       break;
   2050    default:
   2051       k0 = k1 = k2 = k3 = 0;
   2052       assert(0);
   2053       break;
   2054    }
   2055 
   2056    if (sampler->mag_img_filter == PIPE_TEX_FILTER_LINEAR) {
   2057       /* convert four pass/fail values to an intensity in [0,1] */
   2058       val = 0.25F * (k0 + k1 + k2 + k3);
   2059 
   2060       /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
   2061       for (j = 0; j < 4; j++) {
   2062 	 rgba[0][j] = rgba[1][j] = rgba[2][j] = val;
   2063 	 rgba[3][j] = 1.0F;
   2064       }
   2065    } else {
   2066       for (j = 0; j < 4; j++) {
   2067 	 rgba[0][j] = k0;
   2068 	 rgba[1][j] = k1;
   2069 	 rgba[2][j] = k2;
   2070 	 rgba[3][j] = 1.0F;
   2071       }
   2072    }
   2073 }
   2074 
   2075 
   2076 /**
   2077  * Use 3D texcoords to choose a cube face, then sample the 2D cube faces.
   2078  * Put face info into the sampler faces[] array.
   2079  */
   2080 static void
   2081 sample_cube(struct tgsi_sampler *tgsi_sampler,
   2082             const float s[TGSI_QUAD_SIZE],
   2083             const float t[TGSI_QUAD_SIZE],
   2084             const float p[TGSI_QUAD_SIZE],
   2085             const float c0[TGSI_QUAD_SIZE],
   2086             enum tgsi_sampler_control control,
   2087             float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
   2088 {
   2089    struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
   2090    unsigned j;
   2091    float ssss[4], tttt[4];
   2092 
   2093    /* Not actually used, but the intermediate steps that do the
   2094     * dereferencing don't know it.
   2095     */
   2096    static const float pppp[4] = { 0, 0, 0, 0 };
   2097 
   2098    /*
   2099      major axis
   2100      direction    target                             sc     tc    ma
   2101      ----------   -------------------------------    ---    ---   ---
   2102      +rx          TEXTURE_CUBE_MAP_POSITIVE_X_EXT    -rz    -ry   rx
   2103      -rx          TEXTURE_CUBE_MAP_NEGATIVE_X_EXT    +rz    -ry   rx
   2104      +ry          TEXTURE_CUBE_MAP_POSITIVE_Y_EXT    +rx    +rz   ry
   2105      -ry          TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT    +rx    -rz   ry
   2106      +rz          TEXTURE_CUBE_MAP_POSITIVE_Z_EXT    +rx    -ry   rz
   2107      -rz          TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT    -rx    -ry   rz
   2108    */
   2109 
   2110    /* Choose the cube face and compute new s/t coords for the 2D face.
   2111     *
   2112     * Use the same cube face for all four pixels in the quad.
   2113     *
   2114     * This isn't ideal, but if we want to use a different cube face
   2115     * per pixel in the quad, we'd have to also compute the per-face
   2116     * LOD here too.  That's because the four post-face-selection
   2117     * texcoords are no longer related to each other (they're
   2118     * per-face!)  so we can't use subtraction to compute the partial
   2119     * deriviates to compute the LOD.  Doing so (near cube edges
   2120     * anyway) gives us pretty much random values.
   2121     */
   2122    {
   2123       /* use the average of the four pixel's texcoords to choose the face */
   2124       const float rx = 0.25F * (s[0] + s[1] + s[2] + s[3]);
   2125       const float ry = 0.25F * (t[0] + t[1] + t[2] + t[3]);
   2126       const float rz = 0.25F * (p[0] + p[1] + p[2] + p[3]);
   2127       const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
   2128 
   2129       if (arx >= ary && arx >= arz) {
   2130          float sign = (rx >= 0.0F) ? 1.0F : -1.0F;
   2131          uint face = (rx >= 0.0F) ? PIPE_TEX_FACE_POS_X : PIPE_TEX_FACE_NEG_X;
   2132          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
   2133             const float ima = -0.5F / fabsf(s[j]);
   2134             ssss[j] = sign *  p[j] * ima + 0.5F;
   2135             tttt[j] =         t[j] * ima + 0.5F;
   2136             samp->faces[j] = face;
   2137          }
   2138       }
   2139       else if (ary >= arx && ary >= arz) {
   2140          float sign = (ry >= 0.0F) ? 1.0F : -1.0F;
   2141          uint face = (ry >= 0.0F) ? PIPE_TEX_FACE_POS_Y : PIPE_TEX_FACE_NEG_Y;
   2142          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
   2143             const float ima = -0.5F / fabsf(t[j]);
   2144             ssss[j] =        -s[j] * ima + 0.5F;
   2145             tttt[j] = sign * -p[j] * ima + 0.5F;
   2146             samp->faces[j] = face;
   2147          }
   2148       }
   2149       else {
   2150          float sign = (rz >= 0.0F) ? 1.0F : -1.0F;
   2151          uint face = (rz >= 0.0F) ? PIPE_TEX_FACE_POS_Z : PIPE_TEX_FACE_NEG_Z;
   2152          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
   2153             const float ima = -0.5F / fabsf(p[j]);
   2154             ssss[j] = sign * -s[j] * ima + 0.5F;
   2155             tttt[j] =         t[j] * ima + 0.5F;
   2156             samp->faces[j] = face;
   2157          }
   2158       }
   2159    }
   2160 
   2161    /* In our little pipeline, the compare stage is next.  If compare
   2162     * is not active, this will point somewhere deeper into the
   2163     * pipeline, eg. to mip_filter or even img_filter.
   2164     */
   2165    samp->compare(tgsi_sampler, ssss, tttt, pppp, c0, control, rgba);
   2166 }
   2167 
   2168 
   2169 static void
   2170 do_swizzling(const struct sp_sampler_variant *samp,
   2171              float in[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
   2172              float out[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
   2173 {
   2174    int j;
   2175    const unsigned swizzle_r = samp->key.bits.swizzle_r;
   2176    const unsigned swizzle_g = samp->key.bits.swizzle_g;
   2177    const unsigned swizzle_b = samp->key.bits.swizzle_b;
   2178    const unsigned swizzle_a = samp->key.bits.swizzle_a;
   2179 
   2180    switch (swizzle_r) {
   2181    case PIPE_SWIZZLE_ZERO:
   2182       for (j = 0; j < 4; j++)
   2183          out[0][j] = 0.0f;
   2184       break;
   2185    case PIPE_SWIZZLE_ONE:
   2186       for (j = 0; j < 4; j++)
   2187          out[0][j] = 1.0f;
   2188       break;
   2189    default:
   2190       assert(swizzle_r < 4);
   2191       for (j = 0; j < 4; j++)
   2192          out[0][j] = in[swizzle_r][j];
   2193    }
   2194 
   2195    switch (swizzle_g) {
   2196    case PIPE_SWIZZLE_ZERO:
   2197       for (j = 0; j < 4; j++)
   2198          out[1][j] = 0.0f;
   2199       break;
   2200    case PIPE_SWIZZLE_ONE:
   2201       for (j = 0; j < 4; j++)
   2202          out[1][j] = 1.0f;
   2203       break;
   2204    default:
   2205       assert(swizzle_g < 4);
   2206       for (j = 0; j < 4; j++)
   2207          out[1][j] = in[swizzle_g][j];
   2208    }
   2209 
   2210    switch (swizzle_b) {
   2211    case PIPE_SWIZZLE_ZERO:
   2212       for (j = 0; j < 4; j++)
   2213          out[2][j] = 0.0f;
   2214       break;
   2215    case PIPE_SWIZZLE_ONE:
   2216       for (j = 0; j < 4; j++)
   2217          out[2][j] = 1.0f;
   2218       break;
   2219    default:
   2220       assert(swizzle_b < 4);
   2221       for (j = 0; j < 4; j++)
   2222          out[2][j] = in[swizzle_b][j];
   2223    }
   2224 
   2225    switch (swizzle_a) {
   2226    case PIPE_SWIZZLE_ZERO:
   2227       for (j = 0; j < 4; j++)
   2228          out[3][j] = 0.0f;
   2229       break;
   2230    case PIPE_SWIZZLE_ONE:
   2231       for (j = 0; j < 4; j++)
   2232          out[3][j] = 1.0f;
   2233       break;
   2234    default:
   2235       assert(swizzle_a < 4);
   2236       for (j = 0; j < 4; j++)
   2237          out[3][j] = in[swizzle_a][j];
   2238    }
   2239 }
   2240 
   2241 
   2242 static void
   2243 sample_swizzle(struct tgsi_sampler *tgsi_sampler,
   2244                const float s[TGSI_QUAD_SIZE],
   2245                const float t[TGSI_QUAD_SIZE],
   2246                const float p[TGSI_QUAD_SIZE],
   2247                const float c0[TGSI_QUAD_SIZE],
   2248                enum tgsi_sampler_control control,
   2249                float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
   2250 {
   2251    struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
   2252    float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
   2253 
   2254    samp->sample_target(tgsi_sampler, s, t, p, c0, control, rgba_temp);
   2255 
   2256    do_swizzling(samp, rgba_temp, rgba);
   2257 }
   2258 
   2259 
   2260 static wrap_nearest_func
   2261 get_nearest_unorm_wrap(unsigned mode)
   2262 {
   2263    switch (mode) {
   2264    case PIPE_TEX_WRAP_CLAMP:
   2265       return wrap_nearest_unorm_clamp;
   2266    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
   2267       return wrap_nearest_unorm_clamp_to_edge;
   2268    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   2269       return wrap_nearest_unorm_clamp_to_border;
   2270    default:
   2271       assert(0);
   2272       return wrap_nearest_unorm_clamp;
   2273    }
   2274 }
   2275 
   2276 
   2277 static wrap_nearest_func
   2278 get_nearest_wrap(unsigned mode)
   2279 {
   2280    switch (mode) {
   2281    case PIPE_TEX_WRAP_REPEAT:
   2282       return wrap_nearest_repeat;
   2283    case PIPE_TEX_WRAP_CLAMP:
   2284       return wrap_nearest_clamp;
   2285    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
   2286       return wrap_nearest_clamp_to_edge;
   2287    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   2288       return wrap_nearest_clamp_to_border;
   2289    case PIPE_TEX_WRAP_MIRROR_REPEAT:
   2290       return wrap_nearest_mirror_repeat;
   2291    case PIPE_TEX_WRAP_MIRROR_CLAMP:
   2292       return wrap_nearest_mirror_clamp;
   2293    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
   2294       return wrap_nearest_mirror_clamp_to_edge;
   2295    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
   2296       return wrap_nearest_mirror_clamp_to_border;
   2297    default:
   2298       assert(0);
   2299       return wrap_nearest_repeat;
   2300    }
   2301 }
   2302 
   2303 
   2304 static wrap_linear_func
   2305 get_linear_unorm_wrap(unsigned mode)
   2306 {
   2307    switch (mode) {
   2308    case PIPE_TEX_WRAP_CLAMP:
   2309       return wrap_linear_unorm_clamp;
   2310    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
   2311       return wrap_linear_unorm_clamp_to_edge;
   2312    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   2313       return wrap_linear_unorm_clamp_to_border;
   2314    default:
   2315       assert(0);
   2316       return wrap_linear_unorm_clamp;
   2317    }
   2318 }
   2319 
   2320 
   2321 static wrap_linear_func
   2322 get_linear_wrap(unsigned mode)
   2323 {
   2324    switch (mode) {
   2325    case PIPE_TEX_WRAP_REPEAT:
   2326       return wrap_linear_repeat;
   2327    case PIPE_TEX_WRAP_CLAMP:
   2328       return wrap_linear_clamp;
   2329    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
   2330       return wrap_linear_clamp_to_edge;
   2331    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   2332       return wrap_linear_clamp_to_border;
   2333    case PIPE_TEX_WRAP_MIRROR_REPEAT:
   2334       return wrap_linear_mirror_repeat;
   2335    case PIPE_TEX_WRAP_MIRROR_CLAMP:
   2336       return wrap_linear_mirror_clamp;
   2337    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
   2338       return wrap_linear_mirror_clamp_to_edge;
   2339    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
   2340       return wrap_linear_mirror_clamp_to_border;
   2341    default:
   2342       assert(0);
   2343       return wrap_linear_repeat;
   2344    }
   2345 }
   2346 
   2347 
   2348 /**
   2349  * Is swizzling needed for the given state key?
   2350  */
   2351 static INLINE bool
   2352 any_swizzle(union sp_sampler_key key)
   2353 {
   2354    return (key.bits.swizzle_r != PIPE_SWIZZLE_RED ||
   2355            key.bits.swizzle_g != PIPE_SWIZZLE_GREEN ||
   2356            key.bits.swizzle_b != PIPE_SWIZZLE_BLUE ||
   2357            key.bits.swizzle_a != PIPE_SWIZZLE_ALPHA);
   2358 }
   2359 
   2360 
   2361 static compute_lambda_func
   2362 get_lambda_func(const union sp_sampler_key key)
   2363 {
   2364    if (key.bits.processor == TGSI_PROCESSOR_VERTEX)
   2365       return compute_lambda_vert;
   2366 
   2367    switch (key.bits.target) {
   2368    case PIPE_TEXTURE_1D:
   2369    case PIPE_TEXTURE_1D_ARRAY:
   2370       return compute_lambda_1d;
   2371    case PIPE_TEXTURE_2D:
   2372    case PIPE_TEXTURE_2D_ARRAY:
   2373    case PIPE_TEXTURE_RECT:
   2374    case PIPE_TEXTURE_CUBE:
   2375       return compute_lambda_2d;
   2376    case PIPE_TEXTURE_3D:
   2377       return compute_lambda_3d;
   2378    default:
   2379       assert(0);
   2380       return compute_lambda_1d;
   2381    }
   2382 }
   2383 
   2384 
   2385 static img_filter_func
   2386 get_img_filter(const union sp_sampler_key key,
   2387                unsigned filter,
   2388                const struct pipe_sampler_state *sampler)
   2389 {
   2390    switch (key.bits.target) {
   2391    case PIPE_TEXTURE_1D:
   2392       if (filter == PIPE_TEX_FILTER_NEAREST)
   2393          return img_filter_1d_nearest;
   2394       else
   2395          return img_filter_1d_linear;
   2396       break;
   2397    case PIPE_TEXTURE_1D_ARRAY:
   2398       if (filter == PIPE_TEX_FILTER_NEAREST)
   2399          return img_filter_1d_array_nearest;
   2400       else
   2401          return img_filter_1d_array_linear;
   2402       break;
   2403    case PIPE_TEXTURE_2D:
   2404    case PIPE_TEXTURE_RECT:
   2405       /* Try for fast path:
   2406        */
   2407       if (key.bits.is_pot &&
   2408           sampler->wrap_s == sampler->wrap_t &&
   2409           sampler->normalized_coords)
   2410       {
   2411          switch (sampler->wrap_s) {
   2412          case PIPE_TEX_WRAP_REPEAT:
   2413             switch (filter) {
   2414             case PIPE_TEX_FILTER_NEAREST:
   2415                return img_filter_2d_nearest_repeat_POT;
   2416             case PIPE_TEX_FILTER_LINEAR:
   2417                return img_filter_2d_linear_repeat_POT;
   2418             default:
   2419                break;
   2420             }
   2421             break;
   2422          case PIPE_TEX_WRAP_CLAMP:
   2423             switch (filter) {
   2424             case PIPE_TEX_FILTER_NEAREST:
   2425                return img_filter_2d_nearest_clamp_POT;
   2426             default:
   2427                break;
   2428             }
   2429          }
   2430       }
   2431       /* Otherwise use default versions:
   2432        */
   2433       if (filter == PIPE_TEX_FILTER_NEAREST)
   2434          return img_filter_2d_nearest;
   2435       else
   2436          return img_filter_2d_linear;
   2437       break;
   2438    case PIPE_TEXTURE_2D_ARRAY:
   2439       if (filter == PIPE_TEX_FILTER_NEAREST)
   2440          return img_filter_2d_array_nearest;
   2441       else
   2442          return img_filter_2d_array_linear;
   2443       break;
   2444    case PIPE_TEXTURE_CUBE:
   2445       if (filter == PIPE_TEX_FILTER_NEAREST)
   2446          return img_filter_cube_nearest;
   2447       else
   2448          return img_filter_cube_linear;
   2449       break;
   2450    case PIPE_TEXTURE_3D:
   2451       if (filter == PIPE_TEX_FILTER_NEAREST)
   2452          return img_filter_3d_nearest;
   2453       else
   2454          return img_filter_3d_linear;
   2455       break;
   2456    default:
   2457       assert(0);
   2458       return img_filter_1d_nearest;
   2459    }
   2460 }
   2461 
   2462 
   2463 /**
   2464  * Bind the given texture object and texture cache to the sampler variant.
   2465  */
   2466 void
   2467 sp_sampler_variant_bind_view( struct sp_sampler_variant *samp,
   2468                               struct softpipe_tex_tile_cache *tex_cache,
   2469                               const struct pipe_sampler_view *view )
   2470 {
   2471    const struct pipe_resource *texture = view->texture;
   2472 
   2473    samp->view = view;
   2474    samp->cache = tex_cache;
   2475    samp->xpot = util_logbase2( texture->width0 );
   2476    samp->ypot = util_logbase2( texture->height0 );
   2477 }
   2478 
   2479 
   2480 void
   2481 sp_sampler_variant_destroy( struct sp_sampler_variant *samp )
   2482 {
   2483    FREE(samp);
   2484 }
   2485 
   2486 
   2487 static void
   2488 sample_get_dims(struct tgsi_sampler *tgsi_sampler, int level,
   2489 		int dims[4])
   2490 {
   2491     struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
   2492     const struct pipe_sampler_view *view = samp->view;
   2493     const struct pipe_resource *texture = view->texture;
   2494 
   2495     /* undefined according to EXT_gpu_program */
   2496     level += view->u.tex.first_level;
   2497     if (level > view->u.tex.last_level)
   2498 	return;
   2499 
   2500     dims[0] = u_minify(texture->width0, level);
   2501 
   2502     switch(texture->target) {
   2503     case PIPE_TEXTURE_1D_ARRAY:
   2504        dims[1] = texture->array_size;
   2505        /* fallthrough */
   2506     case PIPE_TEXTURE_1D:
   2507     case PIPE_BUFFER:
   2508        return;
   2509     case PIPE_TEXTURE_2D_ARRAY:
   2510        dims[2] = texture->array_size;
   2511        /* fallthrough */
   2512     case PIPE_TEXTURE_2D:
   2513     case PIPE_TEXTURE_CUBE:
   2514     case PIPE_TEXTURE_RECT:
   2515        dims[1] = u_minify(texture->height0, level);
   2516        return;
   2517     case PIPE_TEXTURE_3D:
   2518        dims[1] = u_minify(texture->height0, level);
   2519        dims[2] = u_minify(texture->depth0, level);
   2520        return;
   2521     default:
   2522        assert(!"unexpected texture target in sample_get_dims()");
   2523        return;
   2524     }
   2525 }
   2526 
   2527 /**
   2528  * This function is only used for getting unfiltered texels via the
   2529  * TXF opcode.  The GL spec says that out-of-bounds texel fetches
   2530  * produce undefined results.  Instead of crashing, lets just clamp
   2531  * coords to the texture image size.
   2532  */
   2533 static void
   2534 sample_get_texels(struct tgsi_sampler *tgsi_sampler,
   2535                   const int v_i[TGSI_QUAD_SIZE],
   2536                   const int v_j[TGSI_QUAD_SIZE],
   2537                   const int v_k[TGSI_QUAD_SIZE],
   2538                   const int lod[TGSI_QUAD_SIZE],
   2539                   const int8_t offset[3],
   2540                   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
   2541 {
   2542    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
   2543    union tex_tile_address addr;
   2544    const struct pipe_resource *texture = samp->view->texture;
   2545    int j, c;
   2546    const float *tx;
   2547    const bool need_swizzle = any_swizzle(samp->key);
   2548    int width, height, depth, layers;
   2549 
   2550    addr.value = 0;
   2551    /* TODO write a better test for LOD */
   2552    addr.bits.level = lod[0];
   2553 
   2554    width = u_minify(texture->width0, addr.bits.level);
   2555    height = u_minify(texture->height0, addr.bits.level);
   2556    depth = u_minify(texture->depth0, addr.bits.level);
   2557    layers = texture->array_size;
   2558 
   2559    switch(texture->target) {
   2560    case PIPE_TEXTURE_1D:
   2561       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
   2562          int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
   2563 	 tx = get_texel_2d(samp, addr, x, 0);
   2564 	 for (c = 0; c < 4; c++) {
   2565 	    rgba[c][j] = tx[c];
   2566 	 }
   2567       }
   2568       break;
   2569    case PIPE_TEXTURE_1D_ARRAY:
   2570       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
   2571          int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
   2572          int y = CLAMP(v_j[j], 0, layers - 1);
   2573 	 tx = get_texel_1d_array(samp, addr, x, y);
   2574 	 for (c = 0; c < 4; c++) {
   2575 	    rgba[c][j] = tx[c];
   2576 	 }
   2577       }
   2578       break;
   2579    case PIPE_TEXTURE_2D:
   2580    case PIPE_TEXTURE_RECT:
   2581       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
   2582          int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
   2583          int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
   2584 	 tx = get_texel_2d(samp, addr, x, y);
   2585 	 for (c = 0; c < 4; c++) {
   2586 	    rgba[c][j] = tx[c];
   2587 	 }
   2588       }
   2589       break;
   2590    case PIPE_TEXTURE_2D_ARRAY:
   2591       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
   2592          int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
   2593          int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
   2594          int layer = CLAMP(v_k[j], 0, layers - 1);
   2595 	 tx = get_texel_2d_array(samp, addr, x, y, layer);
   2596 	 for (c = 0; c < 4; c++) {
   2597 	    rgba[c][j] = tx[c];
   2598 	 }
   2599       }
   2600       break;
   2601    case PIPE_TEXTURE_3D:
   2602       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
   2603          int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
   2604          int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
   2605          int z = CLAMP(v_k[j] + offset[2], 0, depth - 1);
   2606 
   2607 	 tx = get_texel_3d(samp, addr, x, y, z);
   2608 	 for (c = 0; c < 4; c++) {
   2609 	    rgba[c][j] = tx[c];
   2610 	 }
   2611       }
   2612       break;
   2613    case PIPE_TEXTURE_CUBE: /* TXF can't work on CUBE according to spec */
   2614    default:
   2615       assert(!"Unknown or CUBE texture type in TXF processing\n");
   2616       break;
   2617    }
   2618 
   2619    if (need_swizzle) {
   2620       float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
   2621       memcpy(rgba_temp, rgba, sizeof(rgba_temp));
   2622       do_swizzling(samp, rgba_temp, rgba);
   2623    }
   2624 }
   2625 
   2626 
   2627 /**
   2628  * Create a sampler variant for a given set of non-orthogonal state.
   2629  */
   2630 struct sp_sampler_variant *
   2631 sp_create_sampler_variant( const struct pipe_sampler_state *sampler,
   2632                            const union sp_sampler_key key )
   2633 {
   2634    struct sp_sampler_variant *samp = CALLOC_STRUCT(sp_sampler_variant);
   2635    if (!samp)
   2636       return NULL;
   2637 
   2638    samp->sampler = sampler;
   2639    samp->key = key;
   2640 
   2641    /* Note that (for instance) linear_texcoord_s and
   2642     * nearest_texcoord_s may be active at the same time, if the
   2643     * sampler min_img_filter differs from its mag_img_filter.
   2644     */
   2645    if (sampler->normalized_coords) {
   2646       samp->linear_texcoord_s = get_linear_wrap( sampler->wrap_s );
   2647       samp->linear_texcoord_t = get_linear_wrap( sampler->wrap_t );
   2648       samp->linear_texcoord_p = get_linear_wrap( sampler->wrap_r );
   2649 
   2650       samp->nearest_texcoord_s = get_nearest_wrap( sampler->wrap_s );
   2651       samp->nearest_texcoord_t = get_nearest_wrap( sampler->wrap_t );
   2652       samp->nearest_texcoord_p = get_nearest_wrap( sampler->wrap_r );
   2653    }
   2654    else {
   2655       samp->linear_texcoord_s = get_linear_unorm_wrap( sampler->wrap_s );
   2656       samp->linear_texcoord_t = get_linear_unorm_wrap( sampler->wrap_t );
   2657       samp->linear_texcoord_p = get_linear_unorm_wrap( sampler->wrap_r );
   2658 
   2659       samp->nearest_texcoord_s = get_nearest_unorm_wrap( sampler->wrap_s );
   2660       samp->nearest_texcoord_t = get_nearest_unorm_wrap( sampler->wrap_t );
   2661       samp->nearest_texcoord_p = get_nearest_unorm_wrap( sampler->wrap_r );
   2662    }
   2663 
   2664    samp->compute_lambda = get_lambda_func( key );
   2665 
   2666    samp->min_img_filter = get_img_filter(key, sampler->min_img_filter, sampler);
   2667    samp->mag_img_filter = get_img_filter(key, sampler->mag_img_filter, sampler);
   2668 
   2669    switch (sampler->min_mip_filter) {
   2670    case PIPE_TEX_MIPFILTER_NONE:
   2671       if (sampler->min_img_filter == sampler->mag_img_filter)
   2672          samp->mip_filter = mip_filter_none_no_filter_select;
   2673       else
   2674          samp->mip_filter = mip_filter_none;
   2675       break;
   2676 
   2677    case PIPE_TEX_MIPFILTER_NEAREST:
   2678       samp->mip_filter = mip_filter_nearest;
   2679       break;
   2680 
   2681    case PIPE_TEX_MIPFILTER_LINEAR:
   2682       if (key.bits.is_pot &&
   2683           key.bits.target == PIPE_TEXTURE_2D &&
   2684           sampler->min_img_filter == sampler->mag_img_filter &&
   2685           sampler->normalized_coords &&
   2686           sampler->wrap_s == PIPE_TEX_WRAP_REPEAT &&
   2687           sampler->wrap_t == PIPE_TEX_WRAP_REPEAT &&
   2688           sampler->min_img_filter == PIPE_TEX_FILTER_LINEAR) {
   2689          samp->mip_filter = mip_filter_linear_2d_linear_repeat_POT;
   2690       }
   2691       else {
   2692          samp->mip_filter = mip_filter_linear;
   2693       }
   2694 
   2695       /* Anisotropic filtering extension. */
   2696       if (sampler->max_anisotropy > 1) {
   2697       	samp->mip_filter = mip_filter_linear_aniso;
   2698 
   2699       	/* Override min_img_filter:
   2700       	 * min_img_filter needs to be set to NEAREST since we need to access
   2701       	 * each texture pixel as it is and weight it later; using linear
   2702       	 * filters will have incorrect results.
   2703       	 * By setting the filter to NEAREST here, we can avoid calling the
   2704       	 * generic img_filter_2d_nearest in the anisotropic filter function,
   2705       	 * making it possible to use one of the accelerated implementations
   2706       	 */
   2707       	samp->min_img_filter = get_img_filter(key, PIPE_TEX_FILTER_NEAREST, sampler);
   2708 
   2709       	/* on first access create the lookup table containing the filter weights. */
   2710         if (!weightLut) {
   2711            create_filter_table();
   2712         }
   2713       }
   2714 
   2715       break;
   2716    }
   2717 
   2718    if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
   2719       samp->compare = sample_compare;
   2720    }
   2721    else {
   2722       /* Skip compare operation by promoting the mip_filter function
   2723        * pointer:
   2724        */
   2725       samp->compare = samp->mip_filter;
   2726    }
   2727 
   2728    if (key.bits.target == PIPE_TEXTURE_CUBE) {
   2729       samp->sample_target = sample_cube;
   2730    }
   2731    else {
   2732       samp->faces[0] = 0;
   2733       samp->faces[1] = 0;
   2734       samp->faces[2] = 0;
   2735       samp->faces[3] = 0;
   2736 
   2737       /* Skip cube face determination by promoting the compare
   2738        * function pointer:
   2739        */
   2740       samp->sample_target = samp->compare;
   2741    }
   2742 
   2743    if (any_swizzle(key)) {
   2744       samp->base.get_samples = sample_swizzle;
   2745    }
   2746    else {
   2747       samp->base.get_samples = samp->sample_target;
   2748    }
   2749 
   2750    samp->base.get_dims = sample_get_dims;
   2751    samp->base.get_texel = sample_get_texels;
   2752    return samp;
   2753 }
   2754