Home | History | Annotate | Download | only in gallivm
      1 /**************************************************************************
      2  *
      3  * Copyright 2010 VMware, Inc.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     17  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
     18  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     19  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     21  *
     22  * The above copyright notice and this permission notice (including the
     23  * next paragraph) shall be included in all copies or substantial portions
     24  * of the Software.
     25  *
     26  **************************************************************************/
     27 
     28 
     29 #include "lp_bld_type.h"
     30 #include "lp_bld_arit.h"
     31 #include "lp_bld_const.h"
     32 #include "lp_bld_swizzle.h"
     33 #include "lp_bld_quad.h"
     34 
     35 
     36 static const unsigned char
     37 swizzle_left[4] = {
     38    LP_BLD_QUAD_TOP_LEFT,     LP_BLD_QUAD_TOP_LEFT,
     39    LP_BLD_QUAD_BOTTOM_LEFT,  LP_BLD_QUAD_BOTTOM_LEFT
     40 };
     41 
     42 static const unsigned char
     43 swizzle_right[4] = {
     44    LP_BLD_QUAD_TOP_RIGHT,    LP_BLD_QUAD_TOP_RIGHT,
     45    LP_BLD_QUAD_BOTTOM_RIGHT, LP_BLD_QUAD_BOTTOM_RIGHT
     46 };
     47 
     48 static const unsigned char
     49 swizzle_top[4] = {
     50    LP_BLD_QUAD_TOP_LEFT,     LP_BLD_QUAD_TOP_RIGHT,
     51    LP_BLD_QUAD_TOP_LEFT,     LP_BLD_QUAD_TOP_RIGHT
     52 };
     53 
     54 static const unsigned char
     55 swizzle_bottom[4] = {
     56    LP_BLD_QUAD_BOTTOM_LEFT,  LP_BLD_QUAD_BOTTOM_RIGHT,
     57    LP_BLD_QUAD_BOTTOM_LEFT,  LP_BLD_QUAD_BOTTOM_RIGHT
     58 };
     59 
     60 
     61 LLVMValueRef
     62 lp_build_ddx(struct lp_build_context *bld,
     63              LLVMValueRef a)
     64 {
     65    LLVMValueRef a_left  = lp_build_swizzle_aos(bld, a, swizzle_left);
     66    LLVMValueRef a_right = lp_build_swizzle_aos(bld, a, swizzle_right);
     67    return lp_build_sub(bld, a_right, a_left);
     68 }
     69 
     70 
     71 LLVMValueRef
     72 lp_build_ddy(struct lp_build_context *bld,
     73              LLVMValueRef a)
     74 {
     75    LLVMValueRef a_top    = lp_build_swizzle_aos(bld, a, swizzle_top);
     76    LLVMValueRef a_bottom = lp_build_swizzle_aos(bld, a, swizzle_bottom);
     77    return lp_build_sub(bld, a_bottom, a_top);
     78 }
     79 
/*
 * To be able to handle multiple quads at once in texture sampling and
 * do lod calculations per quad, it is necessary to get the per-quad
 * derivatives into the lp_build_rho function.
 * For 8-wide vectors the packed derivative values for 3 coords would
 * look like this; this scales to an arbitrary (multiple of 4) vector size:
 * ds1dx ds1dy dt1dx dt1dy ds2dx ds2dy dt2dx dt2dy
 * dr1dx dr1dy _____ _____ dr2dx dr2dy _____ _____
 * The second vector will be unused for 1d and 2d textures.
 */
     90 LLVMValueRef
     91 lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld,
     92                                  LLVMValueRef a)
     93 {
     94    struct gallivm_state *gallivm = bld->gallivm;
     95    LLVMBuilderRef builder = gallivm->builder;
     96    LLVMValueRef vec1, vec2;
     97 
     98    /* same packing as _twocoord, but can use aos swizzle helper */
     99 
    100    /*
    101     * XXX could make swizzle1 a noop swizzle by using right top/bottom
    102     * pair for ddy
    103     */
    104    static const unsigned char swizzle1[] = {
    105       LP_BLD_QUAD_TOP_LEFT, LP_BLD_QUAD_TOP_LEFT,
    106       LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
    107    };
    108    static const unsigned char swizzle2[] = {
    109       LP_BLD_QUAD_TOP_RIGHT, LP_BLD_QUAD_BOTTOM_LEFT,
    110       LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
    111    };
    112 
    113    vec1 = lp_build_swizzle_aos(bld, a, swizzle1);
    114    vec2 = lp_build_swizzle_aos(bld, a, swizzle2);
    115 
    116    if (bld->type.floating)
    117       return LLVMBuildFSub(builder, vec2, vec1, "ddxddy");
    118    else
    119       return LLVMBuildSub(builder, vec2, vec1, "ddxddy");
    120 }
    121 
    122 
    123 LLVMValueRef
    124 lp_build_packed_ddx_ddy_twocoord(struct lp_build_context *bld,
    125                                  LLVMValueRef a, LLVMValueRef b)
    126 {
    127    struct gallivm_state *gallivm = bld->gallivm;
    128    LLVMBuilderRef builder = gallivm->builder;
    129    LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH/4];
    130    LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH/4];
    131    LLVMValueRef vec1, vec2;
    132    unsigned length, num_quads, i;
    133 
    134    /* XXX: do hsub version */
    135    length = bld->type.length;
    136    num_quads = length / 4;
    137    for (i = 0; i < num_quads; i++) {
    138       unsigned s1 = 4 * i;
    139       unsigned s2 = 4 * i + length;
    140       shuffles1[4*i + 0] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s1);
    141       shuffles1[4*i + 1] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s1);
    142       shuffles1[4*i + 2] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s2);
    143       shuffles1[4*i + 3] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s2);
    144       shuffles2[4*i + 0] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_RIGHT + s1);
    145       shuffles2[4*i + 1] = lp_build_const_int32(gallivm, LP_BLD_QUAD_BOTTOM_LEFT + s1);
    146       shuffles2[4*i + 2] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_RIGHT + s2);
    147       shuffles2[4*i + 3] = lp_build_const_int32(gallivm, LP_BLD_QUAD_BOTTOM_LEFT + s2);
    148    }
    149    vec1 = LLVMBuildShuffleVector(builder, a, b,
    150                                  LLVMConstVector(shuffles1, length), "");
    151    vec2 = LLVMBuildShuffleVector(builder, a, b,
    152                                  LLVMConstVector(shuffles2, length), "");
    153    if (bld->type.floating)
    154       return LLVMBuildFSub(builder, vec2, vec1, "ddxddyddxddy");
    155    else
    156       return LLVMBuildSub(builder, vec2, vec1, "ddxddyddxddy");
    157 }
    158 
    159