Home | History | Annotate | Download | only in gallivm
      1 /**************************************************************************
      2  *
      3  * Copyright 2010 VMware, Inc.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     17  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
     18  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     19  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     21  *
     22  * The above copyright notice and this permission notice (including the
     23  * next paragraph) shall be included in all copies or substantial portions
     24  * of the Software.
     25  *
     26  **************************************************************************/
     27 
     28 
     29 #include "lp_bld_type.h"
     30 #include "lp_bld_arit.h"
     31 #include "lp_bld_const.h"
     32 #include "lp_bld_swizzle.h"
     33 #include "lp_bld_quad.h"
     34 #include "lp_bld_pack.h"
     35 
     36 
     37 static const unsigned char
     38 swizzle_left[4] = {
     39    LP_BLD_QUAD_TOP_LEFT,     LP_BLD_QUAD_TOP_LEFT,
     40    LP_BLD_QUAD_BOTTOM_LEFT,  LP_BLD_QUAD_BOTTOM_LEFT
     41 };
     42 
     43 static const unsigned char
     44 swizzle_right[4] = {
     45    LP_BLD_QUAD_TOP_RIGHT,    LP_BLD_QUAD_TOP_RIGHT,
     46    LP_BLD_QUAD_BOTTOM_RIGHT, LP_BLD_QUAD_BOTTOM_RIGHT
     47 };
     48 
     49 static const unsigned char
     50 swizzle_top[4] = {
     51    LP_BLD_QUAD_TOP_LEFT,     LP_BLD_QUAD_TOP_RIGHT,
     52    LP_BLD_QUAD_TOP_LEFT,     LP_BLD_QUAD_TOP_RIGHT
     53 };
     54 
     55 static const unsigned char
     56 swizzle_bottom[4] = {
     57    LP_BLD_QUAD_BOTTOM_LEFT,  LP_BLD_QUAD_BOTTOM_RIGHT,
     58    LP_BLD_QUAD_BOTTOM_LEFT,  LP_BLD_QUAD_BOTTOM_RIGHT
     59 };
     60 
     61 
     62 LLVMValueRef
     63 lp_build_ddx(struct lp_build_context *bld,
     64              LLVMValueRef a)
     65 {
     66    LLVMValueRef a_left  = lp_build_swizzle_aos(bld, a, swizzle_left);
     67    LLVMValueRef a_right = lp_build_swizzle_aos(bld, a, swizzle_right);
     68    return lp_build_sub(bld, a_right, a_left);
     69 }
     70 
     71 
     72 LLVMValueRef
     73 lp_build_ddy(struct lp_build_context *bld,
     74              LLVMValueRef a)
     75 {
     76    LLVMValueRef a_top    = lp_build_swizzle_aos(bld, a, swizzle_top);
     77    LLVMValueRef a_bottom = lp_build_swizzle_aos(bld, a, swizzle_bottom);
     78    return lp_build_sub(bld, a_bottom, a_top);
     79 }
     80 
     81 /*
     82  * Helper for building packed ddx/ddy vector for one coord (scalar per quad
     83  * values). The vector will look like this (8-wide):
     84  * dr1dx _____ -dr1dy _____ dr2dx _____ -dr2dy _____
     85  * This only requires one shuffle instead of two for more straightforward packing.
     86  */
     87 LLVMValueRef
     88 lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld,
     89                                  LLVMValueRef a)
     90 {
     91    struct gallivm_state *gallivm = bld->gallivm;
     92    LLVMBuilderRef builder = gallivm->builder;
     93    LLVMValueRef vec1, vec2;
     94 
     95    /* use aos swizzle helper */
     96 
     97    static const unsigned char swizzle1[] = { /* no-op swizzle */
     98       LP_BLD_QUAD_TOP_LEFT, LP_BLD_SWIZZLE_DONTCARE,
     99       LP_BLD_QUAD_BOTTOM_LEFT, LP_BLD_SWIZZLE_DONTCARE
    100    };
    101    static const unsigned char swizzle2[] = {
    102       LP_BLD_QUAD_TOP_RIGHT, LP_BLD_SWIZZLE_DONTCARE,
    103       LP_BLD_QUAD_TOP_LEFT, LP_BLD_SWIZZLE_DONTCARE
    104    };
    105 
    106    vec1 = lp_build_swizzle_aos(bld, a, swizzle1);
    107    vec2 = lp_build_swizzle_aos(bld, a, swizzle2);
    108 
    109    if (bld->type.floating)
    110       return LLVMBuildFSub(builder, vec2, vec1, "ddxddy");
    111    else
    112       return LLVMBuildSub(builder, vec2, vec1, "ddxddy");
    113 }
    114 
    115 
    116 /*
    117  * Helper for building packed ddx/ddy vector for one coord (scalar per quad
    118  * values). The vector will look like this (8-wide):
    119  * ds1dx ds1dy dt1dx dt1dy ds2dx ds2dy dt2dx dt2dy
    120  * This only needs 2 (v)shufps.
    121  */
    122 LLVMValueRef
    123 lp_build_packed_ddx_ddy_twocoord(struct lp_build_context *bld,
    124                                  LLVMValueRef a, LLVMValueRef b)
    125 {
    126    struct gallivm_state *gallivm = bld->gallivm;
    127    LLVMBuilderRef builder = gallivm->builder;
    128    LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH/4];
    129    LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH/4];
    130    LLVMValueRef vec1, vec2;
    131    unsigned length, num_quads, i;
    132 
    133    /* XXX: do hsub version */
    134    length = bld->type.length;
    135    num_quads = length / 4;
    136    for (i = 0; i < num_quads; i++) {
    137       unsigned s1 = 4 * i;
    138       unsigned s2 = 4 * i + length;
    139       shuffles1[4*i + 0] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s1);
    140       shuffles1[4*i + 1] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s1);
    141       shuffles1[4*i + 2] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s2);
    142       shuffles1[4*i + 3] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s2);
    143       shuffles2[4*i + 0] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_RIGHT + s1);
    144       shuffles2[4*i + 1] = lp_build_const_int32(gallivm, LP_BLD_QUAD_BOTTOM_LEFT + s1);
    145       shuffles2[4*i + 2] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_RIGHT + s2);
    146       shuffles2[4*i + 3] = lp_build_const_int32(gallivm, LP_BLD_QUAD_BOTTOM_LEFT + s2);
    147    }
    148    vec1 = LLVMBuildShuffleVector(builder, a, b,
    149                                  LLVMConstVector(shuffles1, length), "");
    150    vec2 = LLVMBuildShuffleVector(builder, a, b,
    151                                  LLVMConstVector(shuffles2, length), "");
    152    if (bld->type.floating)
    153       return LLVMBuildFSub(builder, vec2, vec1, "ddxddyddxddy");
    154    else
    155       return LLVMBuildSub(builder, vec2, vec1, "ddxddyddxddy");
    156 }
    157 
    158 
    159 /**
    160  * Twiddle from quad format to row format
    161  *
    162  *   src0      src1
    163  * ######### #########      #################
    164  * # 0 | 1 # # 4 | 5 #      # 0 | 1 | 4 | 5 # src0
    165  * #---+---# #---+---#  ->  #################
    166  * # 2 | 3 # # 6 | 7 #      # 2 | 3 | 6 | 7 # src1
    167  * ######### #########      #################
    168  *
    169  */
    170 void
    171 lp_bld_quad_twiddle(struct gallivm_state *gallivm,
    172                     struct lp_type lp_dst_type,
    173                     const LLVMValueRef* src,
    174                     unsigned src_count,
    175                     LLVMValueRef* dst)
    176 {
    177    LLVMBuilderRef builder = gallivm->builder;
    178    LLVMTypeRef dst_type_ref;
    179    LLVMTypeRef type2_ref;
    180    struct lp_type type2;
    181    unsigned i;
    182 
    183    assert((src_count % 2) == 0);
    184 
    185    /* Create a type with only 2 elements */
    186    type2 = lp_dst_type;
    187    type2.width = (lp_dst_type.width * lp_dst_type.length) / 2;
    188    type2.length = 2;
    189    type2.floating = 0;
    190 
    191    type2_ref = lp_build_vec_type(gallivm, type2);
    192    dst_type_ref = lp_build_vec_type(gallivm, lp_dst_type);
    193 
    194    for (i = 0; i < src_count; i += 2) {
    195       LLVMValueRef src0, src1;
    196 
    197       src0 = LLVMBuildBitCast(builder, src[i + 0], type2_ref, "");
    198       src1 = LLVMBuildBitCast(builder, src[i + 1], type2_ref, "");
    199 
    200       dst[i + 0] = lp_build_interleave2(gallivm, type2, src0, src1, 0);
    201       dst[i + 1] = lp_build_interleave2(gallivm, type2, src0, src1, 1);
    202 
    203       dst[i + 0] = LLVMBuildBitCast(builder, dst[i + 0], dst_type_ref, "");
    204       dst[i + 1] = LLVMBuildBitCast(builder, dst[i + 1], dst_type_ref, "");
    205    }
    206 }
    207