1 /************************************************************************** 2 * 3 * Copyright 2010 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 20 * USE OR OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * The above copyright notice and this permission notice (including the 23 * next paragraph) shall be included in all copies or substantial portions 24 * of the Software. 25 * 26 **************************************************************************/ 27 28 29 #include "lp_bld_type.h" 30 #include "lp_bld_arit.h" 31 #include "lp_bld_const.h" 32 #include "lp_bld_swizzle.h" 33 #include "lp_bld_quad.h" 34 #include "lp_bld_pack.h" 35 36 37 static const unsigned char 38 swizzle_left[4] = { 39 LP_BLD_QUAD_TOP_LEFT, LP_BLD_QUAD_TOP_LEFT, 40 LP_BLD_QUAD_BOTTOM_LEFT, LP_BLD_QUAD_BOTTOM_LEFT 41 }; 42 43 static const unsigned char 44 swizzle_right[4] = { 45 LP_BLD_QUAD_TOP_RIGHT, LP_BLD_QUAD_TOP_RIGHT, 46 LP_BLD_QUAD_BOTTOM_RIGHT, LP_BLD_QUAD_BOTTOM_RIGHT 47 }; 48 49 static const unsigned char 50 swizzle_top[4] = { 51 LP_BLD_QUAD_TOP_LEFT, LP_BLD_QUAD_TOP_RIGHT, 52 LP_BLD_QUAD_TOP_LEFT, LP_BLD_QUAD_TOP_RIGHT 53 }; 54 55 static const unsigned char 56 swizzle_bottom[4] = { 57 LP_BLD_QUAD_BOTTOM_LEFT, LP_BLD_QUAD_BOTTOM_RIGHT, 58 LP_BLD_QUAD_BOTTOM_LEFT, LP_BLD_QUAD_BOTTOM_RIGHT 59 }; 60 61 62 LLVMValueRef 63 lp_build_ddx(struct lp_build_context *bld, 64 LLVMValueRef a) 65 { 66 LLVMValueRef a_left = lp_build_swizzle_aos(bld, a, swizzle_left); 67 LLVMValueRef a_right = lp_build_swizzle_aos(bld, a, swizzle_right); 68 return lp_build_sub(bld, a_right, a_left); 69 } 70 71 72 LLVMValueRef 73 lp_build_ddy(struct lp_build_context *bld, 74 LLVMValueRef a) 75 { 76 LLVMValueRef a_top = lp_build_swizzle_aos(bld, a, swizzle_top); 77 LLVMValueRef a_bottom = lp_build_swizzle_aos(bld, a, swizzle_bottom); 78 return lp_build_sub(bld, a_bottom, a_top); 79 } 80 81 /* 82 * Helper for building packed ddx/ddy vector for one coord (scalar per quad 83 * values). The vector will look like this (8-wide): 84 * dr1dx _____ -dr1dy _____ dr2dx _____ -dr2dy _____ 85 * This only requires one shuffle instead of two for more straightforward packing. 86 */ 87 LLVMValueRef 88 lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld, 89 LLVMValueRef a) 90 { 91 struct gallivm_state *gallivm = bld->gallivm; 92 LLVMBuilderRef builder = gallivm->builder; 93 LLVMValueRef vec1, vec2; 94 95 /* use aos swizzle helper */ 96 97 static const unsigned char swizzle1[] = { /* no-op swizzle */ 98 LP_BLD_QUAD_TOP_LEFT, LP_BLD_SWIZZLE_DONTCARE, 99 LP_BLD_QUAD_BOTTOM_LEFT, LP_BLD_SWIZZLE_DONTCARE 100 }; 101 static const unsigned char swizzle2[] = { 102 LP_BLD_QUAD_TOP_RIGHT, LP_BLD_SWIZZLE_DONTCARE, 103 LP_BLD_QUAD_TOP_LEFT, LP_BLD_SWIZZLE_DONTCARE 104 }; 105 106 vec1 = lp_build_swizzle_aos(bld, a, swizzle1); 107 vec2 = lp_build_swizzle_aos(bld, a, swizzle2); 108 109 if (bld->type.floating) 110 return LLVMBuildFSub(builder, vec2, vec1, "ddxddy"); 111 else 112 return LLVMBuildSub(builder, vec2, vec1, "ddxddy"); 113 } 114 115 116 /* 117 * Helper for building packed ddx/ddy vector for one coord (scalar per quad 118 * values). The vector will look like this (8-wide): 119 * ds1dx ds1dy dt1dx dt1dy ds2dx ds2dy dt2dx dt2dy 120 * This only needs 2 (v)shufps. 121 */ 122 LLVMValueRef 123 lp_build_packed_ddx_ddy_twocoord(struct lp_build_context *bld, 124 LLVMValueRef a, LLVMValueRef b) 125 { 126 struct gallivm_state *gallivm = bld->gallivm; 127 LLVMBuilderRef builder = gallivm->builder; 128 LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH/4]; 129 LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH/4]; 130 LLVMValueRef vec1, vec2; 131 unsigned length, num_quads, i; 132 133 /* XXX: do hsub version */ 134 length = bld->type.length; 135 num_quads = length / 4; 136 for (i = 0; i < num_quads; i++) { 137 unsigned s1 = 4 * i; 138 unsigned s2 = 4 * i + length; 139 shuffles1[4*i + 0] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s1); 140 shuffles1[4*i + 1] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s1); 141 shuffles1[4*i + 2] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s2); 142 shuffles1[4*i + 3] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s2); 143 shuffles2[4*i + 0] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_RIGHT + s1); 144 shuffles2[4*i + 1] = lp_build_const_int32(gallivm, LP_BLD_QUAD_BOTTOM_LEFT + s1); 145 shuffles2[4*i + 2] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_RIGHT + s2); 146 shuffles2[4*i + 3] = lp_build_const_int32(gallivm, LP_BLD_QUAD_BOTTOM_LEFT + s2); 147 } 148 vec1 = LLVMBuildShuffleVector(builder, a, b, 149 LLVMConstVector(shuffles1, length), ""); 150 vec2 = LLVMBuildShuffleVector(builder, a, b, 151 LLVMConstVector(shuffles2, length), ""); 152 if (bld->type.floating) 153 return LLVMBuildFSub(builder, vec2, vec1, "ddxddyddxddy"); 154 else 155 return LLVMBuildSub(builder, vec2, vec1, "ddxddyddxddy"); 156 } 157 158 159 /** 160 * Twiddle from quad format to row format 161 * 162 * src0 src1 163 * ######### ######### ################# 164 * # 0 | 1 # # 4 | 5 # # 0 | 1 | 4 | 5 # src0 165 * #---+---# #---+---# -> ################# 166 * # 2 | 3 # # 6 | 7 # # 2 | 3 | 6 | 7 # src1 167 * ######### ######### ################# 168 * 169 */ 170 void 171 lp_bld_quad_twiddle(struct gallivm_state *gallivm, 172 struct lp_type lp_dst_type, 173 const LLVMValueRef* src, 174 unsigned src_count, 175 LLVMValueRef* dst) 176 { 177 LLVMBuilderRef builder = gallivm->builder; 178 LLVMTypeRef dst_type_ref; 179 LLVMTypeRef type2_ref; 180 struct lp_type type2; 181 unsigned i; 182 183 assert((src_count % 2) == 0); 184 185 /* Create a type with only 2 elements */ 186 type2 = lp_dst_type; 187 type2.width = (lp_dst_type.width * lp_dst_type.length) / 2; 188 type2.length = 2; 189 type2.floating = 0; 190 191 type2_ref = lp_build_vec_type(gallivm, type2); 192 dst_type_ref = lp_build_vec_type(gallivm, lp_dst_type); 193 194 for (i = 0; i < src_count; i += 2) { 195 LLVMValueRef src0, src1; 196 197 src0 = LLVMBuildBitCast(builder, src[i + 0], type2_ref, ""); 198 src1 = LLVMBuildBitCast(builder, src[i + 1], type2_ref, ""); 199 200 dst[i + 0] = lp_build_interleave2(gallivm, type2, src0, src1, 0); 201 dst[i + 1] = lp_build_interleave2(gallivm, type2, src0, src1, 1); 202 203 dst[i + 0] = LLVMBuildBitCast(builder, dst[i + 0], dst_type_ref, ""); 204 dst[i + 1] = LLVMBuildBitCast(builder, dst[i + 1], dst_type_ref, ""); 205 } 206 } 207