1 /************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 29 #include "pipe/p_defines.h" 30 31 #include "util/u_format.h" 32 #include "util/u_memory.h" 33 #include "util/u_string.h" 34 35 #include "lp_bld_type.h" 36 #include "lp_bld_const.h" 37 #include "lp_bld_conv.h" 38 #include "lp_bld_swizzle.h" 39 #include "lp_bld_gather.h" 40 #include "lp_bld_debug.h" 41 #include "lp_bld_format.h" 42 43 44 void 45 lp_build_format_swizzle_soa(const struct util_format_description *format_desc, 46 struct lp_build_context *bld, 47 const LLVMValueRef *unswizzled, 48 LLVMValueRef swizzled_out[4]) 49 { 50 assert(UTIL_FORMAT_SWIZZLE_0 == PIPE_SWIZZLE_ZERO); 51 assert(UTIL_FORMAT_SWIZZLE_1 == PIPE_SWIZZLE_ONE); 52 53 if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { 54 /* 55 * Return zzz1 for depth-stencil formats. 56 * 57 * XXX: Allow to control the depth swizzle with an additional parameter, 58 * as the caller may wish another depth swizzle, or retain the stencil 59 * value. 60 */ 61 enum util_format_swizzle swizzle = format_desc->swizzle[0]; 62 LLVMValueRef depth = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle); 63 swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth; 64 swizzled_out[3] = bld->one; 65 } 66 else { 67 unsigned chan; 68 for (chan = 0; chan < 4; ++chan) { 69 enum util_format_swizzle swizzle = format_desc->swizzle[chan]; 70 swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle); 71 } 72 } 73 } 74 75 76 /** 77 * Unpack several pixels in SoA. 78 * 79 * It takes a vector of packed pixels: 80 * 81 * packed = {P0, P1, P2, P3, ..., Pn} 82 * 83 * And will produce four vectors: 84 * 85 * red = {R0, R1, R2, R3, ..., Rn} 86 * green = {G0, G1, G2, G3, ..., Gn} 87 * blue = {B0, B1, B2, B3, ..., Bn} 88 * alpha = {A0, A1, A2, A3, ..., An} 89 * 90 * It requires that a packed pixel fits into an element of the output 91 * channels. The common case is when converting pixel with a depth of 32 bit or 92 * less into floats. 93 * 94 * \param format_desc the format of the 'packed' incoming pixel vector 95 * \param type the desired type for rgba_out (type.length = n, above) 96 * \param packed the incoming vector of packed pixels 97 * \param rgba_out returns the SoA R,G,B,A vectors 98 */ 99 void 100 lp_build_unpack_rgba_soa(struct gallivm_state *gallivm, 101 const struct util_format_description *format_desc, 102 struct lp_type type, 103 LLVMValueRef packed, 104 LLVMValueRef rgba_out[4]) 105 { 106 LLVMBuilderRef builder = gallivm->builder; 107 struct lp_build_context bld; 108 LLVMValueRef inputs[4]; 109 unsigned start; 110 unsigned chan; 111 112 assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); 113 assert(format_desc->block.width == 1); 114 assert(format_desc->block.height == 1); 115 assert(format_desc->block.bits <= type.width); 116 /* FIXME: Support more output types */ 117 assert(type.floating); 118 assert(type.width == 32); 119 120 lp_build_context_init(&bld, gallivm, type); 121 122 /* Decode the input vector components */ 123 start = 0; 124 for (chan = 0; chan < format_desc->nr_channels; ++chan) { 125 const unsigned width = format_desc->channel[chan].size; 126 const unsigned stop = start + width; 127 LLVMValueRef input; 128 129 input = packed; 130 131 switch(format_desc->channel[chan].type) { 132 case UTIL_FORMAT_TYPE_VOID: 133 input = lp_build_undef(gallivm, type); 134 break; 135 136 case UTIL_FORMAT_TYPE_UNSIGNED: 137 /* 138 * Align the LSB 139 */ 140 141 if (start) { 142 input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), ""); 143 } 144 145 /* 146 * Zero the MSBs 147 */ 148 149 if (stop < format_desc->block.bits) { 150 unsigned mask = ((unsigned long long)1 << width) - 1; 151 input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), ""); 152 } 153 154 /* 155 * Type conversion 156 */ 157 158 if (type.floating) { 159 if(format_desc->channel[chan].normalized) 160 input = lp_build_unsigned_norm_to_float(gallivm, width, type, input); 161 else 162 input = LLVMBuildSIToFP(builder, input, 163 lp_build_vec_type(gallivm, type), ""); 164 } 165 else { 166 /* FIXME */ 167 assert(0); 168 input = lp_build_undef(gallivm, type); 169 } 170 171 break; 172 173 case UTIL_FORMAT_TYPE_SIGNED: 174 /* 175 * Align the sign bit first. 176 */ 177 178 if (stop < type.width) { 179 unsigned bits = type.width - stop; 180 LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits); 181 input = LLVMBuildShl(builder, input, bits_val, ""); 182 } 183 184 /* 185 * Align the LSB (with an arithmetic shift to preserve the sign) 186 */ 187 188 if (format_desc->channel[chan].size < type.width) { 189 unsigned bits = type.width - format_desc->channel[chan].size; 190 LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits); 191 input = LLVMBuildAShr(builder, input, bits_val, ""); 192 } 193 194 /* 195 * Type conversion 196 */ 197 198 if (type.floating) { 199 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), ""); 200 if (format_desc->channel[chan].normalized) { 201 double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1); 202 LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale); 203 input = LLVMBuildFMul(builder, input, scale_val, ""); 204 } 205 } 206 else { 207 /* FIXME */ 208 assert(0); 209 input = lp_build_undef(gallivm, type); 210 } 211 212 break; 213 214 case UTIL_FORMAT_TYPE_FLOAT: 215 if (type.floating) { 216 assert(start == 0); 217 assert(stop == 32); 218 assert(type.width == 32); 219 input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), ""); 220 } 221 else { 222 /* FIXME */ 223 assert(0); 224 input = lp_build_undef(gallivm, type); 225 } 226 break; 227 228 case UTIL_FORMAT_TYPE_FIXED: 229 if (type.floating) { 230 double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1); 231 LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale); 232 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), ""); 233 input = LLVMBuildFMul(builder, input, scale_val, ""); 234 } 235 else { 236 /* FIXME */ 237 assert(0); 238 input = lp_build_undef(gallivm, type); 239 } 240 break; 241 242 default: 243 assert(0); 244 input = lp_build_undef(gallivm, type); 245 break; 246 } 247 248 inputs[chan] = input; 249 250 start = stop; 251 } 252 253 lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out); 254 } 255 256 257 void 258 lp_build_rgba8_to_f32_soa(struct gallivm_state *gallivm, 259 struct lp_type dst_type, 260 LLVMValueRef packed, 261 LLVMValueRef *rgba) 262 { 263 LLVMBuilderRef builder = gallivm->builder; 264 LLVMValueRef mask = lp_build_const_int_vec(gallivm, dst_type, 0xff); 265 unsigned chan; 266 267 packed = LLVMBuildBitCast(builder, packed, 268 lp_build_int_vec_type(gallivm, dst_type), ""); 269 270 /* Decode the input vector components */ 271 for (chan = 0; chan < 4; ++chan) { 272 unsigned start = chan*8; 273 unsigned stop = start + 8; 274 LLVMValueRef input; 275 276 input = packed; 277 278 if (start) 279 input = LLVMBuildLShr(builder, input, 280 lp_build_const_int_vec(gallivm, dst_type, start), ""); 281 282 if (stop < 32) 283 input = LLVMBuildAnd(builder, input, mask, ""); 284 285 input = lp_build_unsigned_norm_to_float(gallivm, 8, dst_type, input); 286 287 rgba[chan] = input; 288 } 289 } 290 291 292 293 /** 294 * Fetch a texels from a texture, returning them in SoA layout. 295 * 296 * \param type the desired return type for 'rgba'. The vector length 297 * is the number of texels to fetch 298 * 299 * \param base_ptr points to start of the texture image block. For non- 300 * compressed formats, this simply points to the texel. 301 * For compressed formats, it points to the start of the 302 * compressed data block. 303 * 304 * \param i, j the sub-block pixel coordinates. For non-compressed formats 305 * these will always be (0,0). For compressed formats, i will 306 * be in [0, block_width-1] and j will be in [0, block_height-1]. 307 */ 308 void 309 lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, 310 const struct util_format_description *format_desc, 311 struct lp_type type, 312 LLVMValueRef base_ptr, 313 LLVMValueRef offset, 314 LLVMValueRef i, 315 LLVMValueRef j, 316 LLVMValueRef rgba_out[4]) 317 { 318 LLVMBuilderRef builder = gallivm->builder; 319 320 if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && 321 (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || 322 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) && 323 format_desc->block.width == 1 && 324 format_desc->block.height == 1 && 325 format_desc->block.bits <= type.width && 326 (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT || 327 format_desc->channel[0].size == 32)) 328 { 329 /* 330 * The packed pixel fits into an element of the destination format. Put 331 * the packed pixels into a vector and extract each component for all 332 * vector elements in parallel. 333 */ 334 335 LLVMValueRef packed; 336 337 /* 338 * gather the texels from the texture 339 * Ex: packed = {BGRA, BGRA, BGRA, BGRA}. 340 */ 341 packed = lp_build_gather(gallivm, 342 type.length, 343 format_desc->block.bits, 344 type.width, 345 base_ptr, offset); 346 347 /* 348 * convert texels to float rgba 349 */ 350 lp_build_unpack_rgba_soa(gallivm, 351 format_desc, 352 type, 353 packed, rgba_out); 354 return; 355 } 356 357 /* 358 * Try calling lp_build_fetch_rgba_aos for all pixels. 359 */ 360 361 if (util_format_fits_8unorm(format_desc) && 362 type.floating && type.width == 32 && 363 (type.length == 1 || (type.length % 4 == 0))) { 364 struct lp_type tmp_type; 365 LLVMValueRef tmp; 366 367 memset(&tmp_type, 0, sizeof tmp_type); 368 tmp_type.width = 8; 369 tmp_type.length = type.length * 4; 370 tmp_type.norm = TRUE; 371 372 tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type, 373 base_ptr, offset, i, j); 374 375 lp_build_rgba8_to_f32_soa(gallivm, 376 type, 377 tmp, 378 rgba_out); 379 380 return; 381 } 382 383 /* 384 * Fallback to calling lp_build_fetch_rgba_aos for each pixel. 385 * 386 * This is not the most efficient way of fetching pixels, as we 387 * miss some opportunities to do vectorization, but this is 388 * convenient for formats or scenarios for which there was no 389 * opportunity or incentive to optimize. 390 */ 391 392 { 393 unsigned k, chan; 394 struct lp_type tmp_type; 395 396 if (gallivm_debug & GALLIVM_DEBUG_PERF) { 397 debug_printf("%s: scalar unpacking of %s\n", 398 __FUNCTION__, format_desc->short_name); 399 } 400 401 tmp_type = type; 402 tmp_type.length = 4; 403 404 for (chan = 0; chan < 4; ++chan) { 405 rgba_out[chan] = lp_build_undef(gallivm, type); 406 } 407 408 /* loop over number of pixels */ 409 for(k = 0; k < type.length; ++k) { 410 LLVMValueRef index = lp_build_const_int32(gallivm, k); 411 LLVMValueRef offset_elem; 412 LLVMValueRef i_elem, j_elem; 413 LLVMValueRef tmp; 414 415 offset_elem = LLVMBuildExtractElement(builder, offset, 416 index, ""); 417 418 i_elem = LLVMBuildExtractElement(builder, i, index, ""); 419 j_elem = LLVMBuildExtractElement(builder, j, index, ""); 420 421 /* Get a single float[4]={R,G,B,A} pixel */ 422 tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type, 423 base_ptr, offset_elem, 424 i_elem, j_elem); 425 426 /* 427 * Insert the AoS tmp value channels into the SoA result vectors at 428 * position = 'index'. 429 */ 430 for (chan = 0; chan < 4; ++chan) { 431 LLVMValueRef chan_val = lp_build_const_int32(gallivm, chan), 432 tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, ""); 433 rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan], 434 tmp_chan, index, ""); 435 } 436 } 437 } 438 } 439