Home | History | Annotate | Download | only in gallivm
      1 /**************************************************************************
      2  *
      3  * Copyright 2010 VMware, Inc.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     17  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
     18  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     19  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     21  *
     22  * The above copyright notice and this permission notice (including the
     23  * next paragraph) shall be included in all copies or substantial portions
     24  * of the Software.
     25  *
     26  **************************************************************************/
     27 
     28 
     29 /**
     30  * @file
     31  * YUV pixel format manipulation.
     32  *
     33  * @author Jose Fonseca <jfonseca (at) vmware.com>
     34  */
     35 
     36 
     37 #include "util/u_format.h"
     38 #include "util/u_cpu_detect.h"
     39 
     40 #include "lp_bld_arit.h"
     41 #include "lp_bld_type.h"
     42 #include "lp_bld_const.h"
     43 #include "lp_bld_conv.h"
     44 #include "lp_bld_gather.h"
     45 #include "lp_bld_format.h"
     46 #include "lp_bld_init.h"
     47 #include "lp_bld_logic.h"
     48 
     49 /**
     50  * Extract Y, U, V channels from packed UYVY.
     51  * @param packed  is a <n x i32> vector with the packed UYVY blocks
     52  * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
     53  */
     54 static void
     55 uyvy_to_yuv_soa(struct gallivm_state *gallivm,
     56                 unsigned n,
     57                 LLVMValueRef packed,
     58                 LLVMValueRef i,
     59                 LLVMValueRef *y,
     60                 LLVMValueRef *u,
     61                 LLVMValueRef *v)
     62 {
     63    LLVMBuilderRef builder = gallivm->builder;
     64    struct lp_type type;
     65    LLVMValueRef mask;
     66 
     67    memset(&type, 0, sizeof type);
     68    type.width = 32;
     69    type.length = n;
     70 
     71    assert(lp_check_value(type, packed));
     72    assert(lp_check_value(type, i));
     73 
     74    /*
     75     * Little endian:
     76     * y = (uyvy >> (16*i + 8)) & 0xff
     77     * u = (uyvy        ) & 0xff
     78     * v = (uyvy >> 16  ) & 0xff
     79     *
     80     * Big endian:
     81     * y = (uyvy >> (-16*i + 16)) & 0xff
     82     * u = (uyvy >> 24) & 0xff
     83     * v = (uyvy >>  8) & 0xff
     84     */
     85 
     86 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
     87    /*
     88     * Avoid shift with per-element count.
     89     * No support on x86, gets translated to roughly 5 instructions
     90     * per element. Didn't measure performance but cuts shader size
     91     * by quite a bit (less difference if cpu has no sse4.1 support).
     92     */
     93    if (util_cpu_caps.has_sse2 && n > 1) {
     94       LLVMValueRef sel, tmp, tmp2;
     95       struct lp_build_context bld32;
     96 
     97       lp_build_context_init(&bld32, gallivm, type);
     98 
     99       tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
    100       tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(gallivm, type, 16), "");
    101       sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
    102       *y = lp_build_select(&bld32, sel, tmp, tmp2);
    103    } else
    104 #endif
    105    {
    106       LLVMValueRef shift;
    107 #ifdef PIPE_ARCH_LITTLE_ENDIAN
    108       shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
    109       shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 8), "");
    110 #else
    111       shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, -16), "");
    112       shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 16), "");
    113 #endif
    114       *y = LLVMBuildLShr(builder, packed, shift, "");
    115    }
    116 
    117 #ifdef PIPE_ARCH_LITTLE_ENDIAN
    118    *u = packed;
    119    *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
    120 #else
    121    *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");
    122    *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
    123 #endif
    124 
    125    mask = lp_build_const_int_vec(gallivm, type, 0xff);
    126 
    127    *y = LLVMBuildAnd(builder, *y, mask, "y");
    128    *u = LLVMBuildAnd(builder, *u, mask, "u");
    129    *v = LLVMBuildAnd(builder, *v, mask, "v");
    130 }
    131 
    132 
    133 /**
    134  * Extract Y, U, V channels from packed YUYV.
    135  * @param packed  is a <n x i32> vector with the packed YUYV blocks
    136  * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
    137  */
    138 static void
    139 yuyv_to_yuv_soa(struct gallivm_state *gallivm,
    140                 unsigned n,
    141                 LLVMValueRef packed,
    142                 LLVMValueRef i,
    143                 LLVMValueRef *y,
    144                 LLVMValueRef *u,
    145                 LLVMValueRef *v)
    146 {
    147    LLVMBuilderRef builder = gallivm->builder;
    148    struct lp_type type;
    149    LLVMValueRef mask;
    150 
    151    memset(&type, 0, sizeof type);
    152    type.width = 32;
    153    type.length = n;
    154 
    155    assert(lp_check_value(type, packed));
    156    assert(lp_check_value(type, i));
    157 
    158    /*
    159    * Little endian:
    160     * y = (yuyv >> 16*i) & 0xff
    161     * u = (yuyv >> 8   ) & 0xff
    162     * v = (yuyv >> 24  ) & 0xff
    163     *
    164     * Big endian:
    165     * y = (yuyv >> (-16*i + 24) & 0xff
    166     * u = (yuyv >> 16)          & 0xff
    167     * v = (yuyv)                & 0xff
    168     */
    169 
    170 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
    171    /*
    172     * Avoid shift with per-element count.
    173     * No support on x86, gets translated to roughly 5 instructions
    174     * per element. Didn't measure performance but cuts shader size
    175     * by quite a bit (less difference if cpu has no sse4.1 support).
    176     */
    177    if (util_cpu_caps.has_sse2 && n > 1) {
    178       LLVMValueRef sel, tmp;
    179       struct lp_build_context bld32;
    180 
    181       lp_build_context_init(&bld32, gallivm, type);
    182 
    183       tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
    184       sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
    185        *y = lp_build_select(&bld32, sel, packed, tmp);
    186    } else
    187 #endif
    188    {
    189       LLVMValueRef shift;
    190 #ifdef PIPE_ARCH_LITTLE_ENDIAN
    191       shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
    192 #else
    193       shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, -16), "");
    194       shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 24), "");
    195 #endif
    196       *y = LLVMBuildLShr(builder, packed, shift, "");
    197    }
    198 
    199 #ifdef PIPE_ARCH_LITTLE_ENDIAN
    200    *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
    201    *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");
    202 #else
    203    *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
    204    *v = packed;
    205 #endif
    206 
    207    mask = lp_build_const_int_vec(gallivm, type, 0xff);
    208 
    209    *y = LLVMBuildAnd(builder, *y, mask, "y");
    210    *u = LLVMBuildAnd(builder, *u, mask, "u");
    211    *v = LLVMBuildAnd(builder, *v, mask, "v");
    212 }
    213 
    214 
    215 static inline void
    216 yuv_to_rgb_soa(struct gallivm_state *gallivm,
    217                unsigned n,
    218                LLVMValueRef y, LLVMValueRef u, LLVMValueRef v,
    219                LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b)
    220 {
    221    LLVMBuilderRef builder = gallivm->builder;
    222    struct lp_type type;
    223    struct lp_build_context bld;
    224 
    225    LLVMValueRef c0;
    226    LLVMValueRef c8;
    227    LLVMValueRef c16;
    228    LLVMValueRef c128;
    229    LLVMValueRef c255;
    230 
    231    LLVMValueRef cy;
    232    LLVMValueRef cug;
    233    LLVMValueRef cub;
    234    LLVMValueRef cvr;
    235    LLVMValueRef cvg;
    236 
    237    memset(&type, 0, sizeof type);
    238    type.sign = TRUE;
    239    type.width = 32;
    240    type.length = n;
    241 
    242    lp_build_context_init(&bld, gallivm, type);
    243 
    244    assert(lp_check_value(type, y));
    245    assert(lp_check_value(type, u));
    246    assert(lp_check_value(type, v));
    247 
    248    /*
    249     * Constants
    250     */
    251 
    252    c0   = lp_build_const_int_vec(gallivm, type,   0);
    253    c8   = lp_build_const_int_vec(gallivm, type,   8);
    254    c16  = lp_build_const_int_vec(gallivm, type,  16);
    255    c128 = lp_build_const_int_vec(gallivm, type, 128);
    256    c255 = lp_build_const_int_vec(gallivm, type, 255);
    257 
    258    cy  = lp_build_const_int_vec(gallivm, type,  298);
    259    cug = lp_build_const_int_vec(gallivm, type, -100);
    260    cub = lp_build_const_int_vec(gallivm, type,  516);
    261    cvr = lp_build_const_int_vec(gallivm, type,  409);
    262    cvg = lp_build_const_int_vec(gallivm, type, -208);
    263 
    264    /*
    265     *  y -= 16;
    266     *  u -= 128;
    267     *  v -= 128;
    268     */
    269 
    270    y = LLVMBuildSub(builder, y, c16, "");
    271    u = LLVMBuildSub(builder, u, c128, "");
    272    v = LLVMBuildSub(builder, v, c128, "");
    273 
    274    /*
    275     * r = 298 * _y            + 409 * _v + 128;
    276     * g = 298 * _y - 100 * _u - 208 * _v + 128;
    277     * b = 298 * _y + 516 * _u            + 128;
    278     */
    279 
    280    y = LLVMBuildMul(builder, y, cy, "");
    281    y = LLVMBuildAdd(builder, y, c128, "");
    282 
    283    *r = LLVMBuildMul(builder, v, cvr, "");
    284    *g = LLVMBuildAdd(builder,
    285                      LLVMBuildMul(builder, u, cug, ""),
    286                      LLVMBuildMul(builder, v, cvg, ""),
    287                      "");
    288    *b = LLVMBuildMul(builder, u, cub, "");
    289 
    290    *r = LLVMBuildAdd(builder, *r, y, "");
    291    *g = LLVMBuildAdd(builder, *g, y, "");
    292    *b = LLVMBuildAdd(builder, *b, y, "");
    293 
    294    /*
    295     * r >>= 8;
    296     * g >>= 8;
    297     * b >>= 8;
    298     */
    299 
    300    *r = LLVMBuildAShr(builder, *r, c8, "r");
    301    *g = LLVMBuildAShr(builder, *g, c8, "g");
    302    *b = LLVMBuildAShr(builder, *b, c8, "b");
    303 
    304    /*
    305     * Clamp
    306     */
    307 
    308    *r = lp_build_clamp(&bld, *r, c0, c255);
    309    *g = lp_build_clamp(&bld, *g, c0, c255);
    310    *b = lp_build_clamp(&bld, *b, c0, c255);
    311 }
    312 
    313 
    314 static LLVMValueRef
    315 rgb_to_rgba_aos(struct gallivm_state *gallivm,
    316                 unsigned n,
    317                 LLVMValueRef r, LLVMValueRef g, LLVMValueRef b)
    318 {
    319    LLVMBuilderRef builder = gallivm->builder;
    320    struct lp_type type;
    321    LLVMValueRef a;
    322    LLVMValueRef rgba;
    323 
    324    memset(&type, 0, sizeof type);
    325    type.sign = TRUE;
    326    type.width = 32;
    327    type.length = n;
    328 
    329    assert(lp_check_value(type, r));
    330    assert(lp_check_value(type, g));
    331    assert(lp_check_value(type, b));
    332 
    333    /*
    334     * Make a 4 x unorm8 vector
    335     */
    336 
    337 #ifdef PIPE_ARCH_LITTLE_ENDIAN
    338    r = r;
    339    g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 8), "");
    340    b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 16), "");
    341    a = lp_build_const_int_vec(gallivm, type, 0xff000000);
    342 #else
    343    r = LLVMBuildShl(builder, r, lp_build_const_int_vec(gallivm, type, 24), "");
    344    g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 16), "");
    345    b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 8), "");
    346    a = lp_build_const_int_vec(gallivm, type, 0x000000ff);
    347 #endif
    348 
    349    rgba = r;
    350    rgba = LLVMBuildOr(builder, rgba, g, "");
    351    rgba = LLVMBuildOr(builder, rgba, b, "");
    352    rgba = LLVMBuildOr(builder, rgba, a, "");
    353 
    354    rgba = LLVMBuildBitCast(builder, rgba,
    355                            LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n), "");
    356 
    357    return rgba;
    358 }
    359 
    360 
    361 /**
    362  * Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS
    363  */
    364 static LLVMValueRef
    365 uyvy_to_rgba_aos(struct gallivm_state *gallivm,
    366                  unsigned n,
    367                  LLVMValueRef packed,
    368                  LLVMValueRef i)
    369 {
    370    LLVMValueRef y, u, v;
    371    LLVMValueRef r, g, b;
    372    LLVMValueRef rgba;
    373 
    374    uyvy_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
    375    yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
    376    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
    377 
    378    return rgba;
    379 }
    380 
    381 
    382 /**
    383  * Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS
    384  */
    385 static LLVMValueRef
    386 yuyv_to_rgba_aos(struct gallivm_state *gallivm,
    387                  unsigned n,
    388                  LLVMValueRef packed,
    389                  LLVMValueRef i)
    390 {
    391    LLVMValueRef y, u, v;
    392    LLVMValueRef r, g, b;
    393    LLVMValueRef rgba;
    394 
    395    yuyv_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
    396    yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
    397    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
    398 
    399    return rgba;
    400 }
    401 
    402 
    403 /**
    404  * Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS
    405  */
    406 static LLVMValueRef
    407 rgbg_to_rgba_aos(struct gallivm_state *gallivm,
    408                  unsigned n,
    409                  LLVMValueRef packed,
    410                  LLVMValueRef i)
    411 {
    412    LLVMValueRef r, g, b;
    413    LLVMValueRef rgba;
    414 
    415    uyvy_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
    416    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
    417 
    418    return rgba;
    419 }
    420 
    421 
    422 /**
    423  * Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS
    424  */
    425 static LLVMValueRef
    426 grgb_to_rgba_aos(struct gallivm_state *gallivm,
    427                  unsigned n,
    428                  LLVMValueRef packed,
    429                  LLVMValueRef i)
    430 {
    431    LLVMValueRef r, g, b;
    432    LLVMValueRef rgba;
    433 
    434    yuyv_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
    435    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
    436 
    437    return rgba;
    438 }
    439 
    440 /**
    441  * Convert from <n x i32> packed GR_BR to <4n x i8> RGBA AoS
    442  */
    443 static LLVMValueRef
    444 grbr_to_rgba_aos(struct gallivm_state *gallivm,
    445                  unsigned n,
    446                  LLVMValueRef packed,
    447                  LLVMValueRef i)
    448 {
    449    LLVMValueRef r, g, b;
    450    LLVMValueRef rgba;
    451 
    452    uyvy_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b);
    453    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
    454 
    455    return rgba;
    456 }
    457 
    458 
    459 /**
    460  * Convert from <n x i32> packed RG_RB to <4n x i8> RGBA AoS
    461  */
    462 static LLVMValueRef
    463 rgrb_to_rgba_aos(struct gallivm_state *gallivm,
    464                  unsigned n,
    465                  LLVMValueRef packed,
    466                  LLVMValueRef i)
    467 {
    468    LLVMValueRef r, g, b;
    469    LLVMValueRef rgba;
    470 
    471    yuyv_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b);
    472    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
    473 
    474    return rgba;
    475 }
    476 
    477 /**
    478  * @param n  is the number of pixels processed
    479  * @param packed  is a <n x i32> vector with the packed YUYV blocks
    480  * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
    481  * @return  a <4*n x i8> vector with the pixel RGBA values in AoS
    482  */
    483 LLVMValueRef
    484 lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
    485                                    const struct util_format_description *format_desc,
    486                                    unsigned n,
    487                                    LLVMValueRef base_ptr,
    488                                    LLVMValueRef offset,
    489                                    LLVMValueRef i,
    490                                    LLVMValueRef j)
    491 {
    492    LLVMValueRef packed;
    493    LLVMValueRef rgba;
    494    struct lp_type fetch_type;
    495 
    496    assert(format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED);
    497    assert(format_desc->block.bits == 32);
    498    assert(format_desc->block.width == 2);
    499    assert(format_desc->block.height == 1);
    500 
    501    fetch_type = lp_type_uint(32);
    502    packed = lp_build_gather(gallivm, n, 32, fetch_type, TRUE, base_ptr, offset, FALSE);
    503 
    504    (void)j;
    505 
    506    switch (format_desc->format) {
    507    case PIPE_FORMAT_UYVY:
    508       rgba = uyvy_to_rgba_aos(gallivm, n, packed, i);
    509       break;
    510    case PIPE_FORMAT_YUYV:
    511       rgba = yuyv_to_rgba_aos(gallivm, n, packed, i);
    512       break;
    513    case PIPE_FORMAT_R8G8_B8G8_UNORM:
    514       rgba = rgbg_to_rgba_aos(gallivm, n, packed, i);
    515       break;
    516    case PIPE_FORMAT_G8R8_G8B8_UNORM:
    517       rgba = grgb_to_rgba_aos(gallivm, n, packed, i);
    518       break;
    519    case PIPE_FORMAT_G8R8_B8R8_UNORM:
    520       rgba = grbr_to_rgba_aos(gallivm, n, packed, i);
    521       break;
    522    case PIPE_FORMAT_R8G8_R8B8_UNORM:
    523       rgba = rgrb_to_rgba_aos(gallivm, n, packed, i);
    524       break;
    525    default:
    526       assert(0);
    527       rgba =  LLVMGetUndef(LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n));
    528       break;
    529    }
    530 
    531    return rgba;
    532 }
    533 
    534