Home | History | Annotate | Download | only in gallivm
      1 /**************************************************************************
      2  *
      3  * Copyright 2010 VMware, Inc.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     17  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
     18  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     19  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     21  *
     22  * The above copyright notice and this permission notice (including the
     23  * next paragraph) shall be included in all copies or substantial portions
     24  * of the Software.
     25  *
     26  **************************************************************************/
     27 
     28 
     29 /**
     30  * @file
     31  * YUV pixel format manipulation.
     32  *
     33  * @author Jose Fonseca <jfonseca (at) vmware.com>
     34  */
     35 
     36 
     37 #include "util/u_format.h"
     38 #include "util/u_cpu_detect.h"
     39 
     40 #include "lp_bld_arit.h"
     41 #include "lp_bld_type.h"
     42 #include "lp_bld_const.h"
     43 #include "lp_bld_conv.h"
     44 #include "lp_bld_gather.h"
     45 #include "lp_bld_format.h"
     46 #include "lp_bld_init.h"
     47 #include "lp_bld_logic.h"
     48 
     49 /**
     50  * Extract Y, U, V channels from packed UYVY.
     51  * @param packed  is a <n x i32> vector with the packed UYVY blocks
     52  * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
     53  */
     54 static void
     55 uyvy_to_yuv_soa(struct gallivm_state *gallivm,
     56                 unsigned n,
     57                 LLVMValueRef packed,
     58                 LLVMValueRef i,
     59                 LLVMValueRef *y,
     60                 LLVMValueRef *u,
     61                 LLVMValueRef *v)
     62 {
     63    LLVMBuilderRef builder = gallivm->builder;
     64    struct lp_type type;
     65    LLVMValueRef mask;
     66 
     67    memset(&type, 0, sizeof type);
     68    type.width = 32;
     69    type.length = n;
     70 
     71    assert(lp_check_value(type, packed));
     72    assert(lp_check_value(type, i));
     73 
     74    /*
     75     * y = (uyvy >> (16*i + 8)) & 0xff
     76     * u = (uyvy        ) & 0xff
     77     * v = (uyvy >> 16  ) & 0xff
     78     */
     79 
     80 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
     81    /*
     82     * Avoid shift with per-element count.
     83     * No support on x86, gets translated to roughly 5 instructions
     84     * per element. Didn't measure performance but cuts shader size
     85     * by quite a bit (less difference if cpu has no sse4.1 support).
     86     */
     87    if (util_cpu_caps.has_sse2 && n > 1) {
     88       LLVMValueRef sel, tmp, tmp2;
     89       struct lp_build_context bld32;
     90 
     91       lp_build_context_init(&bld32, gallivm, type);
     92 
     93       tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
     94       tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(gallivm, type, 16), "");
     95       sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
     96       *y = lp_build_select(&bld32, sel, tmp, tmp2);
     97    } else
     98 #endif
     99    {
    100       LLVMValueRef shift;
    101       shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
    102       shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 8), "");
    103       *y = LLVMBuildLShr(builder, packed, shift, "");
    104    }
    105 
    106    *u = packed;
    107    *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
    108 
    109    mask = lp_build_const_int_vec(gallivm, type, 0xff);
    110 
    111    *y = LLVMBuildAnd(builder, *y, mask, "y");
    112    *u = LLVMBuildAnd(builder, *u, mask, "u");
    113    *v = LLVMBuildAnd(builder, *v, mask, "v");
    114 }
    115 
    116 
    117 /**
    118  * Extract Y, U, V channels from packed YUYV.
    119  * @param packed  is a <n x i32> vector with the packed YUYV blocks
    120  * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
    121  */
    122 static void
    123 yuyv_to_yuv_soa(struct gallivm_state *gallivm,
    124                 unsigned n,
    125                 LLVMValueRef packed,
    126                 LLVMValueRef i,
    127                 LLVMValueRef *y,
    128                 LLVMValueRef *u,
    129                 LLVMValueRef *v)
    130 {
    131    LLVMBuilderRef builder = gallivm->builder;
    132    struct lp_type type;
    133    LLVMValueRef mask;
    134 
    135    memset(&type, 0, sizeof type);
    136    type.width = 32;
    137    type.length = n;
    138 
    139    assert(lp_check_value(type, packed));
    140    assert(lp_check_value(type, i));
    141 
    142    /*
    143     * y = (yuyv >> 16*i) & 0xff
    144     * u = (yuyv >> 8   ) & 0xff
    145     * v = (yuyv >> 24  ) & 0xff
    146     */
    147 
    148 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
    149    /*
    150     * Avoid shift with per-element count.
    151     * No support on x86, gets translated to roughly 5 instructions
    152     * per element. Didn't measure performance but cuts shader size
    153     * by quite a bit (less difference if cpu has no sse4.1 support).
    154     */
    155    if (util_cpu_caps.has_sse2 && n > 1) {
    156       LLVMValueRef sel, tmp;
    157       struct lp_build_context bld32;
    158 
    159       lp_build_context_init(&bld32, gallivm, type);
    160 
    161       tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
    162       sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
    163        *y = lp_build_select(&bld32, sel, packed, tmp);
    164    } else
    165 #endif
    166    {
    167       LLVMValueRef shift;
    168       shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
    169       *y = LLVMBuildLShr(builder, packed, shift, "");
    170    }
    171 
    172    *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
    173    *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");
    174 
    175    mask = lp_build_const_int_vec(gallivm, type, 0xff);
    176 
    177    *y = LLVMBuildAnd(builder, *y, mask, "y");
    178    *u = LLVMBuildAnd(builder, *u, mask, "u");
    179    *v = LLVMBuildAnd(builder, *v, mask, "v");
    180 }
    181 
    182 
    183 static INLINE void
    184 yuv_to_rgb_soa(struct gallivm_state *gallivm,
    185                unsigned n,
    186                LLVMValueRef y, LLVMValueRef u, LLVMValueRef v,
    187                LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b)
    188 {
    189    LLVMBuilderRef builder = gallivm->builder;
    190    struct lp_type type;
    191    struct lp_build_context bld;
    192 
    193    LLVMValueRef c0;
    194    LLVMValueRef c8;
    195    LLVMValueRef c16;
    196    LLVMValueRef c128;
    197    LLVMValueRef c255;
    198 
    199    LLVMValueRef cy;
    200    LLVMValueRef cug;
    201    LLVMValueRef cub;
    202    LLVMValueRef cvr;
    203    LLVMValueRef cvg;
    204 
    205    memset(&type, 0, sizeof type);
    206    type.sign = TRUE;
    207    type.width = 32;
    208    type.length = n;
    209 
    210    lp_build_context_init(&bld, gallivm, type);
    211 
    212    assert(lp_check_value(type, y));
    213    assert(lp_check_value(type, u));
    214    assert(lp_check_value(type, v));
    215 
    216    /*
    217     * Constants
    218     */
    219 
    220    c0   = lp_build_const_int_vec(gallivm, type,   0);
    221    c8   = lp_build_const_int_vec(gallivm, type,   8);
    222    c16  = lp_build_const_int_vec(gallivm, type,  16);
    223    c128 = lp_build_const_int_vec(gallivm, type, 128);
    224    c255 = lp_build_const_int_vec(gallivm, type, 255);
    225 
    226    cy  = lp_build_const_int_vec(gallivm, type,  298);
    227    cug = lp_build_const_int_vec(gallivm, type, -100);
    228    cub = lp_build_const_int_vec(gallivm, type,  516);
    229    cvr = lp_build_const_int_vec(gallivm, type,  409);
    230    cvg = lp_build_const_int_vec(gallivm, type, -208);
    231 
    232    /*
    233     *  y -= 16;
    234     *  u -= 128;
    235     *  v -= 128;
    236     */
    237 
    238    y = LLVMBuildSub(builder, y, c16, "");
    239    u = LLVMBuildSub(builder, u, c128, "");
    240    v = LLVMBuildSub(builder, v, c128, "");
    241 
    242    /*
    243     * r = 298 * _y            + 409 * _v + 128;
    244     * g = 298 * _y - 100 * _u - 208 * _v + 128;
    245     * b = 298 * _y + 516 * _u            + 128;
    246     */
    247 
    248    y = LLVMBuildMul(builder, y, cy, "");
    249    y = LLVMBuildAdd(builder, y, c128, "");
    250 
    251    *r = LLVMBuildMul(builder, v, cvr, "");
    252    *g = LLVMBuildAdd(builder,
    253                      LLVMBuildMul(builder, u, cug, ""),
    254                      LLVMBuildMul(builder, v, cvg, ""),
    255                      "");
    256    *b = LLVMBuildMul(builder, u, cub, "");
    257 
    258    *r = LLVMBuildAdd(builder, *r, y, "");
    259    *g = LLVMBuildAdd(builder, *g, y, "");
    260    *b = LLVMBuildAdd(builder, *b, y, "");
    261 
    262    /*
    263     * r >>= 8;
    264     * g >>= 8;
    265     * b >>= 8;
    266     */
    267 
    268    *r = LLVMBuildAShr(builder, *r, c8, "r");
    269    *g = LLVMBuildAShr(builder, *g, c8, "g");
    270    *b = LLVMBuildAShr(builder, *b, c8, "b");
    271 
    272    /*
    273     * Clamp
    274     */
    275 
    276    *r = lp_build_clamp(&bld, *r, c0, c255);
    277    *g = lp_build_clamp(&bld, *g, c0, c255);
    278    *b = lp_build_clamp(&bld, *b, c0, c255);
    279 }
    280 
    281 
    282 static LLVMValueRef
    283 rgb_to_rgba_aos(struct gallivm_state *gallivm,
    284                 unsigned n,
    285                 LLVMValueRef r, LLVMValueRef g, LLVMValueRef b)
    286 {
    287    LLVMBuilderRef builder = gallivm->builder;
    288    struct lp_type type;
    289    LLVMValueRef a;
    290    LLVMValueRef rgba;
    291 
    292    memset(&type, 0, sizeof type);
    293    type.sign = TRUE;
    294    type.width = 32;
    295    type.length = n;
    296 
    297    assert(lp_check_value(type, r));
    298    assert(lp_check_value(type, g));
    299    assert(lp_check_value(type, b));
    300 
    301    /*
    302     * Make a 4 x unorm8 vector
    303     */
    304 
    305    r = r;
    306    g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 8), "");
    307    b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 16), "");
    308    a = lp_build_const_int_vec(gallivm, type, 0xff000000);
    309 
    310    rgba = r;
    311    rgba = LLVMBuildOr(builder, rgba, g, "");
    312    rgba = LLVMBuildOr(builder, rgba, b, "");
    313    rgba = LLVMBuildOr(builder, rgba, a, "");
    314 
    315    rgba = LLVMBuildBitCast(builder, rgba,
    316                            LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n), "");
    317 
    318    return rgba;
    319 }
    320 
    321 
    322 /**
    323  * Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS
    324  */
    325 static LLVMValueRef
    326 uyvy_to_rgba_aos(struct gallivm_state *gallivm,
    327                  unsigned n,
    328                  LLVMValueRef packed,
    329                  LLVMValueRef i)
    330 {
    331    LLVMValueRef y, u, v;
    332    LLVMValueRef r, g, b;
    333    LLVMValueRef rgba;
    334 
    335    uyvy_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
    336    yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
    337    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
    338 
    339    return rgba;
    340 }
    341 
    342 
    343 /**
    344  * Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS
    345  */
    346 static LLVMValueRef
    347 yuyv_to_rgba_aos(struct gallivm_state *gallivm,
    348                  unsigned n,
    349                  LLVMValueRef packed,
    350                  LLVMValueRef i)
    351 {
    352    LLVMValueRef y, u, v;
    353    LLVMValueRef r, g, b;
    354    LLVMValueRef rgba;
    355 
    356    yuyv_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
    357    yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
    358    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
    359 
    360    return rgba;
    361 }
    362 
    363 
    364 /**
    365  * Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS
    366  */
    367 static LLVMValueRef
    368 rgbg_to_rgba_aos(struct gallivm_state *gallivm,
    369                  unsigned n,
    370                  LLVMValueRef packed,
    371                  LLVMValueRef i)
    372 {
    373    LLVMValueRef r, g, b;
    374    LLVMValueRef rgba;
    375 
    376    uyvy_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
    377    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
    378 
    379    return rgba;
    380 }
    381 
    382 
    383 /**
    384  * Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS
    385  */
    386 static LLVMValueRef
    387 grgb_to_rgba_aos(struct gallivm_state *gallivm,
    388                  unsigned n,
    389                  LLVMValueRef packed,
    390                  LLVMValueRef i)
    391 {
    392    LLVMValueRef r, g, b;
    393    LLVMValueRef rgba;
    394 
    395    yuyv_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
    396    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
    397 
    398    return rgba;
    399 }
    400 
    401 /**
    402  * Convert from <n x i32> packed GR_BR to <4n x i8> RGBA AoS
    403  */
    404 static LLVMValueRef
    405 grbr_to_rgba_aos(struct gallivm_state *gallivm,
    406                  unsigned n,
    407                  LLVMValueRef packed,
    408                  LLVMValueRef i)
    409 {
    410    LLVMValueRef r, g, b;
    411    LLVMValueRef rgba;
    412 
    413    uyvy_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b);
    414    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
    415 
    416    return rgba;
    417 }
    418 
    419 
    420 /**
    421  * Convert from <n x i32> packed RG_RB to <4n x i8> RGBA AoS
    422  */
    423 static LLVMValueRef
    424 rgrb_to_rgba_aos(struct gallivm_state *gallivm,
    425                  unsigned n,
    426                  LLVMValueRef packed,
    427                  LLVMValueRef i)
    428 {
    429    LLVMValueRef r, g, b;
    430    LLVMValueRef rgba;
    431 
    432    yuyv_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b);
    433    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
    434 
    435    return rgba;
    436 }
    437 
    438 /**
    439  * @param n  is the number of pixels processed
    440  * @param packed  is a <n x i32> vector with the packed YUYV blocks
    441  * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
    442  * @return  a <4*n x i8> vector with the pixel RGBA values in AoS
    443  */
    444 LLVMValueRef
    445 lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
    446                                    const struct util_format_description *format_desc,
    447                                    unsigned n,
    448                                    LLVMValueRef base_ptr,
    449                                    LLVMValueRef offset,
    450                                    LLVMValueRef i,
    451                                    LLVMValueRef j)
    452 {
    453    LLVMValueRef packed;
    454    LLVMValueRef rgba;
    455 
    456    assert(format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED);
    457    assert(format_desc->block.bits == 32);
    458    assert(format_desc->block.width == 2);
    459    assert(format_desc->block.height == 1);
    460 
    461    packed = lp_build_gather(gallivm, n, 32, 32, base_ptr, offset);
    462 
    463    (void)j;
    464 
    465    switch (format_desc->format) {
    466    case PIPE_FORMAT_UYVY:
    467       rgba = uyvy_to_rgba_aos(gallivm, n, packed, i);
    468       break;
    469    case PIPE_FORMAT_YUYV:
    470       rgba = yuyv_to_rgba_aos(gallivm, n, packed, i);
    471       break;
    472    case PIPE_FORMAT_R8G8_B8G8_UNORM:
    473       rgba = rgbg_to_rgba_aos(gallivm, n, packed, i);
    474       break;
    475    case PIPE_FORMAT_G8R8_G8B8_UNORM:
    476       rgba = grgb_to_rgba_aos(gallivm, n, packed, i);
    477       break;
    478    case PIPE_FORMAT_G8R8_B8R8_UNORM:
    479       rgba = grbr_to_rgba_aos(gallivm, n, packed, i);
    480       break;
    481    case PIPE_FORMAT_R8G8_R8B8_UNORM:
    482       rgba = rgrb_to_rgba_aos(gallivm, n, packed, i);
    483       break;
    484    default:
    485       assert(0);
    486       rgba =  LLVMGetUndef(LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n));
    487       break;
    488    }
    489 
    490    return rgba;
    491 }
    492 
    493