Home | History | Annotate | Download | only in vl
      1 /**************************************************************************
      2  *
      3  * Copyright 2016 Nayan Deshmukh.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial portions
     16  * of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
     22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  **************************************************************************/
     27 
     28 #include <stdio.h>
     29 
     30 #include "pipe/p_context.h"
     31 
     32 #include "tgsi/tgsi_ureg.h"
     33 
     34 #include "util/u_draw.h"
     35 #include "util/u_memory.h"
     36 #include "util/u_math.h"
     37 #include "util/u_rect.h"
     38 
     39 #include "vl_types.h"
     40 #include "vl_vertex_buffers.h"
     41 #include "vl_bicubic_filter.h"
     42 
     43 enum VS_OUTPUT
     44 {
     45    VS_O_VPOS = 0,
     46    VS_O_VTEX = 0
     47 };
     48 
     49 static void *
     50 create_vert_shader(struct vl_bicubic_filter *filter)
     51 {
     52    struct ureg_program *shader;
     53    struct ureg_src i_vpos;
     54    struct ureg_dst o_vpos, o_vtex;
     55 
     56    shader = ureg_create(PIPE_SHADER_VERTEX);
     57    if (!shader)
     58       return NULL;
     59 
     60    i_vpos = ureg_DECL_vs_input(shader, 0);
     61    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
     62    o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX);
     63 
     64    ureg_MOV(shader, o_vpos, i_vpos);
     65    ureg_MOV(shader, o_vtex, i_vpos);
     66 
     67    ureg_END(shader);
     68 
     69    return ureg_create_shader_and_destroy(shader, filter->pipe);
     70 }
     71 
     72 static void
     73 create_frag_shader_cubic_interpolater(struct ureg_program *shader, struct ureg_src tex_a,
     74                                       struct ureg_src tex_b, struct ureg_src tex_c,
     75                                       struct ureg_src tex_d, struct ureg_src t,
     76                                       struct ureg_dst o_fragment)
     77 {
     78    struct ureg_dst temp[11];
     79    struct ureg_dst t_2;
     80    unsigned i;
     81 
     82    for(i = 0; i < 11; ++i)
     83        temp[i] = ureg_DECL_temporary(shader);
     84    t_2 = ureg_DECL_temporary(shader);
     85 
     86    /*
     87     * |temp[0]|   |  0  2  0  0 |  |tex_a|
     88     * |temp[1]| = | -1  0  1  0 |* |tex_b|
     89     * |temp[2]|   |  2 -5  4 -1 |  |tex_c|
     90     * |temp[3]|   | -1  3 -3  1 |  |tex_d|
     91     */
     92    ureg_MUL(shader, temp[0], tex_b, ureg_imm1f(shader, 2.0f));
     93 
     94    ureg_MUL(shader, temp[1], tex_a, ureg_imm1f(shader, -1.0f));
     95    ureg_MAD(shader, temp[1], tex_c, ureg_imm1f(shader, 1.0f),
     96             ureg_src(temp[1]));
     97 
     98    ureg_MUL(shader, temp[2], tex_a, ureg_imm1f(shader, 2.0f));
     99    ureg_MAD(shader, temp[2], tex_b, ureg_imm1f(shader, -5.0f),
    100             ureg_src(temp[2]));
    101    ureg_MAD(shader, temp[2], tex_c, ureg_imm1f(shader, 4.0f),
    102             ureg_src(temp[2]));
    103    ureg_MAD(shader, temp[2], tex_d, ureg_imm1f(shader, -1.0f),
    104              ureg_src(temp[2]));
    105 
    106    ureg_MUL(shader, temp[3], tex_a, ureg_imm1f(shader, -1.0f));
    107    ureg_MAD(shader, temp[3], tex_b, ureg_imm1f(shader, 3.0f),
    108             ureg_src(temp[3]));
    109    ureg_MAD(shader, temp[3], tex_c, ureg_imm1f(shader, -3.0f),
    110             ureg_src(temp[3]));
    111    ureg_MAD(shader, temp[3], tex_d, ureg_imm1f(shader, 1.0f),
    112             ureg_src(temp[3]));
    113 
    114    /*
    115     * t_2 = t*t
    116     * o_fragment = 0.5*|1  t  t^2  t^3|*|temp[0]|
    117     *                                   |temp[1]|
    118     *                                   |temp[2]|
    119     *                                   |temp[3]|
    120     */
    121 
    122    ureg_MUL(shader, t_2, t, t);
    123    ureg_MUL(shader, temp[4], ureg_src(t_2), t);
    124 
    125    ureg_MUL(shader, temp[4], ureg_src(temp[4]),
    126             ureg_src(temp[3]));
    127    ureg_MUL(shader, temp[5], ureg_src(t_2),
    128             ureg_src(temp[2]));
    129    ureg_MUL(shader, temp[6], t,
    130             ureg_src(temp[1]));
    131    ureg_MUL(shader, temp[7], ureg_imm1f(shader, 1.0f),
    132             ureg_src(temp[0]));
    133    ureg_ADD(shader, temp[8], ureg_src(temp[4]),
    134             ureg_src(temp[5]));
    135    ureg_ADD(shader, temp[9], ureg_src(temp[6]),
    136             ureg_src(temp[7]));
    137 
    138    ureg_ADD(shader, temp[10], ureg_src(temp[8]),
    139             ureg_src(temp[9]));
    140    ureg_MUL(shader, o_fragment, ureg_src(temp[10]),
    141             ureg_imm1f(shader, 0.5f));
    142 
    143 
    144    for(i = 0; i < 11; ++i)
    145        ureg_release_temporary(shader, temp[i]);
    146    ureg_release_temporary(shader, t_2);
    147 }
    148 
    149 static void *
    150 create_frag_shader(struct vl_bicubic_filter *filter, unsigned video_width,
    151                    unsigned video_height, struct vertex2f *offsets)
    152 {
    153    struct pipe_screen *screen = filter->pipe->screen;
    154    struct ureg_program *shader;
    155    struct ureg_src i_vtex, vtex;
    156    struct ureg_src sampler;
    157    struct ureg_src half_pixel;
    158    struct ureg_dst t_array[23];
    159    struct ureg_dst o_fragment;
    160    struct ureg_dst t;
    161    unsigned i;
    162 
    163    if (screen->get_shader_param(
    164       screen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_MAX_TEMPS) < 23) {
    165 
    166       return NULL;
    167    }
    168 
    169    shader = ureg_create(PIPE_SHADER_FRAGMENT);
    170    if (!shader) {
    171       return NULL;
    172    }
    173 
    174    i_vtex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, TGSI_INTERPOLATE_LINEAR);
    175    sampler = ureg_DECL_sampler(shader, 0);
    176 
    177    for (i = 0; i < 23; ++i)
    178       t_array[i] = ureg_DECL_temporary(shader);
    179    t = ureg_DECL_temporary(shader);
    180 
    181    half_pixel = ureg_DECL_constant(shader, 0);
    182    o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
    183 
    184    /*
    185     * temp = (i_vtex - (0.5/dst_size)) * i_size)
    186     * t = frac(temp)
    187     * vtex = floor(i_vtex)/i_size
    188     */
    189    ureg_ADD(shader, ureg_writemask(t_array[21], TGSI_WRITEMASK_XY),
    190             i_vtex, ureg_negate(half_pixel));
    191    ureg_MUL(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY),
    192             ureg_src(t_array[21]), ureg_imm2f(shader, video_width, video_height));
    193    ureg_FRC(shader, ureg_writemask(t, TGSI_WRITEMASK_XY),
    194             ureg_src(t_array[22]));
    195 
    196    ureg_FLR(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY),
    197             ureg_src(t_array[22]));
    198    ureg_DIV(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY),
    199             ureg_src(t_array[22]), ureg_imm2f(shader, video_width, video_height));
    200    ureg_ADD(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY),
    201             ureg_src(t_array[22]), half_pixel);
    202 
    203    /*
    204     * t_array[0..*] = vtex + offset[0..*]
    205     * t_array[0..*] = tex(t_array[0..*], sampler)
    206     * t_array[16+i] = cubic_interpolate(t_array[4*i..4*i+3], t_x)
    207     * o_fragment = cubic_interpolate(t_array[16..19], t_y)
    208     */
    209    vtex = ureg_src(t_array[22]);
    210    for (i = 0; i < 16; ++i) {
    211         ureg_ADD(shader, ureg_writemask(t_array[i], TGSI_WRITEMASK_XY),
    212                   vtex, ureg_imm2f(shader, offsets[i].x, offsets[i].y));
    213         ureg_MOV(shader, ureg_writemask(t_array[i], TGSI_WRITEMASK_ZW),
    214                   ureg_imm1f(shader, 0.0f));
    215    }
    216 
    217    for (i = 0; i < 16; ++i) {
    218       ureg_TEX(shader, t_array[i], TGSI_TEXTURE_2D, ureg_src(t_array[i]), sampler);
    219    }
    220 
    221    for(i = 0; i < 4; ++i)
    222       create_frag_shader_cubic_interpolater(shader, ureg_src(t_array[4*i]),
    223               ureg_src(t_array[4*i+1]), ureg_src(t_array[4*i+2]), ureg_src(t_array[4*i+3]),
    224               ureg_scalar(ureg_src(t), TGSI_SWIZZLE_X), t_array[16+i]);
    225 
    226    create_frag_shader_cubic_interpolater(shader, ureg_src(t_array[16]),
    227             ureg_src(t_array[17]), ureg_src(t_array[18]), ureg_src(t_array[19]),
    228             ureg_scalar(ureg_src(t), TGSI_SWIZZLE_Y), o_fragment);
    229 
    230    for(i = 0; i < 23; ++i)
    231        ureg_release_temporary(shader, t_array[i]);
    232    ureg_release_temporary(shader, t);
    233 
    234    ureg_END(shader);
    235 
    236    return ureg_create_shader_and_destroy(shader, filter->pipe);
    237 }
    238 
    239 bool
    240 vl_bicubic_filter_init(struct vl_bicubic_filter *filter, struct pipe_context *pipe,
    241                       unsigned width, unsigned height)
    242 {
    243    struct pipe_rasterizer_state rs_state;
    244    struct pipe_blend_state blend;
    245    struct vertex2f offsets[16];
    246    struct pipe_sampler_state sampler;
    247    struct pipe_vertex_element ve;
    248    unsigned i;
    249 
    250    assert(filter && pipe);
    251    assert(width && height);
    252 
    253    memset(filter, 0, sizeof(*filter));
    254    filter->pipe = pipe;
    255 
    256    memset(&rs_state, 0, sizeof(rs_state));
    257    rs_state.half_pixel_center = true;
    258    rs_state.bottom_edge_rule = true;
    259    rs_state.depth_clip = 1;
    260    filter->rs_state = pipe->create_rasterizer_state(pipe, &rs_state);
    261    if (!filter->rs_state)
    262       goto error_rs_state;
    263 
    264    memset(&blend, 0, sizeof blend);
    265    blend.rt[0].rgb_func = PIPE_BLEND_ADD;
    266    blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
    267    blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
    268    blend.rt[0].alpha_func = PIPE_BLEND_ADD;
    269    blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
    270    blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
    271    blend.logicop_func = PIPE_LOGICOP_CLEAR;
    272    blend.rt[0].colormask = PIPE_MASK_RGBA;
    273    filter->blend = pipe->create_blend_state(pipe, &blend);
    274    if (!filter->blend)
    275       goto error_blend;
    276 
    277    memset(&sampler, 0, sizeof(sampler));
    278    sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
    279    sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
    280    sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
    281    sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
    282    sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
    283    sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
    284    sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
    285    sampler.compare_func = PIPE_FUNC_ALWAYS;
    286    sampler.normalized_coords = 1;
    287    filter->sampler = pipe->create_sampler_state(pipe, &sampler);
    288    if (!filter->sampler)
    289       goto error_sampler;
    290 
    291    filter->quad = vl_vb_upload_quads(pipe);
    292    if(!filter->quad.buffer)
    293       goto error_quad;
    294 
    295    memset(&ve, 0, sizeof(ve));
    296    ve.src_offset = 0;
    297    ve.instance_divisor = 0;
    298    ve.vertex_buffer_index = 0;
    299    ve.src_format = PIPE_FORMAT_R32G32_FLOAT;
    300    filter->ves = pipe->create_vertex_elements_state(pipe, 1, &ve);
    301    if (!filter->ves)
    302       goto error_ves;
    303 
    304    offsets[0].x = -1.0f; offsets[0].y = -1.0f;
    305    offsets[1].x = 0.0f; offsets[1].y = -1.0f;
    306    offsets[2].x = 1.0f; offsets[2].y = -1.0f;
    307    offsets[3].x = 2.0f; offsets[3].y = -1.0f;
    308 
    309    offsets[4].x = -1.0f; offsets[4].y = 0.0f;
    310    offsets[5].x = 0.0f; offsets[5].y = 0.0f;
    311    offsets[6].x = 1.0f; offsets[6].y = 0.0f;
    312    offsets[7].x = 2.0f; offsets[7].y = 0.0f;
    313 
    314    offsets[8].x = -1.0f; offsets[8].y = 1.0f;
    315    offsets[9].x = 0.0f; offsets[9].y = 1.0f;
    316    offsets[10].x = 1.0f; offsets[10].y = 1.0f;
    317    offsets[11].x = 2.0f; offsets[11].y = 1.0f;
    318 
    319    offsets[12].x = -1.0f; offsets[12].y = 2.0f;
    320    offsets[13].x = 0.0f; offsets[13].y = 2.0f;
    321    offsets[14].x = 1.0f; offsets[14].y = 2.0f;
    322    offsets[15].x = 2.0f; offsets[15].y = 2.0f;
    323 
    324    for (i = 0; i < 16; ++i) {
    325       offsets[i].x /= width;
    326       offsets[i].y /= height;
    327    }
    328 
    329    filter->vs = create_vert_shader(filter);
    330    if (!filter->vs)
    331       goto error_vs;
    332 
    333    filter->fs = create_frag_shader(filter, width, height, offsets);
    334    if (!filter->fs)
    335       goto error_fs;
    336 
    337    return true;
    338 
    339 error_fs:
    340    pipe->delete_vs_state(pipe, filter->vs);
    341 
    342 error_vs:
    343    pipe->delete_vertex_elements_state(pipe, filter->ves);
    344 
    345 error_ves:
    346    pipe_resource_reference(&filter->quad.buffer, NULL);
    347 
    348 error_quad:
    349    pipe->delete_sampler_state(pipe, filter->sampler);
    350 
    351 error_sampler:
    352    pipe->delete_blend_state(pipe, filter->blend);
    353 
    354 error_blend:
    355    pipe->delete_rasterizer_state(pipe, filter->rs_state);
    356 
    357 error_rs_state:
    358    return false;
    359 }
    360 
    361 void
    362 vl_bicubic_filter_cleanup(struct vl_bicubic_filter *filter)
    363 {
    364    assert(filter);
    365 
    366    filter->pipe->delete_sampler_state(filter->pipe, filter->sampler);
    367    filter->pipe->delete_blend_state(filter->pipe, filter->blend);
    368    filter->pipe->delete_rasterizer_state(filter->pipe, filter->rs_state);
    369    filter->pipe->delete_vertex_elements_state(filter->pipe, filter->ves);
    370    pipe_resource_reference(&filter->quad.buffer, NULL);
    371 
    372    filter->pipe->delete_vs_state(filter->pipe, filter->vs);
    373    filter->pipe->delete_fs_state(filter->pipe, filter->fs);
    374 }
    375 
    376 void
    377 vl_bicubic_filter_render(struct vl_bicubic_filter *filter,
    378                         struct pipe_sampler_view *src,
    379                         struct pipe_surface *dst,
    380                         struct u_rect *dst_area,
    381                         struct u_rect *dst_clip)
    382 {
    383    struct pipe_viewport_state viewport;
    384    struct pipe_framebuffer_state fb_state;
    385    struct pipe_scissor_state scissor;
    386    union pipe_color_union clear_color;
    387    struct pipe_transfer *buf_transfer;
    388    struct pipe_resource *surface_size;
    389    assert(filter && src && dst);
    390 
    391    if (dst_clip) {
    392       scissor.minx = dst_clip->x0;
    393       scissor.miny = dst_clip->y0;
    394       scissor.maxx = dst_clip->x1;
    395       scissor.maxy = dst_clip->y1;
    396    } else {
    397       scissor.minx = 0;
    398       scissor.miny = 0;
    399       scissor.maxx = dst->width;
    400       scissor.maxy = dst->height;
    401    }
    402 
    403    clear_color.f[0] = clear_color.f[1] = 0.0f;
    404    clear_color.f[2] = clear_color.f[3] = 0.0f;
    405    surface_size = pipe_buffer_create
    406    (
    407       filter->pipe->screen,
    408       PIPE_BIND_CONSTANT_BUFFER,
    409       PIPE_USAGE_DEFAULT,
    410       2*sizeof(float)
    411    );
    412 
    413 
    414    memset(&viewport, 0, sizeof(viewport));
    415    if(dst_area){
    416       viewport.scale[0] = dst_area->x1 - dst_area->x0;
    417       viewport.scale[1] = dst_area->y1 - dst_area->y0;
    418       viewport.translate[0] = dst_area->x0;
    419       viewport.translate[1] = dst_area->y0;
    420    } else {
    421       viewport.scale[0] = dst->width;
    422       viewport.scale[1] = dst->height;
    423    }
    424    viewport.scale[2] = 1;
    425 
    426    float *ptr = pipe_buffer_map(filter->pipe, surface_size,
    427                                PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE,
    428                                &buf_transfer);
    429 
    430    ptr[0] = 0.5f/viewport.scale[0];
    431    ptr[1] = 0.5f/viewport.scale[1];
    432 
    433    pipe_buffer_unmap(filter->pipe, buf_transfer);
    434 
    435    memset(&fb_state, 0, sizeof(fb_state));
    436    fb_state.width = dst->width;
    437    fb_state.height = dst->height;
    438    fb_state.nr_cbufs = 1;
    439    fb_state.cbufs[0] = dst;
    440 
    441    filter->pipe->set_scissor_states(filter->pipe, 0, 1, &scissor);
    442    filter->pipe->clear_render_target(filter->pipe, dst, &clear_color,
    443                                      0, 0, dst->width, dst->height, false);
    444    pipe_set_constant_buffer(filter->pipe, PIPE_SHADER_FRAGMENT, 0, surface_size);
    445    filter->pipe->bind_rasterizer_state(filter->pipe, filter->rs_state);
    446    filter->pipe->bind_blend_state(filter->pipe, filter->blend);
    447    filter->pipe->bind_sampler_states(filter->pipe, PIPE_SHADER_FRAGMENT,
    448                                      0, 1, &filter->sampler);
    449    filter->pipe->set_sampler_views(filter->pipe, PIPE_SHADER_FRAGMENT,
    450                                    0, 1, &src);
    451    filter->pipe->bind_vs_state(filter->pipe, filter->vs);
    452    filter->pipe->bind_fs_state(filter->pipe, filter->fs);
    453    filter->pipe->set_framebuffer_state(filter->pipe, &fb_state);
    454    filter->pipe->set_viewport_states(filter->pipe, 0, 1, &viewport);
    455    filter->pipe->set_vertex_buffers(filter->pipe, 0, 1, &filter->quad);
    456    filter->pipe->bind_vertex_elements_state(filter->pipe, filter->ves);
    457 
    458    util_draw_arrays(filter->pipe, PIPE_PRIM_QUADS, 0, 4);
    459 }
    460