1 /************************************************************************** 2 * 3 * Copyright 2016 Nayan Deshmukh. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 #include <stdio.h> 29 30 #include "pipe/p_context.h" 31 32 #include "tgsi/tgsi_ureg.h" 33 34 #include "util/u_draw.h" 35 #include "util/u_memory.h" 36 #include "util/u_math.h" 37 #include "util/u_rect.h" 38 39 #include "vl_types.h" 40 #include "vl_vertex_buffers.h" 41 #include "vl_bicubic_filter.h" 42 43 enum VS_OUTPUT 44 { 45 VS_O_VPOS = 0, 46 VS_O_VTEX = 0 47 }; 48 49 static void * 50 create_vert_shader(struct vl_bicubic_filter *filter) 51 { 52 struct ureg_program *shader; 53 struct ureg_src i_vpos; 54 struct ureg_dst o_vpos, o_vtex; 55 56 shader = ureg_create(PIPE_SHADER_VERTEX); 57 if (!shader) 58 return NULL; 59 60 i_vpos = ureg_DECL_vs_input(shader, 0); 61 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); 62 o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX); 63 64 ureg_MOV(shader, o_vpos, i_vpos); 65 ureg_MOV(shader, o_vtex, i_vpos); 66 67 ureg_END(shader); 68 69 return ureg_create_shader_and_destroy(shader, filter->pipe); 70 } 71 72 static void 73 create_frag_shader_cubic_interpolater(struct ureg_program *shader, struct ureg_src tex_a, 74 struct ureg_src tex_b, struct ureg_src tex_c, 75 struct ureg_src tex_d, struct ureg_src t, 76 struct ureg_dst o_fragment) 77 { 78 struct ureg_dst temp[11]; 79 struct ureg_dst t_2; 80 unsigned i; 81 82 for(i = 0; i < 11; ++i) 83 temp[i] = ureg_DECL_temporary(shader); 84 t_2 = ureg_DECL_temporary(shader); 85 86 /* 87 * |temp[0]| | 0 2 0 0 | |tex_a| 88 * |temp[1]| = | -1 0 1 0 |* |tex_b| 89 * |temp[2]| | 2 -5 4 -1 | |tex_c| 90 * |temp[3]| | -1 3 -3 1 | |tex_d| 91 */ 92 ureg_MUL(shader, temp[0], tex_b, ureg_imm1f(shader, 2.0f)); 93 94 ureg_MUL(shader, temp[1], tex_a, ureg_imm1f(shader, -1.0f)); 95 ureg_MAD(shader, temp[1], tex_c, ureg_imm1f(shader, 1.0f), 96 ureg_src(temp[1])); 97 98 ureg_MUL(shader, temp[2], tex_a, ureg_imm1f(shader, 2.0f)); 99 ureg_MAD(shader, temp[2], tex_b, ureg_imm1f(shader, -5.0f), 100 ureg_src(temp[2])); 101 ureg_MAD(shader, temp[2], tex_c, ureg_imm1f(shader, 4.0f), 102 ureg_src(temp[2])); 103 ureg_MAD(shader, temp[2], tex_d, ureg_imm1f(shader, -1.0f), 104 ureg_src(temp[2])); 105 106 ureg_MUL(shader, temp[3], tex_a, ureg_imm1f(shader, -1.0f)); 107 ureg_MAD(shader, temp[3], tex_b, ureg_imm1f(shader, 3.0f), 108 ureg_src(temp[3])); 109 ureg_MAD(shader, temp[3], tex_c, ureg_imm1f(shader, -3.0f), 110 ureg_src(temp[3])); 111 ureg_MAD(shader, temp[3], tex_d, ureg_imm1f(shader, 1.0f), 112 ureg_src(temp[3])); 113 114 /* 115 * t_2 = t*t 116 * o_fragment = 0.5*|1 t t^2 t^3|*|temp[0]| 117 * |temp[1]| 118 * |temp[2]| 119 * |temp[3]| 120 */ 121 122 ureg_MUL(shader, t_2, t, t); 123 ureg_MUL(shader, temp[4], ureg_src(t_2), t); 124 125 ureg_MUL(shader, temp[4], ureg_src(temp[4]), 126 ureg_src(temp[3])); 127 ureg_MUL(shader, temp[5], ureg_src(t_2), 128 ureg_src(temp[2])); 129 ureg_MUL(shader, temp[6], t, 130 ureg_src(temp[1])); 131 ureg_MUL(shader, temp[7], ureg_imm1f(shader, 1.0f), 132 ureg_src(temp[0])); 133 ureg_ADD(shader, temp[8], ureg_src(temp[4]), 134 ureg_src(temp[5])); 135 ureg_ADD(shader, temp[9], ureg_src(temp[6]), 136 ureg_src(temp[7])); 137 138 ureg_ADD(shader, temp[10], ureg_src(temp[8]), 139 ureg_src(temp[9])); 140 ureg_MUL(shader, o_fragment, ureg_src(temp[10]), 141 ureg_imm1f(shader, 0.5f)); 142 143 144 for(i = 0; i < 11; ++i) 145 ureg_release_temporary(shader, temp[i]); 146 ureg_release_temporary(shader, t_2); 147 } 148 149 static void * 150 create_frag_shader(struct vl_bicubic_filter *filter, unsigned video_width, 151 unsigned video_height, struct vertex2f *offsets) 152 { 153 struct pipe_screen *screen = filter->pipe->screen; 154 struct ureg_program *shader; 155 struct ureg_src i_vtex, vtex; 156 struct ureg_src sampler; 157 struct ureg_src half_pixel; 158 struct ureg_dst t_array[23]; 159 struct ureg_dst o_fragment; 160 struct ureg_dst t; 161 unsigned i; 162 163 if (screen->get_shader_param( 164 screen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_MAX_TEMPS) < 23) { 165 166 return NULL; 167 } 168 169 shader = ureg_create(PIPE_SHADER_FRAGMENT); 170 if (!shader) { 171 return NULL; 172 } 173 174 i_vtex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, TGSI_INTERPOLATE_LINEAR); 175 sampler = ureg_DECL_sampler(shader, 0); 176 177 for (i = 0; i < 23; ++i) 178 t_array[i] = ureg_DECL_temporary(shader); 179 t = ureg_DECL_temporary(shader); 180 181 half_pixel = ureg_DECL_constant(shader, 0); 182 o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); 183 184 /* 185 * temp = (i_vtex - (0.5/dst_size)) * i_size) 186 * t = frac(temp) 187 * vtex = floor(i_vtex)/i_size 188 */ 189 ureg_ADD(shader, ureg_writemask(t_array[21], TGSI_WRITEMASK_XY), 190 i_vtex, ureg_negate(half_pixel)); 191 ureg_MUL(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY), 192 ureg_src(t_array[21]), ureg_imm2f(shader, video_width, video_height)); 193 ureg_FRC(shader, ureg_writemask(t, TGSI_WRITEMASK_XY), 194 ureg_src(t_array[22])); 195 196 ureg_FLR(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY), 197 ureg_src(t_array[22])); 198 ureg_DIV(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY), 199 ureg_src(t_array[22]), ureg_imm2f(shader, video_width, video_height)); 200 ureg_ADD(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY), 201 ureg_src(t_array[22]), half_pixel); 202 203 /* 204 * t_array[0..*] = vtex + offset[0..*] 205 * t_array[0..*] = tex(t_array[0..*], sampler) 206 * t_array[16+i] = cubic_interpolate(t_array[4*i..4*i+3], t_x) 207 * o_fragment = cubic_interpolate(t_array[16..19], t_y) 208 */ 209 vtex = ureg_src(t_array[22]); 210 for (i = 0; i < 16; ++i) { 211 ureg_ADD(shader, ureg_writemask(t_array[i], TGSI_WRITEMASK_XY), 212 vtex, ureg_imm2f(shader, offsets[i].x, offsets[i].y)); 213 ureg_MOV(shader, ureg_writemask(t_array[i], TGSI_WRITEMASK_ZW), 214 ureg_imm1f(shader, 0.0f)); 215 } 216 217 for (i = 0; i < 16; ++i) { 218 ureg_TEX(shader, t_array[i], TGSI_TEXTURE_2D, ureg_src(t_array[i]), sampler); 219 } 220 221 for(i = 0; i < 4; ++i) 222 create_frag_shader_cubic_interpolater(shader, ureg_src(t_array[4*i]), 223 ureg_src(t_array[4*i+1]), ureg_src(t_array[4*i+2]), ureg_src(t_array[4*i+3]), 224 ureg_scalar(ureg_src(t), TGSI_SWIZZLE_X), t_array[16+i]); 225 226 create_frag_shader_cubic_interpolater(shader, ureg_src(t_array[16]), 227 ureg_src(t_array[17]), ureg_src(t_array[18]), ureg_src(t_array[19]), 228 ureg_scalar(ureg_src(t), TGSI_SWIZZLE_Y), o_fragment); 229 230 for(i = 0; i < 23; ++i) 231 ureg_release_temporary(shader, t_array[i]); 232 ureg_release_temporary(shader, t); 233 234 ureg_END(shader); 235 236 return ureg_create_shader_and_destroy(shader, filter->pipe); 237 } 238 239 bool 240 vl_bicubic_filter_init(struct vl_bicubic_filter *filter, struct pipe_context *pipe, 241 unsigned width, unsigned height) 242 { 243 struct pipe_rasterizer_state rs_state; 244 struct pipe_blend_state blend; 245 struct vertex2f offsets[16]; 246 struct pipe_sampler_state sampler; 247 struct pipe_vertex_element ve; 248 unsigned i; 249 250 assert(filter && pipe); 251 assert(width && height); 252 253 memset(filter, 0, sizeof(*filter)); 254 filter->pipe = pipe; 255 256 memset(&rs_state, 0, sizeof(rs_state)); 257 rs_state.half_pixel_center = true; 258 rs_state.bottom_edge_rule = true; 259 rs_state.depth_clip = 1; 260 filter->rs_state = pipe->create_rasterizer_state(pipe, &rs_state); 261 if (!filter->rs_state) 262 goto error_rs_state; 263 264 memset(&blend, 0, sizeof blend); 265 blend.rt[0].rgb_func = PIPE_BLEND_ADD; 266 blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; 267 blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE; 268 blend.rt[0].alpha_func = PIPE_BLEND_ADD; 269 blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; 270 blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE; 271 blend.logicop_func = PIPE_LOGICOP_CLEAR; 272 blend.rt[0].colormask = PIPE_MASK_RGBA; 273 filter->blend = pipe->create_blend_state(pipe, &blend); 274 if (!filter->blend) 275 goto error_blend; 276 277 memset(&sampler, 0, sizeof(sampler)); 278 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; 279 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; 280 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; 281 sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; 282 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; 283 sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; 284 sampler.compare_mode = PIPE_TEX_COMPARE_NONE; 285 sampler.compare_func = PIPE_FUNC_ALWAYS; 286 sampler.normalized_coords = 1; 287 filter->sampler = pipe->create_sampler_state(pipe, &sampler); 288 if (!filter->sampler) 289 goto error_sampler; 290 291 filter->quad = vl_vb_upload_quads(pipe); 292 if(!filter->quad.buffer) 293 goto error_quad; 294 295 memset(&ve, 0, sizeof(ve)); 296 ve.src_offset = 0; 297 ve.instance_divisor = 0; 298 ve.vertex_buffer_index = 0; 299 ve.src_format = PIPE_FORMAT_R32G32_FLOAT; 300 filter->ves = pipe->create_vertex_elements_state(pipe, 1, &ve); 301 if (!filter->ves) 302 goto error_ves; 303 304 offsets[0].x = -1.0f; offsets[0].y = -1.0f; 305 offsets[1].x = 0.0f; offsets[1].y = -1.0f; 306 offsets[2].x = 1.0f; offsets[2].y = -1.0f; 307 offsets[3].x = 2.0f; offsets[3].y = -1.0f; 308 309 offsets[4].x = -1.0f; offsets[4].y = 0.0f; 310 offsets[5].x = 0.0f; offsets[5].y = 0.0f; 311 offsets[6].x = 1.0f; offsets[6].y = 0.0f; 312 offsets[7].x = 2.0f; offsets[7].y = 0.0f; 313 314 offsets[8].x = -1.0f; offsets[8].y = 1.0f; 315 offsets[9].x = 0.0f; offsets[9].y = 1.0f; 316 offsets[10].x = 1.0f; offsets[10].y = 1.0f; 317 offsets[11].x = 2.0f; offsets[11].y = 1.0f; 318 319 offsets[12].x = -1.0f; offsets[12].y = 2.0f; 320 offsets[13].x = 0.0f; offsets[13].y = 2.0f; 321 offsets[14].x = 1.0f; offsets[14].y = 2.0f; 322 offsets[15].x = 2.0f; offsets[15].y = 2.0f; 323 324 for (i = 0; i < 16; ++i) { 325 offsets[i].x /= width; 326 offsets[i].y /= height; 327 } 328 329 filter->vs = create_vert_shader(filter); 330 if (!filter->vs) 331 goto error_vs; 332 333 filter->fs = create_frag_shader(filter, width, height, offsets); 334 if (!filter->fs) 335 goto error_fs; 336 337 return true; 338 339 error_fs: 340 pipe->delete_vs_state(pipe, filter->vs); 341 342 error_vs: 343 pipe->delete_vertex_elements_state(pipe, filter->ves); 344 345 error_ves: 346 pipe_resource_reference(&filter->quad.buffer, NULL); 347 348 error_quad: 349 pipe->delete_sampler_state(pipe, filter->sampler); 350 351 error_sampler: 352 pipe->delete_blend_state(pipe, filter->blend); 353 354 error_blend: 355 pipe->delete_rasterizer_state(pipe, filter->rs_state); 356 357 error_rs_state: 358 return false; 359 } 360 361 void 362 vl_bicubic_filter_cleanup(struct vl_bicubic_filter *filter) 363 { 364 assert(filter); 365 366 filter->pipe->delete_sampler_state(filter->pipe, filter->sampler); 367 filter->pipe->delete_blend_state(filter->pipe, filter->blend); 368 filter->pipe->delete_rasterizer_state(filter->pipe, filter->rs_state); 369 filter->pipe->delete_vertex_elements_state(filter->pipe, filter->ves); 370 pipe_resource_reference(&filter->quad.buffer, NULL); 371 372 filter->pipe->delete_vs_state(filter->pipe, filter->vs); 373 filter->pipe->delete_fs_state(filter->pipe, filter->fs); 374 } 375 376 void 377 vl_bicubic_filter_render(struct vl_bicubic_filter *filter, 378 struct pipe_sampler_view *src, 379 struct pipe_surface *dst, 380 struct u_rect *dst_area, 381 struct u_rect *dst_clip) 382 { 383 struct pipe_viewport_state viewport; 384 struct pipe_framebuffer_state fb_state; 385 struct pipe_scissor_state scissor; 386 union pipe_color_union clear_color; 387 struct pipe_transfer *buf_transfer; 388 struct pipe_resource *surface_size; 389 assert(filter && src && dst); 390 391 if (dst_clip) { 392 scissor.minx = dst_clip->x0; 393 scissor.miny = dst_clip->y0; 394 scissor.maxx = dst_clip->x1; 395 scissor.maxy = dst_clip->y1; 396 } else { 397 scissor.minx = 0; 398 scissor.miny = 0; 399 scissor.maxx = dst->width; 400 scissor.maxy = dst->height; 401 } 402 403 clear_color.f[0] = clear_color.f[1] = 0.0f; 404 clear_color.f[2] = clear_color.f[3] = 0.0f; 405 surface_size = pipe_buffer_create 406 ( 407 filter->pipe->screen, 408 PIPE_BIND_CONSTANT_BUFFER, 409 PIPE_USAGE_DEFAULT, 410 2*sizeof(float) 411 ); 412 413 414 memset(&viewport, 0, sizeof(viewport)); 415 if(dst_area){ 416 viewport.scale[0] = dst_area->x1 - dst_area->x0; 417 viewport.scale[1] = dst_area->y1 - dst_area->y0; 418 viewport.translate[0] = dst_area->x0; 419 viewport.translate[1] = dst_area->y0; 420 } else { 421 viewport.scale[0] = dst->width; 422 viewport.scale[1] = dst->height; 423 } 424 viewport.scale[2] = 1; 425 426 float *ptr = pipe_buffer_map(filter->pipe, surface_size, 427 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE, 428 &buf_transfer); 429 430 ptr[0] = 0.5f/viewport.scale[0]; 431 ptr[1] = 0.5f/viewport.scale[1]; 432 433 pipe_buffer_unmap(filter->pipe, buf_transfer); 434 435 memset(&fb_state, 0, sizeof(fb_state)); 436 fb_state.width = dst->width; 437 fb_state.height = dst->height; 438 fb_state.nr_cbufs = 1; 439 fb_state.cbufs[0] = dst; 440 441 filter->pipe->set_scissor_states(filter->pipe, 0, 1, &scissor); 442 filter->pipe->clear_render_target(filter->pipe, dst, &clear_color, 443 0, 0, dst->width, dst->height, false); 444 pipe_set_constant_buffer(filter->pipe, PIPE_SHADER_FRAGMENT, 0, surface_size); 445 filter->pipe->bind_rasterizer_state(filter->pipe, filter->rs_state); 446 filter->pipe->bind_blend_state(filter->pipe, filter->blend); 447 filter->pipe->bind_sampler_states(filter->pipe, PIPE_SHADER_FRAGMENT, 448 0, 1, &filter->sampler); 449 filter->pipe->set_sampler_views(filter->pipe, PIPE_SHADER_FRAGMENT, 450 0, 1, &src); 451 filter->pipe->bind_vs_state(filter->pipe, filter->vs); 452 filter->pipe->bind_fs_state(filter->pipe, filter->fs); 453 filter->pipe->set_framebuffer_state(filter->pipe, &fb_state); 454 filter->pipe->set_viewport_states(filter->pipe, 0, 1, &viewport); 455 filter->pipe->set_vertex_buffers(filter->pipe, 0, 1, &filter->quad); 456 filter->pipe->bind_vertex_elements_state(filter->pipe, filter->ves); 457 458 util_draw_arrays(filter->pipe, PIPE_PRIM_QUADS, 0, 4); 459 } 460