1 /* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <assert.h> 12 13 #include "./vpx_config.h" 14 #include "./vpx_dsp_rtcd.h" 15 16 #include "vpx_ports/mem.h" 17 #include "vpx/vpx_integer.h" 18 19 #include "vpx_dsp/variance.h" 20 21 static const uint8_t bilinear_filters[8][2] = { 22 { 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 }, 23 { 64, 64 }, { 48, 80 }, { 32, 96 }, { 16, 112 }, 24 }; 25 26 uint32_t vpx_get4x4sse_cs_c(const uint8_t *a, int a_stride, const uint8_t *b, 27 int b_stride) { 28 int distortion = 0; 29 int r, c; 30 31 for (r = 0; r < 4; ++r) { 32 for (c = 0; c < 4; ++c) { 33 int diff = a[c] - b[c]; 34 distortion += diff * diff; 35 } 36 37 a += a_stride; 38 b += b_stride; 39 } 40 41 return distortion; 42 } 43 44 uint32_t vpx_get_mb_ss_c(const int16_t *a) { 45 unsigned int i, sum = 0; 46 47 for (i = 0; i < 256; ++i) { 48 sum += a[i] * a[i]; 49 } 50 51 return sum; 52 } 53 54 static void variance(const uint8_t *a, int a_stride, const uint8_t *b, 55 int b_stride, int w, int h, uint32_t *sse, int *sum) { 56 int i, j; 57 58 *sum = 0; 59 *sse = 0; 60 61 for (i = 0; i < h; ++i) { 62 for (j = 0; j < w; ++j) { 63 const int diff = a[j] - b[j]; 64 *sum += diff; 65 *sse += diff * diff; 66 } 67 68 a += a_stride; 69 b += b_stride; 70 } 71 } 72 73 // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal 74 // or vertical direction to produce the filtered output block. Used to implement 75 // the first-pass of 2-D separable filter. 76 // 77 // Produces int16_t output to retain precision for the next pass. Two filter 78 // taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is 79 // applied horizontally (pixel_step = 1) or vertically (pixel_step = stride). 80 // It defines the offset required to move from one input to the next. 81 static void var_filter_block2d_bil_first_pass(const uint8_t *a, uint16_t *b, 82 unsigned int src_pixels_per_line, 83 int pixel_step, 84 unsigned int output_height, 85 unsigned int output_width, 86 const uint8_t *filter) { 87 unsigned int i, j; 88 89 for (i = 0; i < output_height; ++i) { 90 for (j = 0; j < output_width; ++j) { 91 b[j] = ROUND_POWER_OF_TWO( 92 (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS); 93 94 ++a; 95 } 96 97 a += src_pixels_per_line - output_width; 98 b += output_width; 99 } 100 } 101 102 // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal 103 // or vertical direction to produce the filtered output block. Used to implement 104 // the second-pass of 2-D separable filter. 105 // 106 // Requires 16-bit input as produced by filter_block2d_bil_first_pass. Two 107 // filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the 108 // filter is applied horizontally (pixel_step = 1) or vertically 109 // (pixel_step = stride). It defines the offset required to move from one input 110 // to the next. Output is 8-bit. 111 static void var_filter_block2d_bil_second_pass(const uint16_t *a, uint8_t *b, 112 unsigned int src_pixels_per_line, 113 unsigned int pixel_step, 114 unsigned int output_height, 115 unsigned int output_width, 116 const uint8_t *filter) { 117 unsigned int i, j; 118 119 for (i = 0; i < output_height; ++i) { 120 for (j = 0; j < output_width; ++j) { 121 b[j] = ROUND_POWER_OF_TWO( 122 (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS); 123 ++a; 124 } 125 126 a += src_pixels_per_line - output_width; 127 b += output_width; 128 } 129 } 130 131 #define VAR(W, H) \ 132 uint32_t vpx_variance##W##x##H##_c(const uint8_t *a, int a_stride, \ 133 const uint8_t *b, int b_stride, \ 134 uint32_t *sse) { \ 135 int sum; \ 136 variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ 137 return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \ 138 } 139 140 #define SUBPIX_VAR(W, H) \ 141 uint32_t vpx_sub_pixel_variance##W##x##H##_c( \ 142 const uint8_t *a, int a_stride, int xoffset, int yoffset, \ 143 const uint8_t *b, int b_stride, uint32_t *sse) { \ 144 uint16_t fdata3[(H + 1) * W]; \ 145 uint8_t temp2[H * W]; \ 146 \ 147 var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \ 148 bilinear_filters[xoffset]); \ 149 var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ 150 bilinear_filters[yoffset]); \ 151 \ 152 return vpx_variance##W##x##H##_c(temp2, W, b, b_stride, sse); \ 153 } 154 155 #define SUBPIX_AVG_VAR(W, H) \ 156 uint32_t vpx_sub_pixel_avg_variance##W##x##H##_c( \ 157 const uint8_t *a, int a_stride, int xoffset, int yoffset, \ 158 const uint8_t *b, int b_stride, uint32_t *sse, \ 159 const uint8_t *second_pred) { \ 160 uint16_t fdata3[(H + 1) * W]; \ 161 uint8_t temp2[H * W]; \ 162 DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \ 163 \ 164 var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \ 165 bilinear_filters[xoffset]); \ 166 var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ 167 bilinear_filters[yoffset]); \ 168 \ 169 vpx_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \ 170 \ 171 return vpx_variance##W##x##H##_c(temp3, W, b, b_stride, sse); \ 172 } 173 174 /* Identical to the variance call except it takes an additional parameter, sum, 175 * and returns that value using pass-by-reference instead of returning 176 * sse - sum^2 / w*h 177 */ 178 #define GET_VAR(W, H) \ 179 void vpx_get##W##x##H##var_c(const uint8_t *a, int a_stride, \ 180 const uint8_t *b, int b_stride, uint32_t *sse, \ 181 int *sum) { \ 182 variance(a, a_stride, b, b_stride, W, H, sse, sum); \ 183 } 184 185 /* Identical to the variance call except it does not calculate the 186 * sse - sum^2 / w*h and returns sse in addtion to modifying the passed in 187 * variable. 188 */ 189 #define MSE(W, H) \ 190 uint32_t vpx_mse##W##x##H##_c(const uint8_t *a, int a_stride, \ 191 const uint8_t *b, int b_stride, \ 192 uint32_t *sse) { \ 193 int sum; \ 194 variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ 195 return *sse; \ 196 } 197 198 /* All three forms of the variance are available in the same sizes. */ 199 #define VARIANCES(W, H) \ 200 VAR(W, H) \ 201 SUBPIX_VAR(W, H) \ 202 SUBPIX_AVG_VAR(W, H) 203 204 VARIANCES(64, 64) 205 VARIANCES(64, 32) 206 VARIANCES(32, 64) 207 VARIANCES(32, 32) 208 VARIANCES(32, 16) 209 VARIANCES(16, 32) 210 VARIANCES(16, 16) 211 VARIANCES(16, 8) 212 VARIANCES(8, 16) 213 VARIANCES(8, 8) 214 VARIANCES(8, 4) 215 VARIANCES(4, 8) 216 VARIANCES(4, 4) 217 218 GET_VAR(16, 16) 219 GET_VAR(8, 8) 220 221 MSE(16, 16) 222 MSE(16, 8) 223 MSE(8, 16) 224 MSE(8, 8) 225 226 void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, 227 int height, const uint8_t *ref, int ref_stride) { 228 int i, j; 229 /* comp_pred and pred must be 16 byte aligned. */ 230 assert(((intptr_t)comp_pred & 0xf) == 0); 231 assert(((intptr_t)pred & 0xf) == 0); 232 233 for (i = 0; i < height; ++i) { 234 for (j = 0; j < width; ++j) { 235 const int tmp = pred[j] + ref[j]; 236 comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1); 237 } 238 comp_pred += width; 239 pred += width; 240 ref += ref_stride; 241 } 242 } 243 244 #if CONFIG_VP9_HIGHBITDEPTH 245 static void highbd_variance64(const uint8_t *a8, int a_stride, 246 const uint8_t *b8, int b_stride, int w, int h, 247 uint64_t *sse, int64_t *sum) { 248 int i, j; 249 250 uint16_t *a = CONVERT_TO_SHORTPTR(a8); 251 uint16_t *b = CONVERT_TO_SHORTPTR(b8); 252 *sum = 0; 253 *sse = 0; 254 255 for (i = 0; i < h; ++i) { 256 for (j = 0; j < w; ++j) { 257 const int diff = a[j] - b[j]; 258 *sum += diff; 259 *sse += diff * diff; 260 } 261 a += a_stride; 262 b += b_stride; 263 } 264 } 265 266 static void highbd_8_variance(const uint8_t *a8, int a_stride, 267 const uint8_t *b8, int b_stride, int w, int h, 268 uint32_t *sse, int *sum) { 269 uint64_t sse_long = 0; 270 int64_t sum_long = 0; 271 highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); 272 *sse = (uint32_t)sse_long; 273 *sum = (int)sum_long; 274 } 275 276 static void highbd_10_variance(const uint8_t *a8, int a_stride, 277 const uint8_t *b8, int b_stride, int w, int h, 278 uint32_t *sse, int *sum) { 279 uint64_t sse_long = 0; 280 int64_t sum_long = 0; 281 highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); 282 *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 4); 283 *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2); 284 } 285 286 static void highbd_12_variance(const uint8_t *a8, int a_stride, 287 const uint8_t *b8, int b_stride, int w, int h, 288 uint32_t *sse, int *sum) { 289 uint64_t sse_long = 0; 290 int64_t sum_long = 0; 291 highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); 292 *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 8); 293 *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4); 294 } 295 296 #define HIGHBD_VAR(W, H) \ 297 uint32_t vpx_highbd_8_variance##W##x##H##_c(const uint8_t *a, int a_stride, \ 298 const uint8_t *b, int b_stride, \ 299 uint32_t *sse) { \ 300 int sum; \ 301 highbd_8_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ 302 return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \ 303 } \ 304 \ 305 uint32_t vpx_highbd_10_variance##W##x##H##_c(const uint8_t *a, int a_stride, \ 306 const uint8_t *b, int b_stride, \ 307 uint32_t *sse) { \ 308 int sum; \ 309 int64_t var; \ 310 highbd_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ 311 var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \ 312 return (var >= 0) ? (uint32_t)var : 0; \ 313 } \ 314 \ 315 uint32_t vpx_highbd_12_variance##W##x##H##_c(const uint8_t *a, int a_stride, \ 316 const uint8_t *b, int b_stride, \ 317 uint32_t *sse) { \ 318 int sum; \ 319 int64_t var; \ 320 highbd_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ 321 var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \ 322 return (var >= 0) ? (uint32_t)var : 0; \ 323 } 324 325 #define HIGHBD_GET_VAR(S) \ 326 void vpx_highbd_8_get##S##x##S##var_c(const uint8_t *src, int src_stride, \ 327 const uint8_t *ref, int ref_stride, \ 328 uint32_t *sse, int *sum) { \ 329 highbd_8_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \ 330 } \ 331 \ 332 void vpx_highbd_10_get##S##x##S##var_c(const uint8_t *src, int src_stride, \ 333 const uint8_t *ref, int ref_stride, \ 334 uint32_t *sse, int *sum) { \ 335 highbd_10_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \ 336 } \ 337 \ 338 void vpx_highbd_12_get##S##x##S##var_c(const uint8_t *src, int src_stride, \ 339 const uint8_t *ref, int ref_stride, \ 340 uint32_t *sse, int *sum) { \ 341 highbd_12_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \ 342 } 343 344 #define HIGHBD_MSE(W, H) \ 345 uint32_t vpx_highbd_8_mse##W##x##H##_c(const uint8_t *src, int src_stride, \ 346 const uint8_t *ref, int ref_stride, \ 347 uint32_t *sse) { \ 348 int sum; \ 349 highbd_8_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \ 350 return *sse; \ 351 } \ 352 \ 353 uint32_t vpx_highbd_10_mse##W##x##H##_c(const uint8_t *src, int src_stride, \ 354 const uint8_t *ref, int ref_stride, \ 355 uint32_t *sse) { \ 356 int sum; \ 357 highbd_10_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \ 358 return *sse; \ 359 } \ 360 \ 361 uint32_t vpx_highbd_12_mse##W##x##H##_c(const uint8_t *src, int src_stride, \ 362 const uint8_t *ref, int ref_stride, \ 363 uint32_t *sse) { \ 364 int sum; \ 365 highbd_12_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \ 366 return *sse; \ 367 } 368 369 static void highbd_var_filter_block2d_bil_first_pass( 370 const uint8_t *src_ptr8, uint16_t *output_ptr, 371 unsigned int src_pixels_per_line, int pixel_step, 372 unsigned int output_height, unsigned int output_width, 373 const uint8_t *filter) { 374 unsigned int i, j; 375 uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8); 376 for (i = 0; i < output_height; ++i) { 377 for (j = 0; j < output_width; ++j) { 378 output_ptr[j] = ROUND_POWER_OF_TWO( 379 (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1], 380 FILTER_BITS); 381 382 ++src_ptr; 383 } 384 385 // Next row... 386 src_ptr += src_pixels_per_line - output_width; 387 output_ptr += output_width; 388 } 389 } 390 391 static void highbd_var_filter_block2d_bil_second_pass( 392 const uint16_t *src_ptr, uint16_t *output_ptr, 393 unsigned int src_pixels_per_line, unsigned int pixel_step, 394 unsigned int output_height, unsigned int output_width, 395 const uint8_t *filter) { 396 unsigned int i, j; 397 398 for (i = 0; i < output_height; ++i) { 399 for (j = 0; j < output_width; ++j) { 400 output_ptr[j] = ROUND_POWER_OF_TWO( 401 (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1], 402 FILTER_BITS); 403 ++src_ptr; 404 } 405 406 src_ptr += src_pixels_per_line - output_width; 407 output_ptr += output_width; 408 } 409 } 410 411 #define HIGHBD_SUBPIX_VAR(W, H) \ 412 uint32_t vpx_highbd_8_sub_pixel_variance##W##x##H##_c( \ 413 const uint8_t *src, int src_stride, int xoffset, int yoffset, \ 414 const uint8_t *dst, int dst_stride, uint32_t *sse) { \ 415 uint16_t fdata3[(H + 1) * W]; \ 416 uint16_t temp2[H * W]; \ 417 \ 418 highbd_var_filter_block2d_bil_first_pass( \ 419 src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]); \ 420 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ 421 bilinear_filters[yoffset]); \ 422 \ 423 return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \ 424 dst, dst_stride, sse); \ 425 } \ 426 \ 427 uint32_t vpx_highbd_10_sub_pixel_variance##W##x##H##_c( \ 428 const uint8_t *src, int src_stride, int xoffset, int yoffset, \ 429 const uint8_t *dst, int dst_stride, uint32_t *sse) { \ 430 uint16_t fdata3[(H + 1) * W]; \ 431 uint16_t temp2[H * W]; \ 432 \ 433 highbd_var_filter_block2d_bil_first_pass( \ 434 src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]); \ 435 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ 436 bilinear_filters[yoffset]); \ 437 \ 438 return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \ 439 dst, dst_stride, sse); \ 440 } \ 441 \ 442 uint32_t vpx_highbd_12_sub_pixel_variance##W##x##H##_c( \ 443 const uint8_t *src, int src_stride, int xoffset, int yoffset, \ 444 const uint8_t *dst, int dst_stride, uint32_t *sse) { \ 445 uint16_t fdata3[(H + 1) * W]; \ 446 uint16_t temp2[H * W]; \ 447 \ 448 highbd_var_filter_block2d_bil_first_pass( \ 449 src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]); \ 450 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ 451 bilinear_filters[yoffset]); \ 452 \ 453 return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \ 454 dst, dst_stride, sse); \ 455 } 456 457 #define HIGHBD_SUBPIX_AVG_VAR(W, H) \ 458 uint32_t vpx_highbd_8_sub_pixel_avg_variance##W##x##H##_c( \ 459 const uint8_t *src, int src_stride, int xoffset, int yoffset, \ 460 const uint8_t *dst, int dst_stride, uint32_t *sse, \ 461 const uint8_t *second_pred) { \ 462 uint16_t fdata3[(H + 1) * W]; \ 463 uint16_t temp2[H * W]; \ 464 DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ 465 \ 466 highbd_var_filter_block2d_bil_first_pass( \ 467 src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]); \ 468 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ 469 bilinear_filters[yoffset]); \ 470 \ 471 vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \ 472 CONVERT_TO_BYTEPTR(temp2), W); \ 473 \ 474 return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \ 475 dst, dst_stride, sse); \ 476 } \ 477 \ 478 uint32_t vpx_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \ 479 const uint8_t *src, int src_stride, int xoffset, int yoffset, \ 480 const uint8_t *dst, int dst_stride, uint32_t *sse, \ 481 const uint8_t *second_pred) { \ 482 uint16_t fdata3[(H + 1) * W]; \ 483 uint16_t temp2[H * W]; \ 484 DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ 485 \ 486 highbd_var_filter_block2d_bil_first_pass( \ 487 src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]); \ 488 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ 489 bilinear_filters[yoffset]); \ 490 \ 491 vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \ 492 CONVERT_TO_BYTEPTR(temp2), W); \ 493 \ 494 return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \ 495 dst, dst_stride, sse); \ 496 } \ 497 \ 498 uint32_t vpx_highbd_12_sub_pixel_avg_variance##W##x##H##_c( \ 499 const uint8_t *src, int src_stride, int xoffset, int yoffset, \ 500 const uint8_t *dst, int dst_stride, uint32_t *sse, \ 501 const uint8_t *second_pred) { \ 502 uint16_t fdata3[(H + 1) * W]; \ 503 uint16_t temp2[H * W]; \ 504 DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ 505 \ 506 highbd_var_filter_block2d_bil_first_pass( \ 507 src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]); \ 508 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ 509 bilinear_filters[yoffset]); \ 510 \ 511 vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \ 512 CONVERT_TO_BYTEPTR(temp2), W); \ 513 \ 514 return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \ 515 dst, dst_stride, sse); \ 516 } 517 518 /* All three forms of the variance are available in the same sizes. */ 519 #define HIGHBD_VARIANCES(W, H) \ 520 HIGHBD_VAR(W, H) \ 521 HIGHBD_SUBPIX_VAR(W, H) \ 522 HIGHBD_SUBPIX_AVG_VAR(W, H) 523 524 HIGHBD_VARIANCES(64, 64) 525 HIGHBD_VARIANCES(64, 32) 526 HIGHBD_VARIANCES(32, 64) 527 HIGHBD_VARIANCES(32, 32) 528 HIGHBD_VARIANCES(32, 16) 529 HIGHBD_VARIANCES(16, 32) 530 HIGHBD_VARIANCES(16, 16) 531 HIGHBD_VARIANCES(16, 8) 532 HIGHBD_VARIANCES(8, 16) 533 HIGHBD_VARIANCES(8, 8) 534 HIGHBD_VARIANCES(8, 4) 535 HIGHBD_VARIANCES(4, 8) 536 HIGHBD_VARIANCES(4, 4) 537 538 HIGHBD_GET_VAR(8) 539 HIGHBD_GET_VAR(16) 540 541 HIGHBD_MSE(16, 16) 542 HIGHBD_MSE(16, 8) 543 HIGHBD_MSE(8, 16) 544 HIGHBD_MSE(8, 8) 545 546 void vpx_highbd_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred8, 547 int width, int height, const uint8_t *ref8, 548 int ref_stride) { 549 int i, j; 550 uint16_t *pred = CONVERT_TO_SHORTPTR(pred8); 551 uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); 552 for (i = 0; i < height; ++i) { 553 for (j = 0; j < width; ++j) { 554 const int tmp = pred[j] + ref[j]; 555 comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1); 556 } 557 comp_pred += width; 558 pred += width; 559 ref += ref_stride; 560 } 561 } 562 #endif // CONFIG_VP9_HIGHBITDEPTH 563