/*
 *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>

#include "./vpx_config.h"
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_convolve.h"
#include "vp9/common/vp9_filter.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"

// 8-tap horizontal convolution. x_q4 holds the source position in
// 1/16-pel (q4) units: the high bits index the source pixel, the low
// SUBPEL_BITS select the kernel phase.
static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const InterpKernel *x_filters,
                           int x0_q4, int x_step_q4, int w, int h) {
  int x, y;
  src -= SUBPEL_TAPS / 2 - 1;
  for (y = 0; y < h; ++y) {
    int x_q4 = x0_q4;
    for (x = 0; x < w; ++x) {
      const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
      int k, sum = 0;
      for (k = 0; k < SUBPEL_TAPS; ++k)
        sum += src_x[k] * x_filter[k];
      dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

// Same as convolve_horiz(), but averages the filtered result into dst
// (round-half-up).
static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const InterpKernel *x_filters,
                               int x0_q4, int x_step_q4, int w, int h) {
  int x, y;
  src -= SUBPEL_TAPS / 2 - 1;
  for (y = 0; y < h; ++y) {
    int x_q4 = x0_q4;
    for (x = 0; x < w; ++x) {
      const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
      int k, sum = 0;
      for (k = 0; k < SUBPEL_TAPS; ++k)
        sum += src_x[k] * x_filter[k];
      dst[x] = ROUND_POWER_OF_TWO(dst[x] +
          clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

// 8-tap vertical convolution; the q4 walk mirrors convolve_horiz(),
// but proceeds down each column.
static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride,
                          uint8_t *dst, ptrdiff_t dst_stride,
                          const InterpKernel *y_filters,
                          int y0_q4, int y_step_q4, int w, int h) {
  int x, y;
  src -= src_stride * (SUBPEL_TAPS / 2 - 1);

  for (x = 0; x < w; ++x) {
    int y_q4 = y0_q4;
    for (y = 0; y < h; ++y) {
      const uint8_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
      int k, sum = 0;
      for (k = 0; k < SUBPEL_TAPS; ++k)
        sum += src_y[k * src_stride] * y_filter[k];
      dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}

// Same as convolve_vert(), but averages the filtered result into dst.
static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
                              const InterpKernel *y_filters,
                              int y0_q4, int y_step_q4, int w, int h) {
  int x, y;
  src -= src_stride * (SUBPEL_TAPS / 2 - 1);

  for (x = 0; x < w; ++x) {
    int y_q4 = y0_q4;
    for (y = 0; y < h; ++y) {
      const uint8_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
      int k, sum = 0;
      for (k = 0; k < SUBPEL_TAPS; ++k)
        sum += src_y[k * src_stride] * y_filter[k];
      dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +
          clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}
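// Illustrative sketch (not compiled into the library): how the q4
// fixed-point walk in the kernels above maps each output to a source
// pixel and a filter phase. The constants mirror vp9_filter.h, where
// SUBPEL_BITS is 4, so a position carries four fractional bits:
// x_q4 >> 4 is the integer source pixel and x_q4 & 15 selects one of
// the 16 kernel phases.
#if 0
#include <stdio.h>

static void show_subpel_walk(int x0_q4, int x_step_q4, int w) {
  int x, x_q4 = x0_q4;
  for (x = 0; x < w; ++x) {
    // integer source pixel and 1/16-pel filter phase for output x
    printf("out %d: src pixel %d, phase %d/16\n", x, x_q4 >> 4, x_q4 & 15);
    x_q4 += x_step_q4;
  }
}

int main(void) {
  show_subpel_walk(3, 16, 4);  // unscaled: pixel advances by 1, phase stays 3
  show_subpel_walk(0, 24, 4);  // 2/3 scale: pixels 0,1,3,4; phases 0,8,0,8
  return 0;
}
#endif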
// Separable two-pass convolution: run the horizontal pass into a temp
// buffer tall enough to feed the 8-tap vertical pass, then run the
// vertical pass from the temp buffer into dst.
static void convolve(const uint8_t *src, ptrdiff_t src_stride,
                     uint8_t *dst, ptrdiff_t dst_stride,
                     const InterpKernel *const x_filters,
                     int x0_q4, int x_step_q4,
                     const InterpKernel *const y_filters,
                     int y0_q4, int y_step_q4,
                     int w, int h) {
  // Fixed size intermediate buffer places limits on parameters.
  // Maximum intermediate_height is 324, for y_step_q4 == 80,
  // h == 64, taps == 8.
  // y_step_q4 of 80 allows for 1/10 scale for 5 layer svc
  uint8_t temp[64 * 324];
  int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + SUBPEL_TAPS;

  assert(w <= 64);
  assert(h <= 64);
  assert(y_step_q4 <= 80);
  assert(x_step_q4 <= 80);

  if (intermediate_height < h)
    intermediate_height = h;

  convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64,
                 x_filters, x0_q4, x_step_q4, w, intermediate_height);
  convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride,
                y_filters, y0_q4, y_step_q4, w, h);
}

static const InterpKernel *get_filter_base(const int16_t *filter) {
  // NOTE: This assumes that the filter table is 256-byte aligned.
  // TODO(agrange) Modify to make independent of table alignment.
  return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF));
}

static int get_filter_offset(const int16_t *f, const InterpKernel *base) {
  return (int)((const InterpKernel *)(intptr_t)f - base);
}

void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const int16_t *filter_x, int x_step_q4,
                           const int16_t *filter_y, int y_step_q4,
                           int w, int h) {
  const InterpKernel *const filters_x = get_filter_base(filter_x);
  const int x0_q4 = get_filter_offset(filter_x, filters_x);
  (void)filter_y;
  (void)y_step_q4;

  convolve_horiz(src, src_stride, dst, dst_stride, filters_x,
                 x0_q4, x_step_q4, w, h);
}

void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const int16_t *filter_x, int x_step_q4,
                               const int16_t *filter_y, int y_step_q4,
                               int w, int h) {
  const InterpKernel *const filters_x = get_filter_base(filter_x);
  const int x0_q4 = get_filter_offset(filter_x, filters_x);
  (void)filter_y;
  (void)y_step_q4;

  convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x,
                     x0_q4, x_step_q4, w, h);
}

void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                          uint8_t *dst, ptrdiff_t dst_stride,
                          const int16_t *filter_x, int x_step_q4,
                          const int16_t *filter_y, int y_step_q4,
                          int w, int h) {
  const InterpKernel *const filters_y = get_filter_base(filter_y);
  const int y0_q4 = get_filter_offset(filter_y, filters_y);
  (void)filter_x;
  (void)x_step_q4;

  convolve_vert(src, src_stride, dst, dst_stride, filters_y,
                y0_q4, y_step_q4, w, h);
}

void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
                              const int16_t *filter_x, int x_step_q4,
                              const int16_t *filter_y, int y_step_q4,
                              int w, int h) {
  const InterpKernel *const filters_y = get_filter_base(filter_y);
  const int y0_q4 = get_filter_offset(filter_y, filters_y);
  (void)filter_x;
  (void)x_step_q4;

  convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y,
                    y0_q4, y_step_q4, w, h);
}
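// Sketch of the pointer trick behind get_filter_base() and
// get_filter_offset() above (not compiled into the library). A full
// table of 16 kernels x 8 taps x sizeof(int16_t) occupies exactly 256
// bytes, so when the table is 256-byte aligned, clearing the low eight
// address bits of any kernel pointer recovers the table base, and
// pointer subtraction recovers the kernel index. ExampleKernel,
// example_filters, and check_round_trip are hypothetical names;
// __attribute__((aligned(256))) is a GCC/Clang extension used only
// for this sketch.
#if 0
#include <assert.h>
#include <stdint.h>

typedef int16_t ExampleKernel[8];

// hypothetical 256-byte-aligned filter table; zero-filled is fine,
// since only the addresses matter here
static const ExampleKernel example_filters[16]
    __attribute__((aligned(256)));

static void check_round_trip(void) {
  int i;
  for (i = 0; i < 16; ++i) {
    const int16_t *f = example_filters[i];
    const ExampleKernel *base =
        (const ExampleKernel *)(((intptr_t)f) & ~((intptr_t)0xFF));
    assert(base == example_filters);                          // base recovered
    assert((int)((const ExampleKernel *)(intptr_t)f - base) == i);  // index
  }
}

int main(void) {
  check_round_trip();
  return 0;
}
#endif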
void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
                     uint8_t *dst, ptrdiff_t dst_stride,
                     const int16_t *filter_x, int x_step_q4,
                     const int16_t *filter_y, int y_step_q4,
                     int w, int h) {
  const InterpKernel *const filters_x = get_filter_base(filter_x);
  const int x0_q4 = get_filter_offset(filter_x, filters_x);

  const InterpKernel *const filters_y = get_filter_base(filter_y);
  const int y0_q4 = get_filter_offset(filter_y, filters_y);

  convolve(src, src_stride, dst, dst_stride,
           filters_x, x0_q4, x_step_q4,
           filters_y, y0_q4, y_step_q4, w, h);
}

// Filter into a temp buffer, then average the result into dst.
void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
                         uint8_t *dst, ptrdiff_t dst_stride,
                         const int16_t *filter_x, int x_step_q4,
                         const int16_t *filter_y, int y_step_q4,
                         int w, int h) {
  /* Fixed size intermediate buffer places limits on parameters. */
  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 64 * 64);
  assert(w <= 64);
  assert(h <= 64);

  vp9_convolve8_c(src, src_stride, temp, 64,
                  filter_x, x_step_q4, filter_y, y_step_q4, w, h);
  vp9_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);
}

void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
                         uint8_t *dst, ptrdiff_t dst_stride,
                         const int16_t *filter_x, int filter_x_stride,
                         const int16_t *filter_y, int filter_y_stride,
                         int w, int h) {
  int r;
  (void)filter_x;
  (void)filter_y;
  (void)filter_x_stride;
  (void)filter_y_stride;

  for (r = h; r > 0; --r) {
    vpx_memcpy(dst, src, w);
    src += src_stride;
    dst += dst_stride;
  }
}

void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,
                        uint8_t *dst, ptrdiff_t dst_stride,
                        const int16_t *filter_x, int filter_x_stride,
                        const int16_t *filter_y, int filter_y_stride,
                        int w, int h) {
  int x, y;
  (void)filter_x;
  (void)filter_y;
  (void)filter_x_stride;
  (void)filter_y_stride;

  for (y = 0; y < h; ++y) {
    for (x = 0; x < w; ++x)
      dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);

    src += src_stride;
    dst += dst_stride;
  }
}
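// The _avg paths above merge the new prediction into dst with
// ROUND_POWER_OF_TWO(dst + pred, 1), a round-half-up average. A
// minimal standalone sketch of that arithmetic, assuming the macro
// expands as in vp9_common.h:
#if 0
#include <assert.h>

#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

int main(void) {
  assert(ROUND_POWER_OF_TWO(10 + 20, 1) == 15);  // exact average
  assert(ROUND_POWER_OF_TWO(10 + 21, 1) == 16);  // halves round up
  return 0;
}
#endif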