/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */


#include <float.h>
#include <math.h>
#include <stdio.h>
#include "vpx_mem/vpx_mem.h"
#include "vpxscale_arbitrary.h"

/* Shared scaler state initialized elsewhere: per-phase coefficient tables
 * (c_w, c_h), per-output-pixel source-index tables (l_w, l_h), phase counts
 * (nw, nh), and a scratch row buffer (hbuf).  See vpxscale_arbitrary.h. */
extern BICUBIC_SCALER_STRUCT g_b_scaler;

/* Bicubic rescale of one 8-bit plane, written for the TI C64x DSP using
 * C6000 compiler intrinsics (_mem4_const, _dotp2, _pack2, _spacku4, ...).
 *
 * For each output row the plane is filtered in two separable passes:
 *   1. Vertical: four source rows (lh-1 .. lh+2) are combined with the
 *      4-tap coefficients for the current vertical phase into a temporary
 *      row buffer, 4 pixels per iteration.
 *   2. Horizontal: each output pixel is a 4-tap dot product over the
 *      temporary row at the source column given by l_w[].
 * Results are shifted down by 12 after each pass, so the coefficient
 * tables appear to hold Q12 fixed-point taps.
 *
 * Parameters: source/destination dimensions and strides in pixels/bytes,
 * plus pointers to the raw 8-bit planes.  Always returns 0.
 */
int bicubic_scale_c64(int in_width, int in_height, int in_stride,
                      int out_width, int out_height, int out_stride,
                      unsigned char *input_image, unsigned char *output_image)
{
    short *restrict l_w, * restrict l_h;
    short *restrict c_w, * restrict c_h;
    unsigned char *restrict ip, * restrict op, *restrict op_w;
    unsigned char *restrict hbuf;
    int h, w, lw, lh;
    int phase_offset_w, phase_offset_h;
    double coeff;
    int max_phase;

    c_w = g_b_scaler.c_w;
    c_h = g_b_scaler.c_h;

    op = output_image;

    l_w = g_b_scaler.l_w;
    l_h = g_b_scaler.l_h;

    phase_offset_h = 0;

    for (h = 0; h < out_height; h++)
    {
        // select the source row this output row is interpolated from
        lh = l_h[h];
        ip = input_image + (in_stride * lh);

        // load the 4 vertical Q12 taps for the current phase as one
        // unaligned 8-byte access (4 shorts carried in a double).
        coeff = _memd8_const(&c_h[phase_offset_h*4]);

        // vp8_filter the row vertically into an temporary buffer.
        // If the phase offset == 0 then all the multiplication
        // is going to result in the output equalling the input.
        // So instead point the temporary buffer to the input.
        // Also handle the boundry condition of not being able to
        // filter that last lines.
        // NOTE(review): there is no matching guard for the top edge --
        // when lh == 0 with a nonzero phase, ip[w - in_stride] reads one
        // row before the image.  Presumably the l_h table never produces
        // that combination; confirm against the table setup code.
        if (phase_offset_h && (lh < in_height - 2))
        {
            hbuf = g_b_scaler.hbuf;

            // filter 4 output pixels per iteration
            for (w = 0; w < in_width; w += 4)
            {
                int ip1, ip2, ip3, ip4;
                int y13_12, y11_10, y23_22, y21_20, y33_32, y31_30, y43_42, y41_40;
                int y10_20, y11_21, y12_22, y13_23, y30_40, y31_41, y32_42, y33_43;
                int s1, s2, s3, s4;

                // load 4 bytes from each of the 4 contributing rows
                // (lh-1, lh, lh+1, lh+2); unaligned loads.
                ip1 = _mem4_const(&ip[w - in_stride]);
                ip2 = _mem4_const(&ip[w]);
                ip3 = _mem4_const(&ip[w + in_stride]);
                ip4 = _mem4_const(&ip[w + 2*in_stride]);

                // realignment of data. Unpack the data so that it is in short
                // format instead of bytes.
                y13_12 = _unpkhu4(ip1);
                y11_10 = _unpklu4(ip1);
                y23_22 = _unpkhu4(ip2);
                y21_20 = _unpklu4(ip2);
                y33_32 = _unpkhu4(ip3);
                y31_30 = _unpklu4(ip3);
                y43_42 = _unpkhu4(ip4);
                y41_40 = _unpklu4(ip4);

                // repack the data so that elements 1 and 2 are together. this
                // lines up so that a dot product with the coefficients can be
                // done.
                y10_20 = _pack2(y11_10, y21_20);
                y11_21 = _packh2(y11_10, y21_20);
                y12_22 = _pack2(y13_12, y23_22);
                y13_23 = _packh2(y13_12, y23_22);

                // taps 0/1 (high half of coeff) times rows lh-1/lh
                s1 = _dotp2(_hi(coeff), y10_20);
                s2 = _dotp2(_hi(coeff), y11_21);
                s3 = _dotp2(_hi(coeff), y12_22);
                s4 = _dotp2(_hi(coeff), y13_23);

                y30_40 = _pack2(y31_30, y41_40);
                y31_41 = _packh2(y31_30, y41_40);
                y32_42 = _pack2(y33_32, y43_42);
                y33_43 = _packh2(y33_32, y43_42);

                // now repack elements 3 and 4 together.
                // taps 2/3 (low half of coeff) times rows lh+1/lh+2
                s1 += _dotp2(_lo(coeff), y30_40);
                s2 += _dotp2(_lo(coeff), y31_41);
                s3 += _dotp2(_lo(coeff), y32_42);
                s4 += _dotp2(_lo(coeff), y33_43);

                // drop the Q12 fraction
                s1 = s1 >> 12;
                s2 = s2 >> 12;
                s3 = s3 >> 12;
                s4 = s4 >> 12;

                // repack the 4 results to halfwords, then saturate/pack to
                // 4 unsigned bytes and store aligned into the row buffer.
                s1 = _pack2(s2, s1);
                s2 = _pack2(s4, s3);

                _amem4(&hbuf[w]) = _spacku4(s2, s1);
            }
        }
        else
            // zero phase (or bottom boundary): the vertical filter is the
            // identity, so alias the source row instead of copying it.
            hbuf = ip;

        // increase the phase offset for the next time around.
        if (++phase_offset_h >= g_b_scaler.nh)
            phase_offset_h = 0;

        op_w = op;

        // will never be able to interpolate first pixel, so just copy it
        // over here.
        phase_offset_w = 1;
        *op_w++ = hbuf[0];

        if (1 >= g_b_scaler.nw) phase_offset_w = 0;

        max_phase = g_b_scaler.nw;

        // horizontal pass: 4-tap filter across the (vertically filtered) row
        for (w = 1; w < out_width; w++)
        {
            double coefficients;
            int hbuf_high, hbuf_low, hbuf_both;
            int sum_high, sum_low, sum;

            // get the index to use to expand the image
            lw = l_w[w];
            // 4 horizontal Q12 taps for this phase (aligned 8-byte load)
            coefficients = _amemd8_const(&c_w[phase_offset_w*4]);
            // 4 source pixels centred on lw (lw-1 .. lw+2)
            hbuf_both = _mem4_const(&hbuf[lw-1]);

            hbuf_high = _unpkhu4(hbuf_both);
            hbuf_low = _unpklu4(hbuf_both);

            sum_high = _dotp2(_hi(coefficients), hbuf_high);
            sum_low = _dotp2(_lo(coefficients), hbuf_low);

            sum = (sum_high + sum_low) >> 12;

            if (++phase_offset_w >= max_phase)
                phase_offset_w = 0;

            // right boundary: not enough pixels to filter, copy instead
            if ((lw + 2) >= in_width)
                sum = hbuf[lw];

            *op_w++ = sum;
        }

        op += out_stride;
    }

    return 0;
}

/* Rescales a YV12 frame to new_width x new_height using bicubic_scale_c64
 * on the luma plane and on each chroma plane at half resolution (4:2:0).
 *
 * Side effects: overwrites dst's plane dimensions and sets both strides
 * equal to the new plane widths before scaling into dst's buffers.
 * NOTE(review): assumes dst's y/u/v buffers are large enough for the new
 * dimensions with stride == width, and that new_width/new_height are even;
 * neither is checked here -- confirm at the call sites.
 */
void bicubic_scale_frame_c64(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst,
                             int new_width, int new_height)
{

    dst->y_width = new_width;
    dst->y_height = new_height;
    dst->uv_width = new_width / 2;
    dst->uv_height = new_height / 2;

    dst->y_stride = dst->y_width;
    dst->uv_stride = dst->uv_width;

    bicubic_scale_c64(src->y_width, src->y_height, src->y_stride,
                      new_width, new_height, dst->y_stride,
                      src->y_buffer, dst->y_buffer);

    bicubic_scale_c64(src->uv_width, src->uv_height, src->uv_stride,
                      new_width / 2, new_height / 2, dst->uv_stride,
                      src->u_buffer, dst->u_buffer);

    bicubic_scale_c64(src->uv_width, src->uv_height, src->uv_stride,
                      new_width / 2, new_height / 2, dst->uv_stride,
                      src->v_buffer, dst->v_buffer);
}