Home | History | Annotate | Download | only in dm642
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 #include <float.h>
     13 #include <math.h>
     14 #include <stdio.h>
     15 #include "vpx_mem/vpx_mem.h"
     16 #include "vpxscale_arbitrary.h"
     17 
     18 extern BICUBIC_SCALER_STRUCT g_b_scaler;
     19 
/*
 * Bicubic (4-tap separable) image scaler for the TI C64x DSP (DM642).
 *
 * Scales an in_width x in_height 8-bit plane to out_width x out_height.
 * Per output row: the source is filtered vertically (4 taps, packed SIMD
 * via TI C6x intrinsics) into the temporary row buffer g_b_scaler.hbuf,
 * then filtered horizontally while writing the output row.
 *
 * Inputs:
 *   in_width/in_height/in_stride    - source plane geometry (bytes)
 *   out_width/out_height/out_stride - destination plane geometry (bytes)
 *   input_image/output_image        - 8-bit pixel buffers
 * Returns 0 always.
 *
 * Precomputed state read from g_b_scaler:
 *   l_w/l_h - per-output-pixel source index tables (horizontal/vertical)
 *   c_w/c_h - 4-tap Q12 filter coefficient tables, 4 shorts per phase
 *   nw/nh   - number of filter phases (horizontal/vertical)
 *   hbuf    - intermediate row buffer for the vertical pass
 */
int bicubic_scale_c64(int in_width, int in_height, int in_stride,
                      int out_width, int out_height, int out_stride,
                      unsigned char *input_image, unsigned char *output_image)
{
    short *restrict l_w, * restrict l_h;
    short *restrict c_w, * restrict c_h;
    unsigned char *restrict ip, * restrict op, *restrict op_w;
    unsigned char *restrict hbuf;
    int h, w, lw, lh;
    int phase_offset_w, phase_offset_h;
    /* 'double' here is a TI C6x idiom: a 64-bit register pair holding four
     * packed 16-bit coefficients, accessed with _hi()/_lo(). */
    double coeff;
    int max_phase;

    c_w = g_b_scaler.c_w;
    c_h = g_b_scaler.c_h;

    op = output_image;

    l_w = g_b_scaler.l_w;
    l_h = g_b_scaler.l_h;

    phase_offset_h = 0;

    for (h = 0; h < out_height; h++)
    {
        // select the row to work on
        lh = l_h[h];
        ip = input_image + (in_stride * lh);

        /* Load the 4 vertical taps for this phase (unaligned 64-bit load;
         * note the horizontal pass below uses the aligned _amemd8_const -
         * presumably c_h is not guaranteed 8-byte aligned. TODO confirm. */
        coeff = _memd8_const(&c_h[phase_offset_h*4]);

        // vp8_filter the row vertically into an temporary buffer.
        //  If the phase offset == 0 then all the multiplication
        //  is going to result in the output equalling the input.
        //  So instead point the temporary buffer to the input.
        //  Also handle the boundry condition of not being able to
        //  filter that last lines.
        if (phase_offset_h && (lh < in_height - 2))
        {
            hbuf = g_b_scaler.hbuf;

            /* Vertical pass: produce 4 output pixels per iteration from a
             * 4x4 neighborhood (rows lh-1 .. lh+2). */
            for (w = 0; w < in_width; w += 4)
            {
                int ip1, ip2, ip3, ip4;
                int y13_12, y11_10, y23_22, y21_20, y33_32, y31_30, y43_42, y41_40;
                int y10_20, y11_21, y12_22, y13_23, y30_40, y31_41, y32_42, y33_43;
                int s1, s2, s3, s4;

                /* Four bytes from each of the four tap rows.  Note the
                 * w - in_stride access reads one row above ip; the h-loop
                 * only guards the bottom edge (lh < in_height - 2), so the
                 * top row is presumably guaranteed by lh >= 1 when
                 * phase_offset_h != 0 - TODO confirm against table setup. */
                ip1 = _mem4_const(&ip[w - in_stride]);
                ip2 = _mem4_const(&ip[w]);
                ip3 = _mem4_const(&ip[w + in_stride]);
                ip4 = _mem4_const(&ip[w + 2*in_stride]);

                // realignment of data.  Unpack the data so that it is in short
                //  format instead of bytes.
                y13_12 = _unpkhu4(ip1);
                y11_10 = _unpklu4(ip1);
                y23_22 = _unpkhu4(ip2);
                y21_20 = _unpklu4(ip2);
                y33_32 = _unpkhu4(ip3);
                y31_30 = _unpklu4(ip3);
                y43_42 = _unpkhu4(ip4);
                y41_40 = _unpklu4(ip4);

                // repack the data so that elements 1 and 2 are together.  this
                //  lines up so that a dot product with the coefficients can be
                //  done.
                y10_20 = _pack2(y11_10, y21_20);
                y11_21 = _packh2(y11_10, y21_20);
                y12_22 = _pack2(y13_12, y23_22);
                y13_23 = _packh2(y13_12, y23_22);

                /* Taps 1+2 contribution for each of the 4 columns. */
                s1 = _dotp2(_hi(coeff), y10_20);
                s2 = _dotp2(_hi(coeff), y11_21);
                s3 = _dotp2(_hi(coeff), y12_22);
                s4 = _dotp2(_hi(coeff), y13_23);

                y30_40 = _pack2(y31_30, y41_40);
                y31_41 = _packh2(y31_30, y41_40);
                y32_42 = _pack2(y33_32, y43_42);
                y33_43 = _packh2(y33_32, y43_42);

                // now repack elements 3 and 4 together.
                s1 += _dotp2(_lo(coeff), y30_40);
                s2 += _dotp2(_lo(coeff), y31_41);
                s3 += _dotp2(_lo(coeff), y32_42);
                s4 += _dotp2(_lo(coeff), y33_43);

                /* Coefficients are Q12 fixed point; drop the fraction. */
                s1 = s1 >> 12;
                s2 = s2 >> 12;
                s3 = s3 >> 12;
                s4 = s4 >> 12;

                /* Repack the 4 results to bytes (with saturation) and store
                 * them with one aligned 32-bit write. */
                s1 = _pack2(s2, s1);
                s2 = _pack2(s4, s3);

                _amem4(&hbuf[w])  = _spacku4(s2, s1);
            }
        }
        else
            /* NOTE(review): hbuf and ip are both restrict-qualified yet
             * alias here; presumably benign since hbuf is only read on this
             * path - confirm against the TI compiler's restrict handling. */
            hbuf = ip;

        // increase the phase offset for the next time around.
        if (++phase_offset_h >= g_b_scaler.nh)
            phase_offset_h = 0;

        op_w = op;

        // will never be able to interpolate first pixel, so just copy it
        // over here.
        phase_offset_w = 1;
        *op_w++ = hbuf[0];

        /* Degenerate case: only one horizontal phase exists. */
        if (1 >= g_b_scaler.nw) phase_offset_w = 0;

        max_phase = g_b_scaler.nw;

        /* Horizontal pass: one output pixel per iteration from 4
         * neighboring intermediate pixels (hbuf[lw-1 .. lw+2]). */
        for (w = 1; w < out_width; w++)
        {
            double coefficients;
            int hbuf_high, hbuf_low, hbuf_both;
            int sum_high, sum_low, sum;

            // get the index to use to expand the image
            lw = l_w[w];
            coefficients = _amemd8_const(&c_w[phase_offset_w*4]);
            hbuf_both = _mem4_const(&hbuf[lw-1]);

            /* Unpack 4 bytes to shorts and dot with the 4 Q12 taps. */
            hbuf_high = _unpkhu4(hbuf_both);
            hbuf_low  = _unpklu4(hbuf_both);

            sum_high = _dotp2(_hi(coefficients), hbuf_high);
            sum_low  = _dotp2(_lo(coefficients), hbuf_low);

            sum = (sum_high + sum_low) >> 12;

            if (++phase_offset_w >= max_phase)
                phase_offset_w = 0;

            /* Right-edge guard: can't read a full 4-tap window, so pass the
             * nearest source pixel through unfiltered. */
            if ((lw + 2) >= in_width)
                sum = hbuf[lw];

            *op_w++ = sum;
        }

        op += out_stride;
    }

    return 0;
}
    170 
    171 void bicubic_scale_frame_c64(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst,
    172                              int new_width, int new_height)
    173 {
    174 
    175     dst->y_width = new_width;
    176     dst->y_height = new_height;
    177     dst->uv_width = new_width / 2;
    178     dst->uv_height = new_height / 2;
    179 
    180     dst->y_stride = dst->y_width;
    181     dst->uv_stride = dst->uv_width;
    182 
    183     bicubic_scale_c64(src->y_width, src->y_height, src->y_stride,
    184                       new_width, new_height, dst->y_stride,
    185                       src->y_buffer, dst->y_buffer);
    186 
    187     bicubic_scale_c64(src->uv_width, src->uv_height, src->uv_stride,
    188                       new_width / 2, new_height / 2, dst->uv_stride,
    189                       src->u_buffer, dst->u_buffer);
    190 
    191     bicubic_scale_c64(src->uv_width, src->uv_height, src->uv_stride,
    192                       new_width / 2, new_height / 2, dst->uv_stride,
    193                       src->v_buffer, dst->v_buffer);
    194 }
    195