Home | History | Annotate | Download | only in simd
      1 /*
      2  * AltiVec optimizations for libjpeg-turbo
      3  *
      4  * Copyright (C) 2015, D. R. Commander.  All Rights Reserved.
      5  *
      6  * This software is provided 'as-is', without any express or implied
      7  * warranty.  In no event will the authors be held liable for any damages
      8  * arising from the use of this software.
      9  *
     10  * Permission is granted to anyone to use this software for any purpose,
     11  * including commercial applications, and to alter it and redistribute it
     12  * freely, subject to the following restrictions:
     13  *
     14  * 1. The origin of this software must not be misrepresented; you must not
     15  *    claim that you wrote the original software. If you use this software
     16  *    in a product, an acknowledgment in the product documentation would be
     17  *    appreciated but is not required.
     18  * 2. Altered source versions must be plainly marked as such, and must not be
     19  *    misrepresented as being the original software.
     20  * 3. This notice may not be removed or altered from any source distribution.
     21  */
     22 
     23 /* CHROMA DOWNSAMPLING */
     24 
     25 #include "jsimd_altivec.h"
     26 #include "jcsample.h"
     27 
     28 
     29 void
     30 jsimd_h2v1_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor,
     31                                JDIMENSION v_samp_factor,
     32                                JDIMENSION width_blocks,
     33                                JSAMPARRAY input_data, JSAMPARRAY output_data)
     34 {
     35   int outrow, outcol;
     36   JDIMENSION output_cols = width_blocks * DCTSIZE;
     37   JSAMPROW inptr, outptr;
     38 
     39   __vector unsigned char this0, next0, out;
     40   __vector unsigned short this0e, this0o, next0e, next0o, outl, outh;
     41 
     42   /* Constants */
     43   __vector unsigned short pw_bias = { __4X2(0, 1) },
     44     pw_one = { __8X(1) };
     45   __vector unsigned char even_odd_index =
     46     {0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15},
     47     pb_zero = { __16X(0) };
     48 
     49   expand_right_edge(input_data, max_v_samp_factor, image_width,
     50                     output_cols * 2);
     51 
     52   for (outrow = 0; outrow < v_samp_factor; outrow++) {
     53     outptr = output_data[outrow];
     54     inptr = input_data[outrow];
     55 
     56     for (outcol = output_cols; outcol > 0;
     57          outcol -= 16, inptr += 32, outptr += 16) {
     58 
     59       this0 = vec_ld(0, inptr);
     60       this0 = vec_perm(this0, this0, even_odd_index);
     61       this0e = (__vector unsigned short)VEC_UNPACKHU(this0);
     62       this0o = (__vector unsigned short)VEC_UNPACKLU(this0);
     63       outl = vec_add(this0e, this0o);
     64       outl = vec_add(outl, pw_bias);
     65       outl = vec_sr(outl, pw_one);
     66 
     67       if (outcol > 8) {
     68         next0 = vec_ld(16, inptr);
     69         next0 = vec_perm(next0, next0, even_odd_index);
     70         next0e = (__vector unsigned short)VEC_UNPACKHU(next0);
     71         next0o = (__vector unsigned short)VEC_UNPACKLU(next0);
     72         outh = vec_add(next0e, next0o);
     73         outh = vec_add(outh, pw_bias);
     74         outh = vec_sr(outh, pw_one);
     75       } else
     76         outh = vec_splat_u16(0);
     77 
     78       out = vec_pack(outl, outh);
     79       vec_st(out, 0, outptr);
     80     }
     81   }
     82 }
     83 
     84 
     85 void
     86 jsimd_h2v2_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor,
     87                                JDIMENSION v_samp_factor,
     88                                JDIMENSION width_blocks,
     89                                JSAMPARRAY input_data, JSAMPARRAY output_data)
     90 {
     91   int inrow, outrow, outcol;
     92   JDIMENSION output_cols = width_blocks * DCTSIZE;
     93   JSAMPROW inptr0, inptr1, outptr;
     94 
     95   __vector unsigned char this0, next0, this1, next1, out;
     96   __vector unsigned short this0e, this0o, next0e, next0o, this1e, this1o,
     97     next1e, next1o, out0l, out0h, out1l, out1h, outl, outh;
     98 
     99   /* Constants */
    100   __vector unsigned short pw_bias = { __4X2(1, 2) },
    101     pw_two = { __8X(2) };
    102   __vector unsigned char even_odd_index =
    103     { 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 },
    104     pb_zero = { __16X(0) };
    105 
    106   expand_right_edge(input_data, max_v_samp_factor, image_width,
    107                     output_cols * 2);
    108 
    109   for (inrow = 0, outrow = 0; outrow < v_samp_factor;
    110        inrow += 2, outrow++) {
    111 
    112     inptr0 = input_data[inrow];
    113     inptr1 = input_data[inrow + 1];
    114     outptr = output_data[outrow];
    115 
    116     for (outcol = output_cols; outcol > 0;
    117          outcol -= 16, inptr0 += 32, inptr1 += 32, outptr += 16) {
    118 
    119       this0 = vec_ld(0, inptr0);
    120       this0 = vec_perm(this0, this0, even_odd_index);
    121       this0e = (__vector unsigned short)VEC_UNPACKHU(this0);
    122       this0o = (__vector unsigned short)VEC_UNPACKLU(this0);
    123       out0l = vec_add(this0e, this0o);
    124 
    125       this1 = vec_ld(0, inptr1);
    126       this1 = vec_perm(this1, this1, even_odd_index);
    127       this1e = (__vector unsigned short)VEC_UNPACKHU(this1);
    128       this1o = (__vector unsigned short)VEC_UNPACKLU(this1);
    129       out1l = vec_add(this1e, this1o);
    130 
    131       outl = vec_add(out0l, out1l);
    132       outl = vec_add(outl, pw_bias);
    133       outl = vec_sr(outl, pw_two);
    134 
    135       if (outcol > 8) {
    136         next0 = vec_ld(16, inptr0);
    137         next0 = vec_perm(next0, next0, even_odd_index);
    138         next0e = (__vector unsigned short)VEC_UNPACKHU(next0);
    139         next0o = (__vector unsigned short)VEC_UNPACKLU(next0);
    140         out0h = vec_add(next0e, next0o);
    141 
    142         next1 = vec_ld(16, inptr1);
    143         next1 = vec_perm(next1, next1, even_odd_index);
    144         next1e = (__vector unsigned short)VEC_UNPACKHU(next1);
    145         next1o = (__vector unsigned short)VEC_UNPACKLU(next1);
    146         out1h = vec_add(next1e, next1o);
    147 
    148         outh = vec_add(out0h, out1h);
    149         outh = vec_add(outh, pw_bias);
    150         outh = vec_sr(outh, pw_two);
    151       } else
    152         outh = vec_splat_u16(0);
    153 
    154       out = vec_pack(outl, outh);
    155       vec_st(out, 0, outptr);
    156     }
    157   }
    158 }
    159