Home | History | Annotate | Download | only in src
      1 /* ------------------------------------------------------------------
      2  * Copyright (C) 1998-2009 PacketVideo
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
     13  * express or implied.
     14  * See the License for the specific language governing permissions
     15  * and limitations under the License.
     16  * -------------------------------------------------------------------
     17  */
     18 /*  Filename: dct_inline.h                                                      */
     19 /*  Description: Implementation for in-line functions used in dct.cpp           */
     20 /*  Modified:                                                                   */
     21 /*********************************************************************************/
     22 #ifndef _DCT_INLINE_H_
     23 #define _DCT_INLINE_H_
     24 
     25 #if !defined(PV_ARM_GCC_V5) && !defined(PV_ARM_GCC_V4)
     26 
     27 __inline int32 mla724(int32 op1, int32 op2, int32 op3)
     28 {
     29     int32 out;
     30 
     31     OSCL_UNUSED_ARG(op1);
     32 
     33     out = op2 * 724 + op3; /* op1 is not used here */
     34 
     35     return out;
     36 }
     37 
     38 __inline int32 mla392(int32 k0, int32 k14, int32 round)
     39 {
     40     int32 k1;
     41 
     42     OSCL_UNUSED_ARG(k14);
     43 
     44     k1 = k0 * 392 + round;
     45 
     46     return k1;
     47 }
     48 
     49 __inline int32 mla554(int32 k4, int32 k12, int32 k1)
     50 {
     51     int32 k0;
     52 
     53     OSCL_UNUSED_ARG(k12);
     54 
     55     k0 = k4 * 554 + k1;
     56 
     57     return k0;
     58 }
     59 
     60 __inline int32 mla1338(int32 k6, int32 k14, int32 k1)
     61 {
     62     int32 out;
     63 
     64     OSCL_UNUSED_ARG(k14);
     65 
     66     out = k6 * 1338 + k1;
     67 
     68     return out;
     69 }
     70 
     71 __inline int32 mla946(int32 k6, int32 k14, int32 k1)
     72 {
     73     int32 out;
     74 
     75     OSCL_UNUSED_ARG(k14);
     76 
     77     out = k6 * 946 + k1;
     78 
     79     return out;
     80 }
     81 
     82 __inline int32 sum_abs(int32 k0, int32 k1, int32 k2, int32 k3,
     83                        int32 k4, int32 k5, int32 k6, int32 k7)
     84 {
     85     int32 carry, abs_sum;
     86 
     87     carry = k0 >> 31;
     88     abs_sum = (k0 ^ carry);
     89     carry = k1 >> 31;
     90     abs_sum += (k1 ^ carry) - carry;
     91     carry = k2 >> 31;
     92     abs_sum += (k2 ^ carry) - carry;
     93     carry = k3 >> 31;
     94     abs_sum += (k3 ^ carry) - carry;
     95     carry = k4 >> 31;
     96     abs_sum += (k4 ^ carry) - carry;
     97     carry = k5 >> 31;
     98     abs_sum += (k5 ^ carry) - carry;
     99     carry = k6 >> 31;
    100     abs_sum += (k6 ^ carry) - carry;
    101     carry = k7 >> 31;
    102     abs_sum += (k7 ^ carry) - carry;
    103 
    104     return abs_sum;
    105 }
    106 
    107 #elif defined(__CC_ARM)  /* only work with arm v5 */
    108 
    109 #if defined(__TARGET_ARCH_5TE)
    110 
    111 __inline int32 mla724(int32 op1, int32 op2, int32 op3)
    112 {
    113     int32 out;
    114 
    115     __asm
    116     {
    117         smlabb out, op1, op2, op3
    118     }
    119 
    120     return out;
    121 }
    122 
    123 __inline int32 mla392(int32 k0, int32 k14, int32 round)
    124 {
    125     int32 k1;
    126 
    127     __asm
    128     {
    129         smlabt k1, k0, k14, round
    130     }
    131 
    132     return k1;
    133 }
    134 
    135 __inline int32 mla554(int32 k4, int32 k12, int32 k1)
    136 {
    137     int32 k0;
    138 
    139     __asm
    140     {
    141         smlabt k0, k4, k12, k1
    142     }
    143 
    144     return k0;
    145 }
    146 
    147 __inline int32 mla1338(int32 k6, int32 k14, int32 k1)
    148 {
    149     int32 out;
    150 
    151     __asm
    152     {
    153         smlabb out, k6, k14, k1
    154     }
    155 
    156     return out;
    157 }
    158 
    159 __inline int32 mla946(int32 k6, int32 k14, int32 k1)
    160 {
    161     int32 out;
    162 
    163     __asm
    164     {
    165         smlabb out, k6, k14, k1
    166     }
    167 
    168     return out;
    169 }
    170 
    171 #else // not ARM5TE
    172 
    173 
    174 __inline int32 mla724(int32 op1, int32 op2, int32 op3)
    175 {
    176     int32 out;
    177 
    178     __asm
    179     {
    180         and out, op2, #0xFFFF
    181         mla out, op1, out, op3
    182     }
    183 
    184     return out;
    185 }
    186 
    187 __inline int32 mla392(int32 k0, int32 k14, int32 round)
    188 {
    189     int32 k1;
    190 
    191     __asm
    192     {
    193         mov k1, k14, asr #16
    194         mla k1, k0, k1, round
    195     }
    196 
    197     return k1;
    198 }
    199 
    200 __inline int32 mla554(int32 k4, int32 k12, int32 k1)
    201 {
    202     int32 k0;
    203 
    204     __asm
    205     {
    206         mov  k0, k12, asr #16
    207         mla k0, k4, k0, k1
    208     }
    209 
    210     return k0;
    211 }
    212 
    213 __inline int32 mla1338(int32 k6, int32 k14, int32 k1)
    214 {
    215     int32 out;
    216 
    217     __asm
    218     {
    219         and out, k14, 0xFFFF
    220         mla out, k6, out, k1
    221     }
    222 
    223     return out;
    224 }
    225 
    226 __inline int32 mla946(int32 k6, int32 k14, int32 k1)
    227 {
    228     int32 out;
    229 
    230     __asm
    231     {
    232         and out, k14, 0xFFFF
    233         mla out, k6, out, k1
    234     }
    235 
    236     return out;
    237 }
    238 
    239 #endif
    240 
/*
 * RVCT inline assembler variant: accumulates the magnitudes of eight values.
 *
 * Each input is XORed with its own sign mask (value ASR #31). For k1..k7
 * the XOR is the flag-setting EORS, and the ADC that follows adds the
 * result to the running sum together with the carry flag. The order of
 * the instructions therefore matters: every ADC consumes the carry left
 * by the EORS directly before it.
 *
 * k0 uses a plain EOR (flags untouched), and its result is added by the
 * first ADC as an ordinary operand, so no carry correction is applied for
 * k0. The portable C variant in this header treats k0 the same way.
 *
 * NOTE(review): for "ASR #31" the shifter carry-out is presumably bit 30 of
 * the operand, which equals the sign bit only while |value| < 2^30 --
 * confirm against the ARM Architecture Reference Manual and the range of
 * values passed by callers.
 *
 * Returns the accumulated sum.
 */
__inline int32 sum_abs(int32 k0, int32 k1, int32 k2, int32 k3,
                       int32 k4, int32 k5, int32 k6, int32 k7)
{
    int32 carry, abs_sum;
    /* "carry" is a scratch value holding the current XOR result, not the flag */
    __asm
    {
        eor     carry, k0, k0, asr #31 ;
        eors    abs_sum, k1, k1, asr #31 ;
        adc     abs_sum, abs_sum, carry ;
        eors    carry,  k2, k2, asr #31 ;
        adc     abs_sum, abs_sum, carry ;
        eors    carry,  k3, k3, asr #31 ;
        adc     abs_sum, abs_sum, carry ;
        eors    carry,  k4, k4, asr #31 ;
        adc     abs_sum, abs_sum, carry ;
        eors    carry,  k5, k5, asr #31 ;
        adc     abs_sum, abs_sum, carry ;
        eors    carry,  k6, k6, asr #31 ;
        adc     abs_sum, abs_sum, carry ;
        eors    carry,  k7, k7, asr #31 ;
        adc     abs_sum, abs_sum, carry ;
    }

    return abs_sum;
}
    266 
    267 #elif ( defined(PV_ARM_GCC_V5) || defined(PV_ARM_GCC_V4) )  /* ARM GNU COMPILER  */
    268 
    269 __inline int32 mla724(int32 op1, int32 op2, int32 op3)
    270 {
    271     register int32 out;
    272     register int32 aa = (int32)op1;
    273     register int32 bb = (int32)op2;
    274     register int32 cc = (int32)op3;
    275 
    276     asm volatile("smlabb %0, %1, %2, %3"
    277              : "=&r"(out)
    278                          : "r"(aa),
    279                          "r"(bb),
    280                          "r"(cc));
    281     return out;
    282 }
    283 
    284 
    285 __inline int32 mla392(int32 k0, int32 k14, int32 round)
    286 {
    287     register int32 out;
    288     register int32 aa = (int32)k0;
    289     register int32 bb = (int32)k14;
    290     register int32 cc = (int32)round;
    291 
    292     asm volatile("smlabt %0, %1, %2, %3"
    293              : "=&r"(out)
    294                          : "r"(aa),
    295                          "r"(bb),
    296                          "r"(cc));
    297 
    298     return out;
    299 }
    300 
    301 __inline int32 mla554(int32 k4, int32 k12, int32 k1)
    302 {
    303     register int32 out;
    304     register int32 aa = (int32)k4;
    305     register int32 bb = (int32)k12;
    306     register int32 cc = (int32)k1;
    307 
    308     asm volatile("smlabt %0, %1, %2, %3"
    309              : "=&r"(out)
    310                          : "r"(aa),
    311                          "r"(bb),
    312                          "r"(cc));
    313 
    314     return out;
    315 }
    316 
    317 __inline int32 mla1338(int32 k6, int32 k14, int32 k1)
    318 {
    319     register int32 out;
    320     register int32 aa = (int32)k6;
    321     register int32 bb = (int32)k14;
    322     register int32 cc = (int32)k1;
    323 
    324     asm volatile("smlabb %0, %1, %2, %3"
    325              : "=&r"(out)
    326                          : "r"(aa),
    327                          "r"(bb),
    328                          "r"(cc));
    329     return out;
    330 }
    331 
    332 __inline int32 mla946(int32 k6, int32 k14, int32 k1)
    333 {
    334     register int32 out;
    335     register int32 aa = (int32)k6;
    336     register int32 bb = (int32)k14;
    337     register int32 cc = (int32)k1;
    338 
    339     asm volatile("smlabb %0, %1, %2, %3"
    340              : "=&r"(out)
    341                          : "r"(aa),
    342                          "r"(bb),
    343                          "r"(cc));
    344     return out;
    345 }
    346 
    347 __inline int32 sum_abs(int32 k0, int32 k1, int32 k2, int32 k3,
    348                        int32 k4, int32 k5, int32 k6, int32 k7)
    349 {
    350     register int32 carry;
    351     register int32 abs_sum;
    352     register int32 aa = (int32)k0;
    353     register int32 bb = (int32)k1;
    354     register int32 cc = (int32)k2;
    355     register int32 dd = (int32)k3;
    356     register int32 ee = (int32)k4;
    357     register int32 ff = (int32)k5;
    358     register int32 gg = (int32)k6;
    359     register int32 hh = (int32)k7;
    360 
    361     asm volatile("eor  %0, %2, %2, asr #31\n\t"
    362                  "eors %1, %3, %3, asr #31\n\t"
    363                  "adc  %1, %1, %0\n\t"
    364                  "eors %0, %4, %4, asr #31\n\t"
    365                  "adc  %1, %1, %0\n\t"
    366                  "eors %0, %5, %5, asr #31\n\t"
    367                  "adc  %1, %1, %0\n\t"
    368                  "eors %0, %6, %6, asr #31\n\t"
    369                  "adc  %1, %1, %0\n\t"
    370                  "eors %0, %7, %7, asr #31\n\t"
    371                  "adc  %1, %1, %0\n\t"
    372                  "eors %0, %8, %8, asr #31\n\t"
    373                  "adc  %1, %1, %0\n\t"
    374                  "eors %0, %9, %9, asr #31\n\t"
    375                  "adc  %1, %1, %0\n\t"
    376 
    377              : "=&r"(carry),
    378                  "=&r"(abs_sum):
    379                          "r"(aa),
    380                          "r"(bb),
    381                          "r"(cc),
    382                          "r"(dd),
    383                          "r"(ee),
    384                          "r"(ff),
    385                          "r"(gg),
    386                          "r"(hh));
    387 
    388     return abs_sum;
    389 }
    390 
    391 #endif // Diff. OS
    392 
    393 #endif //_DCT_INLINE_H_
    394 
    395 
    396