Home | History | Annotate | Download | only in arm
      1 @/*****************************************************************************
      2 @*
      3 @* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
      4 @*
      5 @* Licensed under the Apache License, Version 2.0 (the "License");
      6 @* you may not use this file except in compliance with the License.
      7 @* You may obtain a copy of the License at:
      8 @*
      9 @* http://www.apache.org/licenses/LICENSE-2.0
     10 @*
     11 @* Unless required by applicable law or agreed to in writing, software
     12 @* distributed under the License is distributed on an "AS IS" BASIS,
     13 @* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 @* See the License for the specific language governing permissions and
     15 @* limitations under the License.
     16 @*
     17 @*****************************************************************************/
     18 @/**
     19 @/*******************************************************************************
     20 @* @file
     21 @*  ihevcd_itrans_recon_dc_chroma.s
     22 @*
     23 @* @brief
     24 @*  contains function definitions itrans and recon for dc only case
     25 @*
     26 @* @author
     27 @*  ittiam
     28 @*
     29 @* @par list of functions:
     30 @*  - ihevcd_itrans_recon_dc_chroma_a9q()
     31 @*
     32 @* @remarks
     33 @*  none
     34 @*
     35 @*******************************************************************************/
     36 
     37 .text
     38 
     39 
     40 .globl ihevcd_itrans_recon_dc_chroma_a9q
     41 
     42 .type ihevcd_itrans_recon_dc_chroma_a9q, %function
     43 
     44 ihevcd_itrans_recon_dc_chroma_a9q:
     45 
     46 @void ihevcd_itrans_recon_dc_chroma(uword8 *pu1_pred,
     47 @                            uword8 *pu1_dst,
     48 @                            word32 pred_strd,
     49 @                            word32 dst_strd,
     50 @                            word32 log2_trans_size,
     51 @                            word16 i2_coeff_value)
     52 
     53 @r0:pu1_pred
     54 @r1:pu1_dest
     55 @r2:pred_strd
     56 @r3:dst_strd
     57 
     58 
     59 
     60     push        {r0-r11,lr}
     61     ldr         r4,[sp,#0x34]               @loads log2_trans_size
     62     ldr         r5,[sp,#0x38]               @ loads i2_coeff_value
     63 
     64     mov         r10,#1
     65     lsl         r4,r10,r4                   @    trans_size = (1 << log2_trans_size)@
     66     mov         r6,#64 @ 1 << (shift1 - 1)@
     67     mov         r7,#2048                    @ 1<<(shift2-1)
     68 
     69     add         r8,r6,r5,lsl #6
     70     ssat        r8,#16,r8,asr #7
     71     add         r5,r7,r8,lsl #6
     72     ssat        r6,#16,r5,asr #12
     73     mov         r9,r4
     74     mov         r8,r4
     75 
     76     @ r6 has the dc_value
     77     @ r4 has the trans_size value
     78     @ r8 has the row value
     79     @ r9 has the col value
     80     vdup.s16    q0,r6
     81     cmp         r4,#4
     82     beq         row_loop_4chroma
     83 
     84 
     85 row_loop_chroma:
     86     mov         r9,r4
     87 
     88 
     89 col_loop_chroma:
     90 
     91     mov         r7,r0
     92     vld2.8      {d2,d3},[r7],r2
     93     vld2.8      {d4,d5},[r7],r2
     94     vld2.8      {d6,d7},[r7],r2
     95     vld2.8      {d8,d9},[r7],r2
     96 
     97     vld2.8      {d10,d11},[r7],r2
     98     vld2.8      {d12,d13},[r7],r2
     99     vld2.8      {d14,d15},[r7],r2
    100     vld2.8      {d16,d17},[r7]
    101 
    102     add         r0,r0,#16
    103 
    104 
    105     vaddw.u8    q15,q0,d2
    106     vaddw.u8    q14,q0,d4
    107     vaddw.u8    q13,q0,d6
    108     vaddw.u8    q12,q0,d8
    109     vaddw.u8    q11,q0,d10
    110     vaddw.u8    q10,q0,d12
    111     vaddw.u8    q9,q0,d14
    112 
    113 
    114     mov         r11,r1
    115     vqmovun.s16 d2,q15
    116     vqmovun.s16 d4,q14
    117     vqmovun.s16 d6,q13
    118     vqmovun.s16 d8,q12
    119 
    120     vaddw.u8    q15,q0,d16
    121 
    122     vqmovun.s16 d10,q11
    123     vqmovun.s16 d12,q10
    124     vqmovun.s16 d14,q9
    125     vqmovun.s16 d16,q15
    126 
    127     vst2.8      {d2,d3},[r11],r3
    128     vst2.8      {d4,d5},[r11],r3
    129     vst2.8      {d6,d7},[r11],r3
    130     vst2.8      {d8,d9},[r11],r3
    131 
    132     vst2.8      {d10,d11},[r11],r3
    133     vst2.8      {d12,d13},[r11],r3
    134     vst2.8      {d14,d15},[r11],r3
    135     vst2.8      {d16,d17},[r11]
    136 
    137     add         r1,r1,#16
    138 
    139     subs        r9,r9,#8
    140     bgt         col_loop_chroma
    141 
    142     subs        r8,r8,#8
    143 
    144     add         r0,r0,r2,lsl #3
    145     add         r1,r1,r3,lsl #3
    146     sub         r0,r0,r4,lsl #1
    147     sub         r1,r1,r4,lsl #1
    148     bgt         row_loop_chroma
    149     b           end_loops_chroma
    150 
    151 
    152 row_loop_4chroma:
    153     mov         r9,r10
    154 
    155 
    156 col_loop_4chroma:
    157 
    158 
    159     vld2.8      {d2,d3},[r0],r2
    160     vld2.8      {d4,d5},[r0],r2
    161     vld2.8      {d6,d7},[r0],r2
    162     vld2.8      {d8,d9},[r0]
    163 
    164 
    165 
    166 
    167     vaddw.u8    q15,q0,d2
    168     vaddw.u8    q14,q0,d4
    169     vaddw.u8    q13,q0,d6
    170     vaddw.u8    q12,q0,d8
    171 
    172 
    173 
    174     vqmovun.s16 d2,q15
    175     vqmovun.s16 d4,q14
    176     vqmovun.s16 d6,q13
    177     vqmovun.s16 d8,q12
    178 
    179 
    180     vzip.8      d2,d3
    181     vzip.8      d4,d5
    182     vzip.8      d6,d7
    183     vzip.8      d8,d9
    184 
    185     vst1.u32    {d2},[r1],r3
    186     vst1.u32    {d4},[r1],r3
    187     vst1.u32    {d6},[r1],r3
    188     vst1.u32    {d8},[r1]
    189 
    190 end_loops_chroma:
    191     pop         {r0-r11,pc}
    192 
    193 
    194