Home | History | Annotate | Download | only in arm
      1 @/*****************************************************************************
      2 @*
      3 @* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
      4 @*
      5 @* Licensed under the Apache License, Version 2.0 (the "License");
      6 @* you may not use this file except in compliance with the License.
      7 @* You may obtain a copy of the License at:
      8 @*
      9 @* http://www.apache.org/licenses/LICENSE-2.0
     10 @*
     11 @* Unless required by applicable law or agreed to in writing, software
     12 @* distributed under the License is distributed on an "AS IS" BASIS,
     13 @* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 @* See the License for the specific language governing permissions and
     15 @* limitations under the License.
     16 @*
     17 @*****************************************************************************/
     18 @/**
     19 @/*******************************************************************************
     20 @* @file
     21 @*  ihevcd_itrans_recon_dc_luma.s
     22 @*
     23 @* @brief
     24 @*  contains function definitions itrans and recon for dc only case
     25 @*
     26 @* @author
     27 @*  ittiam
     28 @*
     29 @* @par list of functions:
     30 @*  - ihevcd_itrans_recon_dc_luma_a9q()
     31 @*
     32 @* @remarks
     33 @*  none
     34 @*
     35 @*******************************************************************************/
     36 
     37 .text
     38 
     39 
     40 
     .globl ihevcd_itrans_recon_dc_luma_a9q

     .type ihevcd_itrans_recon_dc_luma_a9q, %function

     @ Byte offsets (from sp) of the two stack-passed arguments, valid right
     @ after the integer-register push in the prologue: 13 registers
     @ (r0-r11, lr) * 4 bytes = 52 bytes sit below the caller's stack area.
     .equ dc_luma_log2_trans_size_offs, 52
     .equ dc_luma_coeff_value_offs,     56

     @------------------------------------------------------------------------
     @ void ihevcd_itrans_recon_dc_luma(uword8 *pu1_pred,
     @                                  uword8 *pu1_dst,
     @                                  word32  pred_strd,
     @                                  word32  dst_strd,
     @                                  word32  log2_trans_size,
     @                                  word16  i2_coeff_value)
     @
     @ DC-only inverse transform and reconstruction. A single DC value is
     @ derived from i2_coeff_value, added to every pixel of the
     @ (1 << log2_trans_size)-square block read from pu1_pred, and the sums
     @ are written to pu1_dst saturated to unsigned 8 bits.
     @
     @ In:    r0 = pu1_pred      r1 = pu1_dst
     @        r2 = pred_strd     r3 = dst_strd   (added to the byte pointers
     @                                            to step from row to row)
     @        [sp, #0] = log2_trans_size, [sp, #4] = i2_coeff_value (on entry)
     @ Out:   nothing returned; the block at pu1_dst is written
     @ Saved: r0-r11 are restored on exit. d8-d9 are callee-saved NEON
     @        registers and are saved/restored around the 8x8 path, which is
     @        the only path that uses them.
     @ Clobb: q0-q3, q8-q15, condition flags
     @
     @ Paths: a block size of exactly 4 takes the 4x4 path; every other size
     @        is processed as 8x8 tiles.
     @------------------------------------------------------------------------
     ihevcd_itrans_recon_dc_luma_a9q:

         push        {r0-r11,lr}
         ldr         r4,[sp,#dc_luma_log2_trans_size_offs]
         ldr         r5,[sp,#dc_luma_coeff_value_offs]

         mov         r6,#1
         lsl         r4,r6,r4                    @ r4 = trans_size = 1 << log2_trans_size

         @ stage 1: r6 = sat16((coeff * 64 + 64) >> 7)      64 = 1 << (shift1 - 1)
         mov         r6,#64
         add         r6,r6,r5,lsl #6
         ssat        r6,#16,r6,asr #7

         @ stage 2: r6 = dc = sat16((stage1 * 64 + 2048) >> 12)   2048 = 1 << (shift2 - 1)
         mov         r7,#2048
         add         r7,r7,r6,lsl #6
         ssat        r6,#16,r7,asr #12

         vdup.16     q0,r6                       @ q0 = dc in all eight 16-bit lanes

         cmp         r4,#4
         beq         .Ldc_luma_4x4

         @ ---- 8x8-tile path -------------------------------------------------
         @ The arguments were already fetched above, so pushing d8-d9 here does
         @ not disturb the stack offsets used in the prologue.
         vpush       {d8-d9}
         mov         r8,r4                       @ r8 = rows still to process

     .Ldc_luma_row_loop:
         mov         r9,r4                       @ r9 = columns still to process in this band

     .Ldc_luma_col_loop:
         @ r0/r1 point at the top-left pixel of the current tile in pred/dst;
         @ r7/r11 walk down its eight rows.
         mov         r7,r0
         mov         r11,r1

         vld1.8      {d2},[r7],r2
         vld1.8      {d3},[r7],r2
         vld1.8      {d4},[r7],r2
         vld1.8      {d5},[r7],r2
         vld1.8      {d6},[r7],r2
         vld1.8      {d7},[r7],r2
         vld1.8      {d8},[r7],r2
         vld1.8      {d9},[r7]

         @ widen each pred row to 16 bits and add the dc value
         vaddw.u8    q15,q0,d2
         vaddw.u8    q14,q0,d3
         vaddw.u8    q13,q0,d4
         vaddw.u8    q12,q0,d5
         vaddw.u8    q11,q0,d6
         vaddw.u8    q10,q0,d7
         vaddw.u8    q9,q0,d8
         vaddw.u8    q8,q0,d9

         @ narrow back to bytes with unsigned saturation
         vqmovun.s16 d2,q15
         vqmovun.s16 d3,q14
         vqmovun.s16 d4,q13
         vqmovun.s16 d5,q12
         vqmovun.s16 d6,q11
         vqmovun.s16 d7,q10
         vqmovun.s16 d8,q9
         vqmovun.s16 d9,q8

         vst1.32     {d2},[r11],r3
         vst1.32     {d3},[r11],r3
         vst1.32     {d4},[r11],r3
         vst1.32     {d5},[r11],r3
         vst1.32     {d6},[r11],r3
         vst1.32     {d7},[r11],r3
         vst1.32     {d8},[r11],r3
         vst1.32     {d9},[r11]

         add         r0,r0,#8                    @ next tile to the right
         add         r1,r1,#8
         subs        r9,r9,#8
         bgt         .Ldc_luma_col_loop

         @ move down eight rows and back by trans_size bytes, i.e. to the
         @ left edge of the next band of tiles
         add         r0,r0,r2,lsl #3
         add         r1,r1,r3,lsl #3
         sub         r0,r0,r4
         sub         r1,r1,r4
         subs        r8,r8,#8
         bgt         .Ldc_luma_row_loop

         vpop        {d8-d9}
         b           .Ldc_luma_done

         @ ---- 4x4 path ------------------------------------------------------
     .Ldc_luma_4x4:
         @ NOTE(review): each load fetches 8 bytes of a pred row although only
         @ the low 4 bytes are stored below; this relies on 8 bytes being
         @ readable at every row start of pu1_pred - confirm against callers.
         vld1.8      {d2},[r0],r2
         vld1.8      {d3},[r0],r2
         vld1.8      {d4},[r0],r2
         vld1.8      {d5},[r0]

         vaddw.u8    q15,q0,d2
         vaddw.u8    q14,q0,d3
         vaddw.u8    q13,q0,d4
         vaddw.u8    q12,q0,d5

         vqmovun.s16 d2,q15
         vqmovun.s16 d3,q14
         vqmovun.s16 d4,q13
         vqmovun.s16 d5,q12

         @ store only the first 32-bit lane: four pixels per row
         vst1.32     {d2[0]},[r1],r3
         vst1.32     {d3[0]},[r1],r3
         vst1.32     {d4[0]},[r1],r3
         vst1.32     {d5[0]},[r1]

     .Ldc_luma_done:
         pop         {r0-r11,pc}
    186 
    187 
    188 
    189 
    190 
    191 
    192 
    193 
    194