Home | History | Annotate | Download | only in arm
      1 @/*****************************************************************************
      2 @*
      3 @* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
      4 @*
      5 @* Licensed under the Apache License, Version 2.0 (the "License");
      6 @* you may not use this file except in compliance with the License.
      7 @* You may obtain a copy of the License at:
      8 @*
      9 @* http://www.apache.org/licenses/LICENSE-2.0
     10 @*
     11 @* Unless required by applicable law or agreed to in writing, software
     12 @* distributed under the License is distributed on an "AS IS" BASIS,
     13 @* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 @* See the License for the specific language governing permissions and
     15 @* limitations under the License.
     16 @*
     17 @*****************************************************************************/
     18 @/**
     19 @/*******************************************************************************
     20 @* @file
     21 @*  ihevcd_fmt_conv_420sp_to_420p.s
     22 @*
     23 @* @brief
     24 @*  contains function definitions for format conversions
     25 @*
     26 @* @author
     27 @*  ittiam
     28 @*
     29 @* @par list of functions:
     30 @*
     31 @*
     32 @* @remarks
     33 @*  none
     34 @*
     35 @*******************************************************************************/
     36 
     37 
     38 
     39 
     40 
     41 
     42 
     43 
     44 .text
     45 
     46 
     47 
     48 
     49 
     50 @/*****************************************************************************
     51 @*                                                                            *
     52 @*  Function Name    : neon_copy_yuv420sp_to_yuv420p()                       *
     53 @*                                                                            *
     54 @*  Description      : This function conversts the image from YUV420sP color  *
     55 @*                     space to 420SP color space(UV interleaved).            *
     56 @*                                                                            *
     57 @*  Arguments        : R0           pu1_src_y                                 *
     58 @*                     R1           pu1_src_uv                                *
     59 @*                     R2           pu1_dest_y                                *
     60 @*                     R3           pu1_dest_u                               *
     61 @*                     [R13 #40]    pu1_dest_v                               *
     62 @*                     [R13 #44]    u2_width                                 *
     63 @*                     [R13 #48]    u2_height                                   *
     64 @*                     [R13 #52]    u2_stridey                                *
     65 @*                     [R13 #56]    u2_strideuv                               *
     66 @*                     [R13 #60]    u2_dest_stridey                           *
     67 @*                     [R13 #64]    u2_dest_strideuv                          *
     68 @*                     [R13 #68]    is_u_first                                *
     69 @*                     [R13 #72]    disable_luma_copy                         *
     70 @*                                                                            *
     71 @*  Values Returned  : None                                                   *
     72 @*                                                                            *
     73 @*  Register Usage   : R0 - R14                                               *
     74 @*                                                                            *
     75 @*  Stack Usage      : 40 Bytes                                               *
     76 @*                                                                            *
     77 @*  Interruptibility : Interruptible                                          *
     78 @*                                                                            *
     79 @*  Known Limitations                                                         *
     80 @*       Assumptions: Image Width:     Assumed to be multiple of 2 and       *
     81 @*                     Image Height:    Assumed to be even.                   *
     82 @*                                                                            *
     83 @*  Revision History :                                                        *
     84 @*         DD MM YYYY   Author(s)       Changes (Describe the changes made)   *
     85 @*         16 05 2012   Naveen SR     draft                                   *
     86 @*                                                                            *
     87 @*****************************************************************************/
     88 
     89 .globl ihevcd_fmt_conv_420sp_to_420p_a9q
     90 
     91 .type ihevcd_fmt_conv_420sp_to_420p_a9q, %function
     92 
     93 ihevcd_fmt_conv_420sp_to_420p_a9q:
     94     STMFD       sp!,{r4-r12, lr}
     95 
     96     LDR         r5,[sp,#60]                 @//Load u2_dest_stridey
     97 @   LDR     r6,[sp,#56]             @//Load u2_strideuv
     98     LDR         r7,[sp,#52]                 @//Load u2_stridey
     99     LDR         r8,[sp,#44]                 @//Load u2_width
    100     LDR         r9,[sp,#48]                 @//Load u2_height
    101 
    102     SUB         r10,r7,r8                   @// Src Y increment
    103     SUB         r11,r5,r8                   @// Dst Y increment
    104 
    105     LDR         r5,[sp,#72]                 @//Load disable_luma_copy flag
    106     CMP         r5,#0                       @//skip luma if disable_luma_copy is non-zero
    107     BNE         uv_copy_start
    108 
    109     @/* Copy Y */
    110 
    111     MOV         r4,r9                       @// Copying height
    112 y_row_loop:
    113     MOV         r6,r8                       @// Copying width
    114 
    115 y_col_loop:
    116 
    117     SUB         r6,r6,#16
    118     vld1.8      {d0,d1},[r0]!
    119     vst1.8      {d0,d1},[r2]!
    120     CMP         r6,#16
    121     BGE         y_col_loop
    122     CMP         r6,#0
    123     BEQ         y_col_loop_end
    124     @//If non-multiple of 16, then go back by few bytes to ensure 16 bytes can be read
    125     @//Ex if width is 162, above loop will process 160 pixels. And
    126     @//Both source and destination will point to 146th pixel and then 16 bytes will be read
    127     @// and written using VLD1 and VST1
    128     RSB         r6,r6,#16
    129     SUB         r0,r0,r6
    130     SUB         r2,r2,r6
    131     vld1.8      {d0,d1}, [r0]!
    132     vst1.8      {d0,d1}, [r2]!
    133 
    134 y_col_loop_end:
    135     ADD         r0, r0, r10
    136     ADD         r2, r2, r11
    137     SUBS        r4, r4, #1
    138     BGT         y_row_loop
    139 
    140 
    141     @/* Copy UV */
    142 uv_copy_start:
    143 
    144     LDR         r5,[sp,#64]                 @//Load u2_dest_strideuv
    145     LDR         r7,[sp,#56]                 @//Load u2_strideuv
    146 
    147     MOV         r9,r9,LSR #1                @// height/2
    148 @   MOV     r8,r8,LSR #1            @// Width/2
    149 
    150     SUB         r10,r7,r8                   @// Src UV increment
    151     MOV         r11,r8,LSR #1
    152     SUB         r11,r5,r11                  @// Dst U and V increment
    153 
    154     LDR         r5,[sp,#40]                 @//Load pu1_dest_v
    155 
    156     LDR         r4,[sp,#68]                 @//Load is_u_first_flag
    157     CMP         r4,#0                       @//Swap U and V dest if is_u_first_flag is zero
    158     MOVEQ       r4,r5
    159     MOVEQ       r5,r3
    160     MOVEQ       r3,r4
    161 
    162     MOV         r4,r9                       @// Copying height
    163 uv_row_loop:
    164     MOV         r6,r8                       @// Copying width
    165 
    166 uv_col_loop:
    167 
    168     SUB         r6,r6,#16
    169 
    170     PLD         [r1,#128]
    171     vld2.8      {d0,d1},[r1]!
    172     VST1.8      D0,[r3]!
    173     VST1.8      D1,[r5]!
    174     CMP         r6,#16
    175     BGE         uv_col_loop
    176     CMP         r6,#0
    177     BEQ         uv_col_loop_end
    178     @//If non-multiple of 16, then go back by few bytes to ensure 16 bytes can be read
    179     @//Ex if width is 162, above loop will process 160 pixels. And
    180     @//Both source and destination will point to 146th pixel and then 16 bytes will be read
    181     @// and written using VLD1 and VST1
    182     RSB         r6,r6,#16
    183     SUB         r1,r1,r6
    184     SUB         r3,r3,r6,LSR #1
    185     SUB         r5,r5,r6,LSR #1
    186     vld2.8      {d0,d1}, [r1]!
    187     VST1.8      D0, [r3]!
    188     VST1.8      D1, [r5]!
    189 uv_col_loop_end:
    190     ADD         r1, r1, r10
    191     ADD         r3, r3, r11
    192     ADD         r5, r5, r11
    193     SUBS        r4, r4, #1
    194     BGT         uv_row_loop
    195 
    196 exit:
    197     LDMFD       sp!,{r4-r12, pc}
    198 
    199 
    200 
    201 
    202 
    203 
    204