Home | History | Annotate | Download | only in arm
      1 @/*****************************************************************************
      2 @*
      3 @* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
      4 @*
      5 @* Licensed under the Apache License, Version 2.0 (the "License");
      6 @* you may not use this file except in compliance with the License.
      7 @* You may obtain a copy of the License at:
      8 @*
      9 @* http://www.apache.org/licenses/LICENSE-2.0
     10 @*
     11 @* Unless required by applicable law or agreed to in writing, software
     12 @* distributed under the License is distributed on an "AS IS" BASIS,
     13 @* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 @* See the License for the specific language governing permissions and
     15 @* limitations under the License.
     16 @*
     17 @*****************************************************************************/
     18 @/**
     19 @/*******************************************************************************
     20 @* @file
     21 @*  ihevcd_fmt_conv_420sp_to_420sp.s
     22 @*
     23 @* @brief
     24 @*  contains function definitions for format conversions
     25 @*
     26 @* @author
     27 @*  ittiam
     28 @*
     29 @* @par list of functions:
     30 @*  ihevcd_fmt_conv_420sp_to_420sp_a9q()
     31 @*
     32 @* @remarks
     33 @*  none
     34 @*
     35 @*******************************************************************************/
     36     .equ DO1STROUNDING, 0                   @ NOTE(review): not referenced anywhere else in the visible file
     37 
     38     @ ARM        - armasm directive kept only as a comment (presumably left from an armasm-syntax version; TODO confirm)
     39     @
     40     @ PRESERVE8  - armasm directive kept only as a comment
     41 
     42 .text                                       @ assemble what follows into the .text (code) section
     43 .p2align 2                                  @ align the next location to 2^2 = 4 bytes
     44 
     45 
     46 
     47 
     48 
     49 @/*****************************************************************************
     50 @*                                                                            *
     51 @*  Function Name    : ihevcd_fmt_conv_420sp_to_420sp()                       *
     52 @*                                                                            *
     53 @*  Description      : This function converts the image from YUV420SP color   *
     54 @*                     space to 420SP color space (UV interleaved).           *
     55 @*                                                                            *
     56 @*  Arguments        : R0           pu1_y                                     *
     57 @*                     R1           pu1_uv                                    *
     58 @*                     R2           pu1_dest_y                                *
     59 @*                     R3           pu1_dest_uv                               *
     60 @*                     [R13 #40]    u2_width                                  *
     61 @*                     [R13 #44]    u2_height                                 *
     62 @*                     [R13 #48]    u2_stridey                                *
     63 @*                     [R13 #52]    u2_stridechroma                           *
     64 @*                     [R13 #56]    u2_dest_stridey                           *
     65 @*                     [R13 #60]    u2_dest_stridechroma                      *
     66 @*                                                                            *
     67 @*  Values Returned  : None                                                   *
     68 @*                                                                            *
     69 @*  Register Usage   : R0 - R14                                               *
     70 @*                                                                            *
     71 @*  Stack Usage      : 40 Bytes                                               *
     72 @*                                                                            *
     73 @*  Interruptibility : Interruptible                                          *
     74 @*                                                                            *
     75 @*  Known Limitations                                                         *
     76 @*       Assumptions: Image Width:     Assumed to be multiple of 2 and       *
     77 @*                     Image Height:    Assumed to be even.                   *
     78 @*                                                                            *
     79 @*  Revision History :                                                        *
     80 @*         DD MM YYYY   Author(s)       Changes (Describe the changes made)   *
     81 @*         16 05 2012   Naveen SR     draft                                   *
     82 @*                                                                            *
     83 @*****************************************************************************/
     84 
     .global ihevcd_fmt_conv_420sp_to_420sp_a9q
.type ihevcd_fmt_conv_420sp_to_420sp_a9q, %function

@/*****************************************************************************
@* ihevcd_fmt_conv_420sp_to_420sp_a9q
@*
@* Copies a semi-planar 4:2:0 picture plane by plane:
@*   - u2_height rows of u2_width bytes from pu1_y  to pu1_dest_y
@*   - (u2_height >> 1) rows of u2_width bytes from pu1_uv to pu1_dest_uv
@*     (the interleaved UV row is u2_width bytes wide, so the width is not
@*     halved for chroma)
@*
@* In   : r0 = pu1_y, r1 = pu1_uv, r2 = pu1_dest_y, r3 = pu1_dest_uv
@*        stack arguments, see the FMT_CONV_ARG_* offsets below. They are
@*        loaded as 32-bit words and compared as signed values.
@* Out  : none
@* Saves: r4-r12 and lr are pushed on entry and restored on exit (40 bytes)
@* Uses : NEON d0-d3 as copy buffers (not saved), r12 as byte scratch
@*
@* Row copy strategy
@*   Wide rows (luma >= 32 bytes, chroma >= 16 bytes) are copied in whole
@*   32/16-byte NEON chunks. A leftover tail is handled by stepping both
@*   pointers back so that one more whole chunk ends exactly at the end of
@*   the row; the bytes that overlap are simply copied twice.
@*   That step-back is only valid when at least one whole chunk fits in the
@*   row. Narrower rows would make the pointers step back to BEFORE the start
@*   of the row, so they are copied byte by byte instead.
@*
@* Degenerate sizes
@*   u2_width <= 0 or u2_height <= 0 : nothing is copied.
@*   The row loops are bottom-tested, so when u2_height is 1 one chroma row
@*   is still copied although (u2_height >> 1) is 0. Height is assumed even.
@*****************************************************************************/

    @ Offsets of the stack arguments from sp AFTER the 10-register push below
    .equ FMT_CONV_ARG_WIDTH,            40  @ u2_width
    .equ FMT_CONV_ARG_HEIGHT,           44  @ u2_height
    .equ FMT_CONV_ARG_SRC_STRIDE_Y,     48  @ u2_stridey
    .equ FMT_CONV_ARG_SRC_STRIDE_UV,    52  @ u2_stridechroma
    .equ FMT_CONV_ARG_DST_STRIDE_Y,     56  @ u2_dest_stridey
    .equ FMT_CONV_ARG_DST_STRIDE_UV,    60  @ u2_dest_stridechroma

ihevcd_fmt_conv_420sp_to_420sp_a9q:

    STMFD       sp!,{r4-r12, lr}            @ 10 registers = 40 bytes

    LDR         r5,[sp,#FMT_CONV_ARG_DST_STRIDE_Y]  @ r5 = u2_dest_stridey
    LDR         r7,[sp,#FMT_CONV_ARG_SRC_STRIDE_Y]  @ r7 = u2_stridey
    LDR         r8,[sp,#FMT_CONV_ARG_WIDTH]         @ r8 = u2_width  (kept for both planes)
    LDR         r9,[sp,#FMT_CONV_ARG_HEIGHT]        @ r9 = u2_height

    @ Nothing to copy for an empty picture. Without this guard the
    @ bottom-tested loops below would still copy one chunk per plane.
    CMP         r8,#0
    BLE         exit
    CMP         r9,#0
    BLE         exit

    SUB         r10,r7,r8                   @ r10 = src Y row gap (stride - width)
    SUB         r11,r5,r8                   @ r11 = dst Y row gap (stride - width)

    @/* Copy Y */

    MOV         r4,r9                       @ r4 = rows remaining
    CMP         r8,#32
    BLT         y_narrow_row_loop           @ row shorter than one 32-byte chunk

y_row_loop:
    MOV         r6,r8                       @ r6 = bytes remaining in this row (>= 32 here)

y_col_loop:
    PLD         [r0, #128]
    SUB         r6,r6,#32
    VLD1.8      D0,[r0]!
    VLD1.8      D1,[r0]!
    VLD1.8      D2,[r0]!
    VLD1.8      D3,[r0]!
    VST1.8      D0,[R2]!
    VST1.8      D1,[R2]!
    VST1.8      D2,[R2]!
    VST1.8      D3,[R2]!
    CMP         r6,#32
    BGE         y_col_loop
    CMP         r6,#0
    BEQ         y_col_loop_end
    @ Tail of 1..31 bytes: step both pointers back by (32 - tail) so that one
    @ more 32-byte chunk ends exactly at the end of the row.
    @ Ex: width 162 -> the loop above copied 160 bytes and r6 = 2; stepping
    @ back 30 bytes points at byte 130, and bytes 130..161 are copied.
    RSB         r6,r6,#32
    SUB         r0,r0,r6
    SUB         R2,R2,r6
    VLD1.8      D0,[r0]!
    VLD1.8      D1,[r0]!
    VLD1.8      D2,[r0]!
    VLD1.8      D3,[r0]!
    VST1.8      D0,[R2]!
    VST1.8      D1,[R2]!
    VST1.8      D2,[R2]!
    VST1.8      D3,[R2]!

y_col_loop_end:
    ADD         r0, r0, r10                 @ advance to the start of the next src row
    ADD         R2, R2, r11                 @ advance to the start of the next dst row
    SUBS        r4, r4, #1
    BGT         y_row_loop

    @/* Copy UV */

uv_copy:
    LDR         r5,[sp,#FMT_CONV_ARG_DST_STRIDE_UV] @ r5 = u2_dest_stridechroma
    LDR         r7,[sp,#FMT_CONV_ARG_SRC_STRIDE_UV] @ r7 = u2_stridechroma

    MOV         r9,r9,LSR #1                @ chroma rows = height / 2
                                            @ r8 is NOT halved: the interleaved UV row is width bytes

    MOV         R2,R3                       @ R2 = pu1_dest_uv

    SUB         r10,r7,r8                   @ r10 = src UV row gap
    SUB         r11,r5,r8                   @ r11 = dst UV row gap

    MOV         r4,r9                       @ r4 = rows remaining
    CMP         r8,#16
    BLT         uv_narrow_row_loop          @ row shorter than one 16-byte chunk

uv_row_loop:
    MOV         r6,r8                       @ r6 = bytes remaining in this row (>= 16 here)

uv_col_loop:
    PLD         [r1, #128]
    SUB         r6,r6,#16
    VLD1.8      D0,[r1]!
    VLD1.8      D1,[r1]!
    VST1.8      D0,[R2]!
    VST1.8      D1,[R2]!
    CMP         r6,#16
    BGE         uv_col_loop
    CMP         r6,#0
    BEQ         uv_col_loop_end
    @ Tail of 1..15 bytes: step both pointers back by (16 - tail) so that one
    @ more 16-byte chunk ends exactly at the end of the row.
    @ Ex: width 162 -> the loop above copied 160 bytes and r6 = 2; stepping
    @ back 14 bytes points at byte 146, and bytes 146..161 are copied.
    RSB         r6,r6,#16
    SUB         r1,r1,r6
    SUB         R2,R2,r6
    VLD1.8      D0, [r1]!
    VLD1.8      D1, [r1]!
    VST1.8      D0, [R2]!
    VST1.8      D1, [R2]!

uv_col_loop_end:
    ADD         r1, r1, r10                 @ advance to the start of the next src row
    ADD         R2, R2, r11                 @ advance to the start of the next dst row
    SUBS        r4, r4, #1
    BGT         uv_row_loop

exit:
    LDMFD       sp!,{r4-r12, pc}            @ restore registers and return


    @ ---- Narrow-row paths (kept out of the hot path) ------------------------
    @ Used when a row is shorter than one NEON chunk; see the header comment.
    @ On entry: r4 = rows remaining, r8 = width (> 0), r10/r11 = row gaps.

y_narrow_row_loop:
    MOV         r6,r8                       @ r6 = bytes remaining in this row (1..31)
y_narrow_col_loop:
    LDRB        r12,[r0],#1
    STRB        r12,[R2],#1
    SUBS        r6,r6,#1
    BGT         y_narrow_col_loop
    ADD         r0, r0, r10
    ADD         R2, R2, r11
    SUBS        r4, r4, #1
    BGT         y_narrow_row_loop
    B           uv_copy

uv_narrow_row_loop:
    MOV         r6,r8                       @ r6 = bytes remaining in this row (1..15)
uv_narrow_col_loop:
    LDRB        r12,[r1],#1
    STRB        r12,[R2],#1
    SUBS        r6,r6,#1
    BGT         uv_narrow_col_loop
    ADD         r1, r1, r10
    ADD         R2, R2, r11
    SUBS        r4, r4, #1
    BGT         uv_narrow_row_loop
    B           exit
    195 
    196 
    197     .section .note.GNU-stack,"",%progbits   @ empty flags: GNU toolchain marker that this object does not need an executable stack
    198 
    199