Home | History | Annotate | Download | only in libswconverter
      1 /*
      2  *
      3  * Copyright 2012 Samsung Electronics S.LSI Co. LTD
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License")
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at
      8  *
      9  *      http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  */
     17 
     18 /*
     19  * @file    csc_linear_to_tiled_crop_neon.s
      20  * @brief   NEON routine that interleaves two byte buffers into one
     21  * @author  ShinWon Lee (shinwon.lee (at) samsung.com)
     22  * @version 1.0
     23  * @history
     24  *   2012.02.01 : Create
     25  */
     26 
     27 /*
     28  * Interleave src1, src2 to dst
     29  *
     30  * @param dest
     31  *   dst address[out]
     32  *
     33  * @param src1
     34  *   src1 address[in]
     35  *
     36  * @param src2
     37  *   src2 address[in]
     38  *
     39  * @param src_size
      40  *   size of src1 (and of src2) in bytes[in]
     41  */
     42 
     43     .arch armv7-a
     44     .text
     45     .global csc_interleave_memcpy_neon
     46     .type   csc_interleave_memcpy_neon, %function
     47 csc_interleave_memcpy_neon:
     48     .fnstart
     49 
     50     @r0     dest
     51     @r1     src1
     52     @r2     src2
     53     @r3     src_size
     54     @r4
     55     @r5
     56     @r6
     57     @r7
     58     @r8     temp1
     59     @r9     temp2
     60     @r10    dest_addr
     61     @r11    src1_addr
     62     @r12    src2_addr
     63     @r14    i
     64 
     65     stmfd       sp!, {r8-r12,r14}       @ backup registers
     66 
     67     mov         r10, r0
     68     mov         r11, r1
     69     mov         r12, r2
     70     mov         r14, r3
     71 
     72     cmp         r14, #128
     73     blt         LESS_THAN_128
     74 
     75 LOOP_128:
     76     vld1.8      {q0}, [r11]!
     77     vld1.8      {q2}, [r11]!
     78     vld1.8      {q4}, [r11]!
     79     vld1.8      {q6}, [r11]!
     80     vld1.8      {q8}, [r11]!
     81     vld1.8      {q10}, [r11]!
     82     vld1.8      {q12}, [r11]!
     83     vld1.8      {q14}, [r11]!
     84     vld1.8      {q1}, [r12]!
     85     vld1.8      {q3}, [r12]!
     86     vld1.8      {q5}, [r12]!
     87     vld1.8      {q7}, [r12]!
     88     vld1.8      {q9}, [r12]!
     89     vld1.8      {q11}, [r12]!
     90     vld1.8      {q13}, [r12]!
     91     vld1.8      {q15}, [r12]!
     92 
     93     vst2.8      {q0, q1}, [r10]!
     94     vst2.8      {q2, q3}, [r10]!
     95     vst2.8      {q4, q5}, [r10]!
     96     vst2.8      {q6, q7}, [r10]!
     97     vst2.8      {q8, q9}, [r10]!
     98     vst2.8      {q10, q11}, [r10]!
     99     vst2.8      {q12, q13}, [r10]!
    100     vst2.8      {q14, q15}, [r10]!
    101 
    102     sub         r14, #128
    103     cmp         r14, #128
    104     bgt         LOOP_128
    105 
    106 LESS_THAN_128:
    107     cmp         r14, #0
    108     beq         RESTORE_REG
    109 
    110 LOOP_1:
    111     ldrb        r8, [r11], #1
    112     ldrb        r9, [r12], #1
    113     strb        r8, [r10], #1
    114     strb        r9, [r10], #1
    115     subs        r14, #1
    116     bne         LOOP_1
    117 
    118 RESTORE_REG:
    119     ldmfd       sp!, {r8-r12,r15}       @ restore registers
    120     .fnend
    121