Home | History | Annotate | Download | only in ARMV7
      1 @/*
      2 @ ** Copyright 2003-2010, VisualOn, Inc.
      3 @ **
      4 @ ** Licensed under the Apache License, Version 2.0 (the "License");
      5 @ ** you may not use this file except in compliance with the License.
      6 @ ** You may obtain a copy of the License at
      7 @ **
      8 @ **     http://www.apache.org/licenses/LICENSE-2.0
      9 @ **
     10 @ ** Unless required by applicable law or agreed to in writing, software
     11 @ ** distributed under the License is distributed on an "AS IS" BASIS,
     12 @ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 @ ** See the License for the specific language governing permissions and
     14 @ ** limitations under the License.
     15 @ */
     16 @
     17 @**********************************************************************/
     18 @Word32 Dot_product12(                      /* (o) Q31: normalized result (-1 <= val < 1) */
     19 @       Word16 x[],                           /* (i) 12bits: x vector                       */
     20 @       Word16 y[],                           /* (i) 12bits: y vector                       */
     21 @       Word16 lg,                            /* (i)    : vector length                     */
     22 @       Word16 * exp                          /* (o)    : exponent of result (0..+30)       */
     23 @)
     24 @************************************************************************
     25 @  x[]   ---  r0
     26 @  y[]   ---  r1
     27 @  lg    ---  r2
     28 @  *exp  ---  r3
     29 
     30           .section   .text
     31           .global    Dot_product12_asm
     32 
     33 Dot_product12_asm:
     34 
     35           STMFD   	    r13!, {r4 - r12, r14}
     36 	  CMP               r0, r1
     37 	  BEQ               LOOP_EQ
     38 
     39           VLD1.S16          {Q0, Q1}, [r0]!               @load 16 Word16 x[]
     40           VLD1.S16          {Q2, Q3}, [r0]!               @load 16 Word16 x[]
     41           VLD1.S16          {Q4, Q5}, [r0]!               @load 16 Word16 x[]
     42           VLD1.S16          {Q6, Q7}, [r0]!               @load 16 Word16 x[]
     43 	  VLD1.S16          {Q8, Q9}, [r1]!               @load 16 Word16 y[]
     44 	  VLD1.S16          {Q10, Q11}, [r1]!             @load 16 Word16 y[]
     45 	  VLD1.S16          {Q12, Q13}, [r1]!             @load 16 Word16 y[]
     46 
     47           VMULL.S16         Q15, D16, D0
     48           VMLAL.S16         Q15, D17, D1
     49           VMLAL.S16         Q15, D18, D2
     50           VMLAL.S16         Q15, D19, D3
     51 	  VLD1.S16          {Q0, Q1}, [r1]!               @load 16 Word16 y[]
     52           VMLAL.S16         Q15, D20, D4
     53           VMLAL.S16         Q15, D21, D5
     54           VMLAL.S16         Q15, D22, D6
     55           VMLAL.S16         Q15, D23, D7
     56           VMLAL.S16         Q15, D24, D8
     57           VMLAL.S16         Q15, D25, D9
     58           VMLAL.S16         Q15, D26, D10
     59           VMLAL.S16         Q15, D27, D11
     60           VMLAL.S16         Q15, D0, D12
     61           VMLAL.S16         Q15, D1, D13
     62           VMLAL.S16         Q15, D2, D14
     63           VMLAL.S16         Q15, D3, D15
     64 
     65           CMP               r2, #64
     66           BEQ               Lable1
     67           VLD1.S16          {Q0, Q1}, [r0]!               @load 16 Word16 x[]
     68 	  VLD1.S16          {Q2, Q3}, [r1]!
     69           VMLAL.S16         Q15, D4, D0
     70           VMLAL.S16         Q15, D5, D1
     71           VMLAL.S16         Q15, D6, D2
     72           VMLAL.S16         Q15, D7, D3
     73 	  BL                Lable1
     74 
     75 LOOP_EQ:
     76           VLD1.S16          {Q0, Q1}, [r0]!
     77 	  VLD1.S16          {Q2, Q3}, [r0]!
     78 	  VLD1.S16          {Q4, Q5}, [r0]!
     79 	  VLD1.S16          {Q6, Q7}, [r0]!
     80 	  VMULL.S16         Q15, D0, D0
     81 	  VMLAL.S16         Q15, D1, D1
     82 	  VMLAL.S16         Q15, D2, D2
     83 	  VMLAL.S16         Q15, D3, D3
     84 	  VMLAL.S16         Q15, D4, D4
     85 	  VMLAL.S16         Q15, D5, D5
     86 	  VMLAL.S16         Q15, D6, D6
     87 	  VMLAL.S16         Q15, D7, D7
     88 	  VMLAL.S16         Q15, D8, D8
     89 	  VMLAL.S16         Q15, D9, D9
     90 	  VMLAL.S16         Q15, D10, D10
     91 	  VMLAL.S16         Q15, D11, D11
     92 	  VMLAL.S16         Q15, D12, D12
     93 	  VMLAL.S16         Q15, D13, D13
     94 	  VMLAL.S16         Q15, D14, D14
     95 	  VMLAL.S16         Q15, D15, D15
     96 
     97 	  CMP               r2, #64
     98 	  BEQ               Lable1
     99 	  VLD1.S16          {Q0, Q1}, [r0]!
    100 	  VMLAL.S16         Q15, D0, D0
    101 	  VMLAL.S16         Q15, D1, D1
    102 	  VMLAL.S16         Q15, D2, D2
    103 	  VMLAL.S16         Q15, D3, D3
    104 
    105 Lable1:
    106 
    107           VQADD.S32         D30, D30, D31
    108           VPADD.S32         D30, D30, D30
    109           VMOV.S32          r12, D30[0]
    110 
    111 	  ADD               r12, r12, r12
    112           ADD               r12, r12, #1                         @ L_sum = (L_sum << 1)  + 1
    113 	  MOV               r4, r12
    114 	  CMP               r12, #0
    115 	  RSBLT             r4, r12, #0
    116           CLZ               r10, r4
    117           SUB               r10, r10, #1                         @ sft = norm_l(L_sum)
    118           MOV               r0, r12, LSL r10                     @ L_sum = L_sum << sft
    119           RSB               r11, r10, #30                        @ *exp = 30 - sft
    120           STRH              r11, [r3]
    121 
    122 Dot_product12_end:
    123 
    124           LDMFD   	    r13!, {r4 - r12, r15}
    125 
    126           .END
    127 
    128