Home | History | Annotate | Download | only in neon
      1 ;
      2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3 ;
      4 ;  Use of this source code is governed by a BSD-style license
      5 ;  that can be found in the LICENSE file in the root of the source
      6 ;  tree. An additional intellectual property rights grant can be found
      7 ;  in the file PATENTS.  All contributing project authors may
      8 ;  be found in the AUTHORS file in the root of the source tree.
      9 ;
     10 
     11 
     12     EXPORT  |vp8_fast_quantize_b_neon_func|     ; make the entry point visible to the linker
     13 
     14     ARM                                         ; assemble what follows as ARM (A32) instructions
     15     REQUIRE8                                    ; this file requires an 8-byte aligned stack
     16     PRESERVE8                                   ; this file keeps the stack 8-byte aligned
     17 
     18     AREA ||.text||, CODE, READONLY, ALIGN=2     ; read-only code section, 2^2 = 4-byte aligned
     19 
     20 ; r0        short *coeff_ptr
     21 ; r1        short *zbin_ptr
     22 ; r2        short *qcoeff_ptr
     23 ; r3        short *dqcoeff_ptr
     24 ; stack     short *dequant_ptr
     25 ; stack     short *scan_mask
     26 ; stack     short *round_ptr
     27 ; stack     short *quant_ptr
     28 
     29 ; return    int * eob
     30 |vp8_fast_quantize_b_neon_func| PROC
     31     vld1.16         {q0, q1}, [r0]              ;load z
     32     vld1.16         {q10, q11}, [r1]            ;load zbin
     33 
     34     vabs.s16        q4, q0                      ;calculate x = abs(z)
     35     vabs.s16        q5, q1
     36 
     37     vcge.s16        q10, q4, q10                ;x>=zbin
     38     vcge.s16        q11, q5, q11
     39 
     40     ;if x<zbin (q10 & q11 are all 0), go to zero_output
     41     vorr.s16        q6, q10, q11
     42     vorr.s16        d12, d12, d13
     43     vmov            r0, r1, d12
     44     orr             r0, r0, r1
     45     cmp             r0, #0
     46     beq             zero_output
     47 
     48     ldr             r0, [sp, #8]                ;load round_ptr
     49     ldr             r12, [sp, #12]              ;load quant_ptr
     50 
     51     ;right shift 15 to get sign, all 0 if it is positive, all 1 if it is negative
     52     vshr.s16        q2, q0, #15                 ; sz
     53     vshr.s16        q3, q1, #15
     54 
     55     vld1.s16        {q6, q7}, [r0]              ;load round_ptr [0-15]
     56     vld1.s16        {q8, q9}, [r12]             ;load quant_ptr [0-15]
     57 
     58     vadd.s16        q4, q6                      ;x + Round
     59     vadd.s16        q5, q7
     60 
     61     ldr             r0, [sp, #4]                ;load rvsplus1_scan_order ptr
     62 
     63     vqdmulh.s16     q4, q8                      ;y = ((Round + abs(z)) * Quant) >> 16
     64     vqdmulh.s16     q5, q9
     65 
     66     vld1.16         {q0, q1}, [r0]              ;load rvsplus1_scan_order
     67     vceq.s16        q8, q8                      ;set q8 to all 1
     68 
     69     vshr.s16        q4, #1                      ;right shift 1 after vqdmulh
     70     vshr.s16        q5, #1
     71 
     72     ;modify data to have its original sign
     73     veor.s16        q4, q2                      ; y^sz
     74     veor.s16        q5, q3
     75 
     76     ldr             r12, [sp]                   ;load dequant_ptr
     77 
     78     vsub.s16        q4, q2                      ; x1 = (y^sz) - sz = (y^sz) - (-1) (two's complement)
     79     vsub.s16        q5, q3
     80 
     81     vand.s16        q4, q10                     ;mask off x1 elements
     82     vand.s16        q5, q11
     83 
     84     vld1.s16        {q6, q7}, [r12]             ;load dequant_ptr[i]
     85 
     86     vtst.16         q14, q4, q8                 ;now find eob
     87     vtst.16         q15, q5, q8                 ;non-zero element is set to all 1 in q4, q5
     88 
     89     vst1.s16        {q4, q5}, [r2]              ;store: qcoeff = x1
     90 
     91     vand            q0, q0, q14                 ;get all valid number from rvsplus1_scan_order array
     92     vand            q1, q1, q15
     93 
     94     vmax.u16        q0, q0, q1                  ;find maximum value in q0, q1
     95     vmax.u16        d0, d0, d1
     96     vmovl.u16       q0, d0
     97 
     98     vmul.s16        q6, q4                      ;x * Dequant
     99     vmul.s16        q7, q5
    100 
    101     vmax.u32        d0, d0, d1
    102     vpmax.u32       d0, d0, d0
    103 
    104     vst1.s16        {q6, q7}, [r3]              ;store dqcoeff = x * Dequant
    105 
    106     vmov.32         r0, d0[0]
    107     bx              lr
    108 
    109 zero_output
    110     vst1.s16        {q10, q11}, [r2]        ; qcoeff = 0
    111     vst1.s16        {q10, q11}, [r3]        ; dqcoeff = 0
    112     mov             r0, #0
    113 
    114     bx              lr
    115 
    116     ENDP
    117 
    118     END
    119