Home | History | Annotate | Download | only in signal_processing
      1 @
      2 @ Written by Wilco Dijkstra, 1996. The following email exchange establishes the
      3 @ license.
      4 @
      5 @ From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com>
      6 @ Date: Fri, Jun 24, 2011 at 3:20 AM
      7 @ Subject: Re: sqrt routine
      8 @ To: Kevin Ma <kma@google.com>
      9 @ Hi Kevin,
     10 @ Thanks for asking. Those routines are public domain (originally posted to
     11 @ comp.sys.arm a long time ago), so you can use them freely for any purpose.
     12 @ Cheers,
     13 @ Wilco
     14 @
     15 @ ----- Original Message -----
     16 @ From: "Kevin Ma" <kma@google.com>
     17 @ To: <Wilco.Dijkstra@ntlworld.com>
     18 @ Sent: Thursday, June 23, 2011 11:44 PM
     19 @ Subject: Fwd: sqrt routine
     20 @ Hi Wilco,
     21 @ I saw your sqrt routine from several web sites, including
     22 @ http://www.finesse.demon.co.uk/steven/sqrt.html.
     23 @ Just wonder if there's any copyright information with your Successive
     24 @ approximation routines, or if I can freely use it for any purpose.
     25 @ Thanks.
     26 @ Kevin
     27 
     28 @ Minor modifications in code style for WebRTC, 2012.
     29 @ Output is bit-exact with the reference C code in spl_sqrt_floor.c.
     30 
     31 @ Input :             r0 32 bit unsigned integer
     32 @ Output:             r0 = INT (SQRT (r0)), precision is 16 bits
     33 @ Registers touched:  r1, r2, condition flags
     34 
     35 #include "webrtc/system_wrappers/include/asm_defines.h"
     36 
     37 GLOBAL_FUNCTION WebRtcSpl_SqrtFloor
     38 .align  2  @ 4-byte alignment (on ARM targets GAS reads the operand as a power of two)
     39 DEFINE_FUNCTION WebRtcSpl_SqrtFloor
          @ Integer square root of the unsigned 32-bit value in r0. The
          @ result is built one bit per step (16 steps, most significant
          @ bit first) and returned in r0.
          @
          @ NOTE(review): GLOBAL_FUNCTION and DEFINE_FUNCTION are macros that
          @ are not defined in this file (presumably in asm_defines.h); they
          @ look like they export the symbol and emit its label -- confirm
          @ against that header.
          @
          @ Register roles:
          @   r0  remainder: the input minus every trial value accepted so far
          @   r1  constant 3 << 30, added by each adc so that bits 31:30 of
          @       r2 read 01 again after r2 has been shifted left by one
          @   r2  bits 31:30 = 01 (marker), low bits = root bits found so far
          @
          @ Each step i (0 .. 15) is the same three instructions:
          @   cmp    sets the carry flag when r0 >= (r2 ror 2*i), unsigned.
          @          The rotation places the root bits found so far at bit
          @          32 - 2*i and the marker bit at bit 30 - 2*i; that value
          @          is the amount by which the square of the partial root
          @          grows if bit (15 - i) of the root is set.
          @   subhs  subtracts that trial value only when carry is set. It
          @          has no S suffix, so the flags written by cmp survive.
          @   adc    r2 = r1 + (r2 << 1) + carry: the carry becomes the new
          @          lowest root bit and the 01 marker is restored.
          @ The order cmp -> subhs -> adc must be kept, because both later
          @ instructions consume the carry produced by cmp.
     40   mov    r1, #3 << 30  @ r1 = 0xC0000000, constant for the whole routine
     41   mov    r2, #1 << 30  @ r2 = 0x40000000: marker 01 in bits 31:30, no root bits yet
     42 
     43   @ unroll for i = 0 .. 15
     44 
     45   cmp    r0, r2, ror #2 * 0  @ carry = (r0 >= trial value), trial = 1 << 30 here
     46   subhs  r0, r0, r2, ror #2 * 0  @ accept the bit: remove the trial value from the remainder
     47   adc    r2, r1, r2, lsl #1  @ shift the decision (carry) into r2 as the next root bit
     48 
     49   cmp    r0, r2, ror #2 * 1
     50   subhs  r0, r0, r2, ror #2 * 1
     51   adc    r2, r1, r2, lsl #1
     52 
     53   cmp    r0, r2, ror #2 * 2
     54   subhs  r0, r0, r2, ror #2 * 2
     55   adc    r2, r1, r2, lsl #1
     56 
     57   cmp    r0, r2, ror #2 * 3
     58   subhs  r0, r0, r2, ror #2 * 3
     59   adc    r2, r1, r2, lsl #1
     60 
     61   cmp    r0, r2, ror #2 * 4
     62   subhs  r0, r0, r2, ror #2 * 4
     63   adc    r2, r1, r2, lsl #1
     64 
     65   cmp    r0, r2, ror #2 * 5
     66   subhs  r0, r0, r2, ror #2 * 5
     67   adc    r2, r1, r2, lsl #1
     68 
     69   cmp    r0, r2, ror #2 * 6
     70   subhs  r0, r0, r2, ror #2 * 6
     71   adc    r2, r1, r2, lsl #1
     72 
     73   cmp    r0, r2, ror #2 * 7
     74   subhs  r0, r0, r2, ror #2 * 7
     75   adc    r2, r1, r2, lsl #1
     76 
     77   cmp    r0, r2, ror #2 * 8
     78   subhs  r0, r0, r2, ror #2 * 8
     79   adc    r2, r1, r2, lsl #1
     80 
     81   cmp    r0, r2, ror #2 * 9
     82   subhs  r0, r0, r2, ror #2 * 9
     83   adc    r2, r1, r2, lsl #1
     84 
     85   cmp    r0, r2, ror #2 * 10
     86   subhs  r0, r0, r2, ror #2 * 10
     87   adc    r2, r1, r2, lsl #1
     88 
     89   cmp    r0, r2, ror #2 * 11
     90   subhs  r0, r0, r2, ror #2 * 11
     91   adc    r2, r1, r2, lsl #1
     92 
     93   cmp    r0, r2, ror #2 * 12
     94   subhs  r0, r0, r2, ror #2 * 12
     95   adc    r2, r1, r2, lsl #1
     96 
     97   cmp    r0, r2, ror #2 * 13
     98   subhs  r0, r0, r2, ror #2 * 13
     99   adc    r2, r1, r2, lsl #1
    100 
    101   cmp    r0, r2, ror #2 * 14
    102   subhs  r0, r0, r2, ror #2 * 14
    103   adc    r2, r1, r2, lsl #1
    104 
    105   cmp    r0, r2, ror #2 * 15
    106   subhs  r0, r0, r2, ror #2 * 15
    107   adc    r2, r1, r2, lsl #1
    108 
          @ After the 16 steps r2 = (1 << 30) | root. Clearing bits 31:30
          @ removes the marker and leaves the 16-bit root as the result in r0.
    109   bic    r0, r2, #3 << 30  @ for rounding add: cmp r0, r2  adc r2, #1
    110   bx lr  @ return to the caller (address held in lr)
    111