Home | History | Annotate | Download | only in signal_processing
      1 /*
      2  * Written by Wilco Dijkstra, 1996. The following email exchange establishes the
      3  * license.
      4  *
      5  * From: Wilco Dijkstra <Wilco.Dijkstra (at) ntlworld.com>
      6  * Date: Fri, Jun 24, 2011 at 3:20 AM
      7  * Subject: Re: sqrt routine
      8  * To: Kevin Ma <kma (at) google.com>
      9  * Hi Kevin,
     10  * Thanks for asking. Those routines are public domain (originally posted to
     11  * comp.sys.arm a long time ago), so you can use them freely for any purpose.
     12  * Cheers,
     13  * Wilco
     14  *
     15  * ----- Original Message -----
     16  * From: "Kevin Ma" <kma (at) google.com>
     17  * To: <Wilco.Dijkstra (at) ntlworld.com>
     18  * Sent: Thursday, June 23, 2011 11:44 PM
     19  * Subject: Fwd: sqrt routine
     20  * Hi Wilco,
     21  * I saw your sqrt routine from several web sites, including
     22  * http://www.finesse.demon.co.uk/steven/sqrt.html.
     23  * Just wonder if there's any copyright information with your Successive
     24  * approximation routines, or if I can freely use it for any purpose.
     25  * Thanks.
     26  * Kevin
     27  */
     28 
     29 // Minor modifications in code style for WebRTC, 2012.
     30 // Code optimizations for MIPS, 2013.
     31 
     32 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
     33 
     34 /*
     35  * Algorithm:
     36  * Successive approximation of the equation (root + delta) ^ 2 = N
     37  * until delta < 1. If delta < 1 we have the integer part of SQRT (N).
     38  * Use delta = 2^i for i = 15 .. 0.
     39  *
     40  * Output precision is 16 bits. Note for large input values (close to
     41  * 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word)
     42  * contains the MSB information (a non-sign value). Do with caution
     43  * if you need to cast the output to int16_t type.
     44  *
     45  * If the input value is negative, it returns 0.
     46  */
     47 
     48 
     49 int32_t WebRtcSpl_SqrtFloor(int32_t value)
     50 {
     51   int32_t root = 0, tmp1, tmp2, tmp3, tmp4;
     52 
     53   __asm __volatile(
     54     ".set   push                                       \n\t"
     55     ".set   noreorder                                  \n\t"
     56 
     57     "lui    %[tmp1],      0x4000                       \n\t"
     58     "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
     59     "sub    %[tmp3],      %[value],     %[tmp1]        \n\t"
     60     "lui    %[tmp1],      0x1                          \n\t"
     61     "or     %[tmp4],      %[root],      %[tmp1]        \n\t"
     62     "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
     63     "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
     64 
     65     "addiu  %[tmp1],      $0,           0x4000         \n\t"
     66     "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
     67     "sll    %[tmp1],      14                           \n\t"
     68     "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
     69     "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
     70     "ori    %[tmp4],      %[root],      0x8000         \n\t"
     71     "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
     72     "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
     73 
     74     "addiu  %[tmp1],      $0,           0x2000         \n\t"
     75     "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
     76     "sll    %[tmp1],      13                           \n\t"
     77     "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
     78     "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
     79     "ori    %[tmp4],      %[root],      0x4000         \n\t"
     80     "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
     81     "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
     82 
     83     "addiu  %[tmp1],      $0,           0x1000         \n\t"
     84     "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
     85     "sll    %[tmp1],      12                           \n\t"
     86     "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
     87     "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
     88     "ori    %[tmp4],      %[root],      0x2000         \n\t"
     89     "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
     90     "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
     91 
     92     "addiu  %[tmp1],      $0,           0x800          \n\t"
     93     "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
     94     "sll    %[tmp1],      11                           \n\t"
     95     "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
     96     "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
     97     "ori    %[tmp4],      %[root],      0x1000         \n\t"
     98     "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
     99     "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
    100 
    101     "addiu  %[tmp1],      $0,           0x400          \n\t"
    102     "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
    103     "sll    %[tmp1],      10                           \n\t"
    104     "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
    105     "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
    106     "ori    %[tmp4],      %[root],      0x800          \n\t"
    107     "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
    108     "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
    109 
    110     "addiu  %[tmp1],      $0,           0x200          \n\t"
    111     "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
    112     "sll    %[tmp1],      9                            \n\t"
    113     "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
    114     "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
    115     "ori    %[tmp4],      %[root],       0x400         \n\t"
    116     "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
    117     "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
    118 
    119     "addiu  %[tmp1],      $0,           0x100          \n\t"
    120     "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
    121     "sll    %[tmp1],      8                            \n\t"
    122     "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
    123     "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
    124     "ori    %[tmp4],      %[root],      0x200          \n\t"
    125     "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
    126     "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
    127 
    128     "addiu  %[tmp1],      $0,           0x80           \n\t"
    129     "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
    130     "sll    %[tmp1],      7                            \n\t"
    131     "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
    132     "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
    133     "ori    %[tmp4],      %[root],      0x100          \n\t"
    134     "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
    135     "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
    136 
    137     "addiu  %[tmp1],      $0,           0x40           \n\t"
    138     "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
    139     "sll    %[tmp1],      6                            \n\t"
    140     "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
    141     "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
    142     "ori    %[tmp4],      %[root],      0x80           \n\t"
    143     "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
    144     "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
    145 
    146     "addiu  %[tmp1],      $0,           0x20           \n\t"
    147     "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
    148     "sll    %[tmp1],      5                            \n\t"
    149     "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
    150     "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
    151     "ori    %[tmp4],      %[root],      0x40           \n\t"
    152     "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
    153     "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
    154 
    155     "addiu  %[tmp1],      $0,           0x10           \n\t"
    156     "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
    157     "sll    %[tmp1],      4                            \n\t"
    158     "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
    159     "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
    160     "ori    %[tmp4],      %[root],      0x20           \n\t"
    161     "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
    162     "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
    163 
    164     "addiu  %[tmp1],      $0,           0x8            \n\t"
    165     "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
    166     "sll    %[tmp1],      3                            \n\t"
    167     "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
    168     "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
    169     "ori    %[tmp4],      %[root],      0x10           \n\t"
    170     "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
    171     "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
    172 
    173     "addiu  %[tmp1],      $0,           0x4            \n\t"
    174     "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
    175     "sll    %[tmp1],      2                            \n\t"
    176     "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
    177     "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
    178     "ori    %[tmp4],      %[root],      0x8            \n\t"
    179     "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
    180     "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
    181 
    182     "addiu  %[tmp1],      $0,           0x2            \n\t"
    183     "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
    184     "sll    %[tmp1],      1                            \n\t"
    185     "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
    186     "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
    187     "ori    %[tmp4],      %[root],      0x4            \n\t"
    188     "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
    189     "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
    190 
    191     "addiu  %[tmp1],      $0,           0x1            \n\t"
    192     "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
    193     "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
    194     "ori    %[tmp4],      %[root],      0x2            \n\t"
    195     "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
    196 
    197     ".set   pop                                        \n\t"
    198 
    199     : [root] "+r" (root), [value] "+r" (value),
    200       [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2),
    201       [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4)
    202     :
    203   );
    204 
    205   return root >> 1;
    206 }
    207 
    208