Home | History | Annotate | Download | only in arm
      1 /* -----------------------------------------------------------------------------
      2 Software License for The Fraunhofer FDK AAC Codec Library for Android
      3 
      4  Copyright © 1995 - 2018 Fraunhofer-Gesellschaft zur Förderung der angewandten
      5 Forschung e.V. All rights reserved.
      6 
      7  1.    INTRODUCTION
      8 The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software
      9 that implements the MPEG Advanced Audio Coding ("AAC") encoding and decoding
     10 scheme for digital audio. This FDK AAC Codec software is intended to be used on
     11 a wide variety of Android devices.
     12 
     13 AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient
     14 general perceptual audio codecs. AAC-ELD is considered the best-performing
     15 full-bandwidth communications codec by independent studies and is widely
     16 deployed. AAC has been standardized by ISO and IEC as part of the MPEG
     17 specifications.
     18 
     19 Patent licenses for necessary patent claims for the FDK AAC Codec (including
     20 those of Fraunhofer) may be obtained through Via Licensing
     21 (www.vialicensing.com) or through the respective patent owners individually for
     22 the purpose of encoding or decoding bit streams in products that are compliant
     23 with the ISO/IEC MPEG audio standards. Please note that most manufacturers of
     24 Android devices already license these patent claims through Via Licensing or
     25 directly from the patent owners, and therefore FDK AAC Codec software may
     26 already be covered under those patent licenses when it is used for those
     27 licensed purposes only.
     28 
     29 Commercially-licensed AAC software libraries, including floating-point versions
     30 with enhanced sound quality, are also available from Fraunhofer. Users are
     31 encouraged to check the Fraunhofer website for additional applications
     32 information and documentation.
     33 
     34 2.    COPYRIGHT LICENSE
     35 
     36 Redistribution and use in source and binary forms, with or without modification,
     37 are permitted without payment of copyright license fees provided that you
     38 satisfy the following conditions:
     39 
     40 You must retain the complete text of this software license in redistributions of
     41 the FDK AAC Codec or your modifications thereto in source code form.
     42 
     43 You must retain the complete text of this software license in the documentation
     44 and/or other materials provided with redistributions of the FDK AAC Codec or
     45 your modifications thereto in binary form. You must make available free of
     46 charge copies of the complete source code of the FDK AAC Codec and your
     47 modifications thereto to recipients of copies in binary form.
     48 
     49 The name of Fraunhofer may not be used to endorse or promote products derived
     50 from this library without prior written permission.
     51 
     52 You may not charge copyright license fees for anyone to use, copy or distribute
     53 the FDK AAC Codec software or your modifications thereto.
     54 
     55 Your modified versions of the FDK AAC Codec must carry prominent notices stating
     56 that you changed the software and the date of any change. For modified versions
     57 of the FDK AAC Codec, the term "Fraunhofer FDK AAC Codec Library for Android"
     58 must be replaced by the term "Third-Party Modified Version of the Fraunhofer FDK
     59 AAC Codec Library for Android."
     60 
     61 3.    NO PATENT LICENSE
     62 
     63 NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without
     64 limitation the patents of Fraunhofer, ARE GRANTED BY THIS SOFTWARE LICENSE.
     65 Fraunhofer provides no warranty of patent non-infringement with respect to this
     66 software.
     67 
     68 You may use this FDK AAC Codec software or modifications thereto only for
     69 purposes that are authorized by appropriate patent licenses.
     70 
     71 4.    DISCLAIMER
     72 
     73 This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright
     74 holders and contributors "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES,
     75 including but not limited to the implied warranties of merchantability and
     76 fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
     77 CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary,
     78 or consequential damages, including but not limited to procurement of substitute
     79 goods or services; loss of use, data, or profits, or business interruption,
     80 however caused and on any theory of liability, whether in contract, strict
     81 liability, or tort (including negligence), arising in any way out of the use of
     82 this software, even if advised of the possibility of such damage.
     83 
     84 5.    CONTACT INFORMATION
     85 
     86 Fraunhofer Institute for Integrated Circuits IIS
     87 Attention: Audio and Multimedia Departments - FDK AAC LL
     88 Am Wolfsmantel 33
     89 91058 Erlangen, Germany
     90 
     91 www.iis.fraunhofer.de/amm
     92 amm-info (at) iis.fraunhofer.de
     93 ----------------------------------------------------------------------------- */
     94 
     95 /******************* Library for basic calculation routines ********************
     96 
     97    Author(s):
     98 
     99    Description:
    100 
    101 *******************************************************************************/
    102 
    103 #ifdef FUNCTION_dct_IV_func1
    104 
    105 /*
    106    Note: This assembler routine is here, because the ARM926 compiler does
    107          not encode the inline assembler with optimal speed.
    108          With this version, we save 2 cycles per loop iteration.
    109 */
    110 
    111 __asm void dct_IV_func1(int i, const FIXP_SPK *twiddle,
    112                         FIXP_DBL *RESTRICT pDat_0, FIXP_DBL *RESTRICT pDat_1) {
    113   /* Register map:
    114      r0   i
    115      r1   twiddle
    116      r2   pDat_0
    117      r3   pDat_1
    118      r4   accu1
    119      r5   accu2
    120      r6   accu3
    121      r7   accu4
    122      r8   val_tw
    123      r9   accuX
    124   */
    125   PUSH{r4 - r9}
    126 
    127   /* 44 cycles for 2 iterations = 22 cycles/iteration */
    128   dct_IV_loop1_start
    129       /*  First iteration */
    130       LDR r8,
    131       [r1],
    132 # 4 // val_tw = *twiddle++;
    133       LDR r5,
    134       [ r2, #0 ]  // accu2 = pDat_0[0]
    135       LDR r4,
    136       [ r3, #0 ]  // accu1 = pDat_1[0]
    137 
    138       SMULWT r9,
    139       r5,
    140       r8  // accuX = accu2*val_tw.l
    141           SMULWB r5,
    142       r5,
    143       r8  // accu2 = accu2*val_tw.h
    144           RSB r9,
    145       r9,
    146 # 0 // accuX =-accu2*val_tw.l
    147       SMLAWT r5, r4, r8,
    148       r5  // accu2 = accu2*val_tw.h + accu1*val_tw.l
    149           SMLAWB r4,
    150       r4, r8,
    151       r9  // accu1 = accu1*val_tw.h - accu2*val_tw.l
    152 
    153           LDR r8,
    154       [r1],
    155 # 4 // val_tw = *twiddle++;
    156       LDR r7,
    157       [ r3, # - 4 ]  // accu4 = pDat_1[-1]
    158       LDR r6,
    159       [ r2, #4 ]  // accu3 = pDat_0[1]
    160 
    161       SMULWB r9,
    162       r7,
    163       r8  // accuX = accu4*val_tw.h
    164           SMULWT r7,
    165       r7,
    166       r8  // accu4 = accu4*val_tw.l
    167           RSB r9,
    168       r9,
    169 # 0 // accuX =-accu4*val_tw.h
    170       SMLAWB r7, r6, r8,
    171       r7  // accu4 = accu4*val_tw.l+accu3*val_tw.h
    172           SMLAWT r6,
    173       r6, r8,
    174       r9  // accu3 = accu3*val_tw.l-accu4*val_tw.h
    175 
    176           STR r5,
    177       [r2],
    178 # 4 // *pDat_0++ = accu2
    179       STR r4, [r2],
    180 # 4 // *pDat_0++ = accu1
    181       STR r6, [r3],
    182 #- 4 // *pDat_1-- = accu3
    183       STR r7, [r3],
    184 #- 4 // *pDat_1-- = accu4
    185 
    186       /*  Second iteration */
    187       LDR r8, [r1],
    188 # 4 // val_tw = *twiddle++;
    189       LDR r5,
    190       [ r2, #0 ]  // accu2 = pDat_0[0]
    191       LDR r4,
    192       [ r3, #0 ]  // accu1 = pDat_1[0]
    193 
    194       SMULWT r9,
    195       r5,
    196       r8  // accuX = accu2*val_tw.l
    197           SMULWB r5,
    198       r5,
    199       r8  // accu2 = accu2*val_tw.h
    200           RSB r9,
    201       r9,
    202 # 0 // accuX =-accu2*val_tw.l
    203       SMLAWT r5, r4, r8,
    204       r5  // accu2 = accu2*val_tw.h + accu1*val_tw.l
    205           SMLAWB r4,
    206       r4, r8,
    207       r9  // accu1 = accu1*val_tw.h - accu2*val_tw.l
    208 
    209           LDR r8,
    210       [r1],
    211 # 4 // val_tw = *twiddle++;
    212       LDR r7,
    213       [ r3, # - 4 ]  // accu4 = pDat_1[-1]
    214       LDR r6,
    215       [ r2, #4 ]  // accu3 = pDat_0[1]
    216 
    217       SMULWB r9,
    218       r7,
    219       r8  // accuX = accu4*val_tw.h
    220           SMULWT r7,
    221       r7,
    222       r8  // accu4 = accu4*val_tw.l
    223           RSB r9,
    224       r9,
    225 # 0 // accuX =-accu4*val_tw.h
    226       SMLAWB r7, r6, r8,
    227       r7  // accu4 = accu4*val_tw.l+accu3*val_tw.h
    228           SMLAWT r6,
    229       r6, r8,
    230       r9  // accu3 = accu3*val_tw.l-accu4*val_tw.h
    231 
    232           STR r5,
    233       [r2],
    234 # 4 // *pDat_0++ = accu2
    235       STR r4, [r2],
    236 # 4 // *pDat_0++ = accu1
    237       STR r6, [r3],
    238 #- 4 // *pDat_1-- = accu3
    239       STR r7, [r3],
    240 #- 4 // *pDat_1-- = accu4
    241 
    242       SUBS r0, r0,
    243 # 1 BNE dct_IV_loop1_start
    244 
    245       POP { r4 - r9 }
    246 
    247   BX lr
    248 }
    249 
    250 #endif /* FUNCTION_dct_IV_func1 */
    251 
    252 #ifdef FUNCTION_dct_IV_func2
    253 
    254 /* __attribute__((noinline)) */
    255 static inline void dct_IV_func2(int i, const FIXP_SPK *twiddle,
    256                                 FIXP_DBL *pDat_0, FIXP_DBL *pDat_1, int inc) {
    257   FIXP_DBL accu1, accu2, accu3, accu4, accuX;
    258   LONG val_tw;
    259 
    260   accu1 = pDat_1[-2];
    261   accu2 = pDat_1[-1];
    262 
    263   *--pDat_1 = -(pDat_0[1] >> 1);
    264   *pDat_0++ = (pDat_0[0] >> 1);
    265 
    266   twiddle += inc;
    267 
    268   __asm {
    269     LDR     val_tw, [twiddle], inc, LSL #2  // val_tw = *twiddle; twiddle += inc
    270     B       dct_IV_loop2_2nd_part
    271 
    272         /* 42 cycles for 2 iterations = 21 cycles/iteration */
    273 dct_IV_loop2:
    274     SMULWT  accuX, accu2, val_tw
    275     SMULWB  accu2, accu2, val_tw
    276     RSB     accuX, accuX, #0
    277     SMLAWB  accuX, accu1, val_tw, accuX
    278     SMLAWT  accu2, accu1, val_tw, accu2
    279     STR     accuX, [pDat_0], #4
    280     STR     accu2, [pDat_1, #-4] !
    281 
    282     LDR     accu4, [pDat_0, #4]
    283     LDR     accu3, [pDat_0]
    284     SMULWB  accuX, accu4, val_tw
    285     SMULWT  accu4, accu4, val_tw
    286     RSB     accuX, accuX, #0
    287     SMLAWT  accuX, accu3, val_tw, accuX
    288     SMLAWB  accu4, accu3, val_tw, accu4
    289 
    290     LDR     accu1, [pDat_1, #-8]
    291     LDR     accu2, [pDat_1, #-4]
    292 
    293     LDR     val_tw, [twiddle], inc, LSL #2  // val_tw = *twiddle; twiddle += inc
    294 
    295     STR     accuX, [pDat_1, #-4] !
    296     STR     accu4, [pDat_0], #4
    297 
    298 dct_IV_loop2_2nd_part:
    299     SMULWT  accuX, accu2, val_tw
    300     SMULWB  accu2, accu2, val_tw
    301     RSB     accuX, accuX, #0
    302     SMLAWB  accuX, accu1, val_tw, accuX
    303     SMLAWT  accu2, accu1, val_tw, accu2
    304     STR     accuX, [pDat_0], #4
    305     STR     accu2, [pDat_1, #-4] !
    306 
    307     LDR     accu4, [pDat_0, #4]
    308     LDR     accu3, [pDat_0]
    309     SMULWB  accuX, accu4, val_tw
    310     SMULWT  accu4, accu4, val_tw
    311     RSB     accuX, accuX, #0
    312     SMLAWT  accuX, accu3, val_tw, accuX
    313     SMLAWB  accu4, accu3, val_tw, accu4
    314 
    315     LDR     accu1, [pDat_1, #-8]
    316     LDR     accu2, [pDat_1, #-4]
    317 
    318     STR     accuX, [pDat_1, #-4] !
    319     STR     accu4, [pDat_0], #4
    320 
    321     LDR     val_tw, [twiddle], inc, LSL #2  // val_tw = *twiddle; twiddle += inc
    322 
    323     SUBS    i, i, #1
    324     BNE     dct_IV_loop2
    325   }
    326 
    327   /* Last Sin and Cos value pair are the same */
    328   accu1 = fMultDiv2(accu1, WTC(0x5a82799a));
    329   accu2 = fMultDiv2(accu2, WTC(0x5a82799a));
    330 
    331   *--pDat_1 = accu1 + accu2;
    332   *pDat_0++ = accu1 - accu2;
    333 }
    334 #endif /* FUNCTION_dct_IV_func2 */
    335 
    336 #ifdef FUNCTION_dst_IV_func1
    337 
    338 __asm void dst_IV_func1(int i, const FIXP_SPK *twiddle, FIXP_DBL *pDat_0,
    339                         FIXP_DBL *pDat_1) {
    340   /* Register map:
    341      r0   i
    342      r1   twiddle
    343      r2   pDat_0
    344      r3   pDat_1
    345      r4   accu1
    346      r5   accu2
    347      r6   accu3
    348      r7   accu4
    349      r8   val_tw
    350      r9   accuX
    351   */
    352   PUSH{r4 - r9}
    353 
    354   dst_IV_loop1 LDR r8,
    355       [r1],
    356 # 4 // val_tw = *twiddle++
    357       LDR r5,
    358       [r2]  // accu2 = pDat_0[0]
    359       LDR r6,
    360       [ r2, #4 ]  // accu3 = pDat_0[1]
    361       RSB r5,
    362       r5,
    363 # 0 // accu2 = -accu2
    364       SMULWT r9, r5,
    365       r8  // accuX = (-accu2)*val_tw.l
    366           LDR r4,
    367       [ r3, # - 4 ]  // accu1 = pDat_1[-1]
    368       RSB r9,
    369       r9,
    370 # 0 // accuX = -(-accu2)*val_tw.l
    371       SMLAWB r9, r4, r8,
    372       r9  // accuX = accu1*val_tw.h-(-accu2)*val_tw.l
    373           SMULWT r4,
    374       r4,
    375       r8  // accu1 = accu1*val_tw.l
    376           LDR r7,
    377       [ r3, # - 8 ]  // accu4 = pDat_1[-2]
    378       SMLAWB r5,
    379       r5, r8,
    380       r4  // accu2 = (-accu2)*val_tw.t+accu1*val_tw.l
    381           LDR r8,
    382       [r1],
    383 # 4 // val_tw = *twiddle++
    384       STR r5, [r2],
    385 # 4 // *pDat_0++ = accu2
    386       STR r9, [r2],
    387 # 4 // *pDat_0++ = accu1 (accuX)
    388       RSB r7, r7,
    389 # 0 // accu4 = -accu4
    390       SMULWB r5, r7,
    391       r8  // accu2 = (-accu4)*val_tw.h
    392           SMULWB r4,
    393       r6,
    394       r8  // accu1 = (-accu4)*val_tw.l
    395           RSB r5,
    396       r5,
    397 # 0 // accu2 = -(-accu4)*val_tw.h
    398       SMLAWT r6, r6, r8,
    399       r5  // accu3 = (-accu4)*val_tw.l-(-accu3)*val_tw.h
    400           SMLAWT r7,
    401       r7, r8,
    402       r4  // accu4 = (-accu3)*val_tw.l+(-accu4)*val_tw.h
    403           STR r6,
    404       [ r3, # - 4 ] !  // *--pDat_1 = accu3
    405       STR r7,
    406       [ r3, # - 4 ] !  // *--pDat_1 = accu4
    407 
    408       LDR r8,
    409       [r1],
    410 # 4 // val_tw = *twiddle++
    411       LDR r5,
    412       [r2]  // accu2 = pDat_0[0]
    413       LDR r6,
    414       [ r2, #4 ]  // accu3 = pDat_0[1]
    415       RSB r5,
    416       r5,
    417 # 0 // accu2 = -accu2
    418       SMULWT r9, r5,
    419       r8  // accuX = (-accu2)*val_tw.l
    420           LDR r4,
    421       [ r3, # - 4 ]  // accu1 = pDat_1[-1]
    422       RSB r9,
    423       r9,
    424 # 0 // accuX = -(-accu2)*val_tw.l
    425       SMLAWB r9, r4, r8,
    426       r9  // accuX = accu1*val_tw.h-(-accu2)*val_tw.l
    427           SMULWT r4,
    428       r4,
    429       r8  // accu1 = accu1*val_tw.l
    430           LDR r7,
    431       [ r3, # - 8 ]  // accu4 = pDat_1[-2]
    432       SMLAWB r5,
    433       r5, r8,
    434       r4  // accu2 = (-accu2)*val_tw.t+accu1*val_tw.l
    435           LDR r8,
    436       [r1],
    437 # 4 // val_tw = *twiddle++
    438       STR r5, [r2],
    439 # 4 // *pDat_0++ = accu2
    440       STR r9, [r2],
    441 # 4 // *pDat_0++ = accu1 (accuX)
    442       RSB r7, r7,
    443 # 0 // accu4 = -accu4
    444       SMULWB r5, r7,
    445       r8  // accu2 = (-accu4)*val_tw.h
    446           SMULWB r4,
    447       r6,
    448       r8  // accu1 = (-accu4)*val_tw.l
    449           RSB r5,
    450       r5,
    451 # 0 // accu2 = -(-accu4)*val_tw.h
    452       SMLAWT r6, r6, r8,
    453       r5  // accu3 = (-accu4)*val_tw.l-(-accu3)*val_tw.h
    454           SMLAWT r7,
    455       r7, r8,
    456       r4  // accu4 = (-accu3)*val_tw.l+(-accu4)*val_tw.h
    457           STR r6,
    458       [ r3, # - 4 ] !  // *--pDat_1 = accu3
    459       STR r7,
    460       [ r3, # - 4 ] !  // *--pDat_1 = accu4
    461 
    462       SUBS r0,
    463       r0,
    464 # 4 // i-= 4
    465       BNE dst_IV_loop1
    466 
    467           POP{r4 - r9} BX lr
    468 }
    469 #endif /* FUNCTION_dst_IV_func1 */
    470 
    471 #ifdef FUNCTION_dst_IV_func2
    472 
    473 /* __attribute__((noinline)) */
    474 static inline void dst_IV_func2(int i, const FIXP_SPK *twiddle,
    475                                 FIXP_DBL *RESTRICT pDat_0,
    476                                 FIXP_DBL *RESTRICT pDat_1, int inc) {
    477   FIXP_DBL accu1, accu2, accu3, accu4;
    478   LONG val_tw;
    479 
    480   accu4 = pDat_0[0];
    481   accu3 = pDat_0[1];
    482   accu4 >>= 1;
    483   accu3 >>= 1;
    484   accu4 = -accu4;
    485 
    486   accu1 = pDat_1[-1];
    487   accu2 = pDat_1[0];
    488 
    489   *pDat_0++ = accu3;
    490   *pDat_1-- = accu4;
    491 
    492   __asm {
    493     B       dst_IV_loop2_2nd_part
    494 
    495         /* 50 cycles for 2 iterations = 25 cycles/iteration */
    496 
    497 dst_IV_loop2:
    498 
    499     LDR     val_tw, [twiddle], inc, LSL #2  // val_tw = *twiddle; twiddle += inc
    500 
    501     RSB     accu2, accu2, #0  // accu2 = -accu2
    502     RSB     accu1, accu1, #0  // accu1 = -accu1
    503     SMULWT  accu3, accu2, val_tw  // accu3 = (-accu2)*val_tw.l
    504     SMULWT  accu4, accu1, val_tw  // accu4 = (-accu1)*val_tw.l
    505     RSB     accu3, accu3, #0  // accu3 = -accu2*val_tw.l
    506     SMLAWB  accu1, accu1, val_tw, accu3  // accu1 = -accu1*val_tw.h-(-accu2)*val_tw.l
    507     SMLAWB  accu2, accu2, val_tw, accu4  // accu2 = (-accu1)*val_tw.l+(-accu2)*val_tw.h
    508     STR     accu1, [pDat_1], #-4  // *pDat_1-- = accu1
    509     STR     accu2, [pDat_0], #4  // *pDat_0++ = accu2
    510 
    511     LDR     accu4, [pDat_0]  // accu4 = pDat_0[0]
    512     LDR     accu3, [pDat_0, #4]  // accu3 = pDat_0[1]
    513 
    514     RSB     accu4, accu4, #0  // accu4 = -accu4
    515     RSB     accu3, accu3, #0  // accu3 = -accu3
    516 
    517     SMULWB  accu1, accu3, val_tw  // accu1 = (-accu3)*val_tw.h
    518     SMULWT  accu2, accu3, val_tw  // accu2 = (-accu3)*val_tw.l
    519     RSB     accu1, accu1, #0  // accu1 = -(-accu3)*val_tw.h
    520     SMLAWT  accu3, accu4, val_tw, accu1  // accu3 = (-accu4)*val_tw.l-(-accu3)*val_tw.h
    521     SMLAWB  accu4, accu4, val_tw, accu2  // accu4 = (-accu3)*val_tw.l+(-accu4)*val_tw.h
    522 
    523     LDR     accu1, [pDat_1, #-4]  // accu1 = pDat_1[-1]
    524     LDR     accu2, [pDat_1]  // accu2 = pDat_1[0]
    525 
    526     STR     accu3, [pDat_0], #4  // *pDat_0++ = accu3
    527     STR     accu4, [pDat_1], #-4  // *pDat_1-- = accu4
    528 
    529 dst_IV_loop2_2nd_part:
    530 
    531     LDR     val_tw, [twiddle], inc, LSL #2  // val_tw = *twiddle; twiddle += inc
    532 
    533     RSB     accu2, accu2, #0  // accu2 = -accu2
    534     RSB     accu1, accu1, #0  // accu1 = -accu1
    535     SMULWT  accu3, accu2, val_tw  // accu3 = (-accu2)*val_tw.l
    536     SMULWT  accu4, accu1, val_tw  // accu4 = (-accu1)*val_tw.l
    537     RSB     accu3, accu3, #0  // accu3 = -accu2*val_tw.l
    538     SMLAWB  accu1, accu1, val_tw, accu3  // accu1 = -accu1*val_tw.h-(-accu2)*val_tw.l
    539     SMLAWB  accu2, accu2, val_tw, accu4  // accu2 = (-accu1)*val_tw.l+(-accu2)*val_tw.h
    540     STR     accu1, [pDat_1], #-4  // *pDat_1-- = accu1
    541     STR     accu2, [pDat_0], #4  // *pDat_0++ = accu2
    542 
    543     LDR     accu4, [pDat_0]  // accu4 = pDat_0[0]
    544     LDR     accu3, [pDat_0, #4]  // accu3 = pDat_0[1]
    545 
    546     RSB     accu4, accu4, #0  // accu4 = -accu4
    547     RSB     accu3, accu3, #0  // accu3 = -accu3
    548 
    549     SMULWB  accu1, accu3, val_tw  // accu1 = (-accu3)*val_tw.h
    550     SMULWT  accu2, accu3, val_tw  // accu2 = (-accu3)*val_tw.l
    551     RSB     accu1, accu1, #0  // accu1 = -(-accu3)*val_tw.h
    552     SMLAWT  accu3, accu4, val_tw, accu1  // accu3 = (-accu4)*val_tw.l-(-accu3)*val_tw.h
    553     SMLAWB  accu4, accu4, val_tw, accu2  // accu4 = (-accu3)*val_tw.l+(-accu4)*val_tw.h
    554 
    555     LDR     accu1, [pDat_1, #-4]  // accu1 = pDat_1[-1]
    556     LDR     accu2, [pDat_1]  // accu2 = pDat_1[0]
    557 
    558     STR     accu3, [pDat_0], #4  // *pDat_0++ = accu3
    559     STR     accu4, [pDat_1], #-4  // *pDat_1-- = accu4
    560 
    561     SUBS    i, i, #1
    562     BNE     dst_IV_loop2
    563   }
    564 
    565   /* Last Sin and Cos value pair are the same */
    566   accu1 = fMultDiv2(-accu1, WTC(0x5a82799a));
    567   accu2 = fMultDiv2(-accu2, WTC(0x5a82799a));
    568 
    569   *pDat_0 = accu1 + accu2;
    570   *pDat_1 = accu1 - accu2;
    571 }
    572 #endif /* FUNCTION_dst_IV_func2 */
    573