Home | History | Annotate | Download | only in signal_processing
      1 /*
      2  *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 /*
     13  * This file contains the function WebRtcSpl_CrossCorrelation().
     14  * The description header can be found in signal_processing_library.h
     15  *
     16  */
     17 
     18 #include "signal_processing_library.h"
     19 
     20 void WebRtcSpl_CrossCorrelation(WebRtc_Word32* cross_correlation, WebRtc_Word16* seq1,
     21                                 WebRtc_Word16* seq2, WebRtc_Word16 dim_seq,
     22                                 WebRtc_Word16 dim_cross_correlation,
     23                                 WebRtc_Word16 right_shifts,
     24                                 WebRtc_Word16 step_seq2)
     25 {
     26     int i, j;
     27     WebRtc_Word16* seq1Ptr;
     28     WebRtc_Word16* seq2Ptr;
     29     WebRtc_Word32* CrossCorrPtr;
     30 
     31 #ifdef _XSCALE_OPT_
     32 
     33 #ifdef _WIN32
     34 #pragma message("NOTE: _XSCALE_OPT_ optimizations are used (overrides _ARM_OPT_ and requires /QRxscale compiler flag)")
     35 #endif
     36 
     37     __int64 macc40;
     38 
     39     int iseq1[250];
     40     int iseq2[250];
     41     int iseq3[250];
     42     int * iseq1Ptr;
     43     int * iseq2Ptr;
     44     int * iseq3Ptr;
     45     int len, i_len;
     46 
     47     seq1Ptr = seq1;
     48     iseq1Ptr = iseq1;
     49     for(i = 0; i < ((dim_seq + 1) >> 1); i++)
     50     {
     51         *iseq1Ptr = (unsigned short)*seq1Ptr++;
     52         *iseq1Ptr++ |= (WebRtc_Word32)*seq1Ptr++ << 16;
     53 
     54     }
     55 
     56     if(dim_seq%2)
     57     {
     58         *(iseq1Ptr-1) &= 0x0000ffff;
     59     }
     60     *iseq1Ptr = 0;
     61     iseq1Ptr++;
     62     *iseq1Ptr = 0;
     63     iseq1Ptr++;
     64     *iseq1Ptr = 0;
     65 
     66     if(step_seq2 < 0)
     67     {
     68         seq2Ptr = seq2 - dim_cross_correlation + 1;
     69         CrossCorrPtr = &cross_correlation[dim_cross_correlation - 1];
     70     }
     71     else
     72     {
     73         seq2Ptr = seq2;
     74         CrossCorrPtr = cross_correlation;
     75     }
     76 
     77     len = dim_seq + dim_cross_correlation - 1;
     78     i_len = (len + 1) >> 1;
     79     iseq2Ptr = iseq2;
     80 
     81     iseq3Ptr = iseq3;
     82     for(i = 0; i < i_len; i++)
     83     {
     84         *iseq2Ptr = (unsigned short)*seq2Ptr++;
     85         *iseq3Ptr = (unsigned short)*seq2Ptr;
     86         *iseq2Ptr++ |= (WebRtc_Word32)*seq2Ptr++ << 16;
     87         *iseq3Ptr++ |= (WebRtc_Word32)*seq2Ptr << 16;
     88     }
     89 
     90     if(len % 2)
     91     {
     92         iseq2[i_len - 1] &= 0x0000ffff;
     93         iseq3[i_len - 1] = 0;
     94     }
     95     else
     96     iseq3[i_len - 1] &= 0x0000ffff;
     97 
     98     iseq2[i_len] = 0;
     99     iseq3[i_len] = 0;
    100     iseq2[i_len + 1] = 0;
    101     iseq3[i_len + 1] = 0;
    102     iseq2[i_len + 2] = 0;
    103     iseq3[i_len + 2] = 0;
    104 
    105     // Set pointer to start value
    106     iseq2Ptr = iseq2;
    107     iseq3Ptr = iseq3;
    108 
    109     i_len = (dim_seq + 7) >> 3;
    110     for (i = 0; i < dim_cross_correlation; i++)
    111     {
    112 
    113         iseq1Ptr = iseq1;
    114 
    115         macc40 = 0;
    116 
    117         _WriteCoProcessor(macc40, 0);
    118 
    119         if((i & 1))
    120         {
    121             iseq3Ptr = iseq3 + (i >> 1);
    122             for (j = i_len; j > 0; j--)
    123             {
    124                 _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq3Ptr++);
    125                 _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq3Ptr++);
    126                 _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq3Ptr++);
    127                 _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq3Ptr++);
    128             }
    129         }
    130         else
    131         {
    132             iseq2Ptr = iseq2 + (i >> 1);
    133             for (j = i_len; j > 0; j--)
    134             {
    135                 _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq2Ptr++);
    136                 _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq2Ptr++);
    137                 _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq2Ptr++);
    138                 _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq2Ptr++);
    139             }
    140 
    141         }
    142 
    143         macc40 = _ReadCoProcessor(0);
    144         *CrossCorrPtr = (WebRtc_Word32)(macc40 >> right_shifts);
    145         CrossCorrPtr += step_seq2;
    146     }
    147 #else // #ifdef _XSCALE_OPT_
    148 #ifdef _ARM_OPT_
    149     WebRtc_Word16 dim_seq8 = (dim_seq >> 3) << 3;
    150 #endif
    151 
    152     CrossCorrPtr = cross_correlation;
    153 
    154     for (i = 0; i < dim_cross_correlation; i++)
    155     {
    156         // Set the pointer to the static vector, set the pointer to the sliding vector
    157         // and initialize cross_correlation
    158         seq1Ptr = seq1;
    159         seq2Ptr = seq2 + (step_seq2 * i);
    160         (*CrossCorrPtr) = 0;
    161 
    162 #ifndef _ARM_OPT_
    163 #ifdef _WIN32
    164 #pragma message("NOTE: default implementation is used")
    165 #endif
    166         // Perform the cross correlation
    167         for (j = 0; j < dim_seq; j++)
    168         {
    169             (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr), right_shifts);
    170             seq1Ptr++;
    171             seq2Ptr++;
    172         }
    173 #else
    174 #ifdef _WIN32
    175 #pragma message("NOTE: _ARM_OPT_ optimizations are used")
    176 #endif
    177         if (right_shifts == 0)
    178         {
    179             // Perform the optimized cross correlation
    180             for (j = 0; j < dim_seq8; j = j + 8)
    181             {
    182                 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr));
    183                 seq1Ptr++;
    184                 seq2Ptr++;
    185                 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr));
    186                 seq1Ptr++;
    187                 seq2Ptr++;
    188                 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr));
    189                 seq1Ptr++;
    190                 seq2Ptr++;
    191                 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr));
    192                 seq1Ptr++;
    193                 seq2Ptr++;
    194                 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr));
    195                 seq1Ptr++;
    196                 seq2Ptr++;
    197                 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr));
    198                 seq1Ptr++;
    199                 seq2Ptr++;
    200                 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr));
    201                 seq1Ptr++;
    202                 seq2Ptr++;
    203                 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr));
    204                 seq1Ptr++;
    205                 seq2Ptr++;
    206             }
    207 
    208             for (j = dim_seq8; j < dim_seq; j++)
    209             {
    210                 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr));
    211                 seq1Ptr++;
    212                 seq2Ptr++;
    213             }
    214         }
    215         else // right_shifts != 0
    216 
    217         {
    218             // Perform the optimized cross correlation
    219             for (j = 0; j < dim_seq8; j = j + 8)
    220             {
    221                 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr),
    222                                                              right_shifts);
    223                 seq1Ptr++;
    224                 seq2Ptr++;
    225                 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr),
    226                                                              right_shifts);
    227                 seq1Ptr++;
    228                 seq2Ptr++;
    229                 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr),
    230                                                              right_shifts);
    231                 seq1Ptr++;
    232                 seq2Ptr++;
    233                 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr),
    234                                                              right_shifts);
    235                 seq1Ptr++;
    236                 seq2Ptr++;
    237                 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr),
    238                                                              right_shifts);
    239                 seq1Ptr++;
    240                 seq2Ptr++;
    241                 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr),
    242                                                              right_shifts);
    243                 seq1Ptr++;
    244                 seq2Ptr++;
    245                 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr),
    246                                                              right_shifts);
    247                 seq1Ptr++;
    248                 seq2Ptr++;
    249                 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr),
    250                                                              right_shifts);
    251                 seq1Ptr++;
    252                 seq2Ptr++;
    253             }
    254 
    255             for (j = dim_seq8; j < dim_seq; j++)
    256             {
    257                 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr),
    258                                                              right_shifts);
    259                 seq1Ptr++;
    260                 seq2Ptr++;
    261             }
    262         }
    263 #endif
    264         CrossCorrPtr++;
    265     }
    266 #endif
    267 }
    268