Home | History | Annotate | Download | only in source
      1 /*
      2  *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS. All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "libyuv/basic_types.h"
     12 #include "libyuv/row.h"
     13 
     14 #ifdef __cplusplus
     15 namespace libyuv {
     16 extern "C" {
     17 #endif
     18 
     19 #if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
     20 
      // Returns the sum of squared byte differences between src_a and src_b,
      // computed with SSE2 inline assembly that consumes 16 bytes per iteration.
      //
      //   src_a, src_b  Input buffers. Each is read with movdqa, an aligned
      //                 16-byte load, and its pointer is advanced by 16 per pass.
      //   count         Number of bytes to compare; reduced by 16 per pass. The
      //                 loop body runs before count is tested, so one 16-byte
      //                 chunk is always read from each buffer.
      //
      // The partial sums live in 32-bit lanes; nothing in this function guards
      // against wrap-around on large inputs.
      // NOTE(review): presumably callers pass 16-byte aligned pointers and a
      // positive count that is a multiple of 16 -- confirm against call sites.
      // NOTE(review): LABELALIGN, MEMACCESS and MEMLEA are macros defined outside
      // this file; presumably they emit a label alignment directive and the
      // memory operand / address for the numbered asm operand -- confirm.
      21 uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
      22   uint32 sse;
      23   asm volatile (  // NOLINT
      24     "pxor      %%xmm0,%%xmm0                   \n"  // xmm0 = 0: four 32-bit partial sums
      25     "pxor      %%xmm5,%%xmm5                   \n"  // xmm5 = 0: zero source for the unpacks
      26     LABELALIGN
      27   "1:                                          \n"
      28     "movdqa    " MEMACCESS(0) ",%%xmm1         \n"  // xmm1 = 16 bytes of src_a
      29     "lea       " MEMLEA(0x10, 0) ",%0          \n"  // src_a += 16
      30     "movdqa    " MEMACCESS(1) ",%%xmm2         \n"  // xmm2 = 16 bytes of src_b
      31     "lea       " MEMLEA(0x10, 1) ",%1          \n"  // src_b += 16
      32     "sub       $0x10,%2                        \n"  // count -= 16; flags are consumed by jg below
      33     "movdqa    %%xmm1,%%xmm3                   \n"  // xmm3 = copy of a
      34     "psubusb   %%xmm2,%%xmm1                   \n"  // xmm1 = a - b, saturated at 0
      35     "psubusb   %%xmm3,%%xmm2                   \n"  // xmm2 = b - a, saturated at 0
      36     "por       %%xmm2,%%xmm1                   \n"  // xmm1 = |a - b| per byte
      37     "movdqa    %%xmm1,%%xmm2                   \n"  // xmm2 = copy of |a - b|
      38     "punpcklbw %%xmm5,%%xmm1                   \n"  // low 8 differences -> 16-bit words
      39     "punpckhbw %%xmm5,%%xmm2                   \n"  // high 8 differences -> 16-bit words
      40     "pmaddwd   %%xmm1,%%xmm1                   \n"  // square each word, add adjacent pairs
      41     "pmaddwd   %%xmm2,%%xmm2                   \n"  // same for the high half
      42     "paddd     %%xmm1,%%xmm0                   \n"  // accumulate into xmm0
      43     "paddd     %%xmm2,%%xmm0                   \n"
      44     "jg        1b                              \n"  // repeat while count > 0 (SSE ops above leave flags alone)
      45 
      46     "pshufd    $0xee,%%xmm0,%%xmm1             \n"  // xmm1 = upper two dwords of xmm0
      47     "paddd     %%xmm1,%%xmm0                   \n"  // fold four sums into two
      48     "pshufd    $0x1,%%xmm0,%%xmm1              \n"  // xmm1[0] = xmm0[1]
      49     "paddd     %%xmm1,%%xmm0                   \n"  // fold two sums into one
      50     "movd      %%xmm0,%3                       \n"  // sse = low dword of xmm0
      51 
      52   : "+r"(src_a),      // %0
      53     "+r"(src_b),      // %1
      54     "+r"(count),      // %2
      55     "=g"(sse)         // %3
      56   :
      57   : "memory", "cc"
      58 #if defined(__SSE2__)
      59     , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
      60 #endif
      61   );  // NOLINT
      62   return sse;
      63 }
     64 
      65 #endif  // !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
     66 
     67 #if !defined(LIBYUV_DISABLE_X86) && \
     68     (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
      // Marks the SSE4.1 hash routine as available and defines its multiplier
      // tables. Each entry is a power of 33 truncated to 32 bits (see the
      // per-entry comments); the values are consumed by the inline assembly in
      // HashDjb2_SSE41 through "m" input operands.
      // NOTE(review): the tables are only read by that asm but are not declared
      // const; uvec32 is declared elsewhere and is presumably a 16-byte aligned
      // vector of four uint32 (they are loaded with movdqa) -- confirm.
      69 #define HAS_HASHDJB2_SSE41
      // Lane 0 scales the running hash by 33^16 per 16-byte block; lanes 1-3 are 0.
      70 static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 };  // 33 ^ 16
      // Weights multiplied with bytes 0-3 of each 16-byte block.
      71 static uvec32 kHashMul0 = {
      72   0x0c3525e1,  // 33 ^ 15
      73   0xa3476dc1,  // 33 ^ 14
      74   0x3b4039a1,  // 33 ^ 13
      75   0x4f5f0981,  // 33 ^ 12
      76 };
      // Weights multiplied with bytes 4-7 of each 16-byte block.
      77 static uvec32 kHashMul1 = {
      78   0x30f35d61,  // 33 ^ 11
      79   0x855cb541,  // 33 ^ 10
      80   0x040a9121,  // 33 ^ 9
      81   0x747c7101,  // 33 ^ 8
      82 };
      // Weights multiplied with bytes 8-11 of each 16-byte block.
      83 static uvec32 kHashMul2 = {
      84   0xec41d4e1,  // 33 ^ 7
      85   0x4cfa3cc1,  // 33 ^ 6
      86   0x025528a1,  // 33 ^ 5
      87   0x00121881,  // 33 ^ 4
      88 };
      // Weights multiplied with bytes 12-15 of each 16-byte block.
      89 static uvec32 kHashMul3 = {
      90   0x00008c61,  // 33 ^ 3
      91   0x00000441,  // 33 ^ 2
      92   0x00000021,  // 33 ^ 1
      93   0x00000001,  // 33 ^ 0
      94 };
     95 
      // Hashes src 16 bytes at a time with SSE4.1 inline assembly, starting from
      // seed. Each pass computes
      //   hash = hash * 33^16 + sum(src[i] * 33^(15 - i)), i = 0..15   (mod 2^32)
      // which equals applying hash = hash * 33 + src[i] to the 16 bytes in order.
      //
      //   src    Input bytes; read with movdqu (no alignment requirement) and
      //          advanced by 16 per pass.
      //   count  Number of bytes to hash; reduced by 16 per pass. The loop body
      //          runs before count is tested, so 16 bytes are always read.
      //   seed   Initial hash value.
      //
      // Returns the low 32-bit lane of the accumulator after the last pass.
      // NOTE(review): presumably callers pass a positive count that is a multiple
      // of 16 -- confirm against call sites.
      // NOTE(review): LABELALIGN, MEMACCESS and MEMLEA are macros defined outside
      // this file; presumably they emit a label alignment directive and the
      // memory operand / address for the numbered asm operand -- confirm.
      96 uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
      97   uint32 hash;
      98   asm volatile (  // NOLINT
      99     "movd      %2,%%xmm0                       \n"  // xmm0[0] = seed (running hash)
     100     "pxor      %%xmm7,%%xmm7                   \n"  // xmm7 = 0: zero source for the unpacks
     101     "movdqa    %4,%%xmm6                       \n"  // xmm6 = kHash16x33
     102     LABELALIGN
     103   "1:                                          \n"
     104     "movdqu    " MEMACCESS(0) ",%%xmm1         \n"  // xmm1 = next 16 bytes of src
     105     "lea       " MEMLEA(0x10, 0) ",%0          \n"  // src += 16
     106     "pmulld    %%xmm6,%%xmm0                   \n"  // hash *= 33^16; lanes 1-3 are multiplied by 0
     107     "movdqa    %5,%%xmm5                       \n"  // xmm5 = kHashMul0
     108     "movdqa    %%xmm1,%%xmm2                   \n"
     109     "punpcklbw %%xmm7,%%xmm2                   \n"  // xmm2 = bytes 0-7 as 16-bit words
     110     "movdqa    %%xmm2,%%xmm3                   \n"
     111     "punpcklwd %%xmm7,%%xmm3                   \n"  // xmm3 = bytes 0-3 as dwords
     112     "pmulld    %%xmm5,%%xmm3                   \n"  // bytes 0-3 * kHashMul0
     113     "movdqa    %6,%%xmm5                       \n"  // xmm5 = kHashMul1
     114     "movdqa    %%xmm2,%%xmm4                   \n"
     115     "punpckhwd %%xmm7,%%xmm4                   \n"  // xmm4 = bytes 4-7 as dwords
     116     "pmulld    %%xmm5,%%xmm4                   \n"  // bytes 4-7 * kHashMul1
     117     "movdqa    %7,%%xmm5                       \n"  // xmm5 = kHashMul2
     118     "punpckhbw %%xmm7,%%xmm1                   \n"  // xmm1 = bytes 8-15 as 16-bit words
     119     "movdqa    %%xmm1,%%xmm2                   \n"
     120     "punpcklwd %%xmm7,%%xmm2                   \n"  // xmm2 = bytes 8-11 as dwords
     121     "pmulld    %%xmm5,%%xmm2                   \n"  // bytes 8-11 * kHashMul2
     122     "movdqa    %8,%%xmm5                       \n"  // xmm5 = kHashMul3
     123     "punpckhwd %%xmm7,%%xmm1                   \n"  // xmm1 = bytes 12-15 as dwords
     124     "pmulld    %%xmm5,%%xmm1                   \n"  // bytes 12-15 * kHashMul3
     125     "paddd     %%xmm4,%%xmm3                   \n"  // combine the four product vectors...
     126     "paddd     %%xmm2,%%xmm1                   \n"
     127     "sub       $0x10,%1                        \n"  // count -= 16; flags are consumed by jg below
     128     "paddd     %%xmm3,%%xmm1                   \n"  // ...into four partial sums in xmm1
     129     "pshufd    $0xe,%%xmm1,%%xmm2              \n"  // xmm2[0..1] = xmm1[2..3]
     130     "paddd     %%xmm2,%%xmm1                   \n"  // fold four sums into two
     131     "pshufd    $0x1,%%xmm1,%%xmm2              \n"  // xmm2[0] = xmm1[1]
     132     "paddd     %%xmm2,%%xmm1                   \n"  // xmm1[0] = weighted sum of the 16 bytes
     133     "paddd     %%xmm1,%%xmm0                   \n"  // hash += weighted sum (lane 0 is the result)
     134     "jg        1b                              \n"  // repeat while count > 0 (SSE ops above leave flags alone)
     135     "movd      %%xmm0,%3                       \n"  // hash = low dword of xmm0
     136   : "+r"(src),        // %0
     137     "+r"(count),      // %1
     138     "+rm"(seed),      // %2
     139     "=g"(hash)        // %3
     140   : "m"(kHash16x33),  // %4
     141     "m"(kHashMul0),   // %5
     142     "m"(kHashMul1),   // %6
     143     "m"(kHashMul2),   // %7
     144     "m"(kHashMul3)    // %8
     145   : "memory", "cc"
     146 #if defined(__SSE2__)
     147     , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
     148 #endif
     149   );  // NOLINT
     150   return hash;
     151 }
     152 #endif  // !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
    153 
    154 #ifdef __cplusplus
    155 }  // extern "C"
    156 }  // namespace libyuv
    157 #endif
    158 
    159