Home | History | Annotate | Download | only in desktop_capture
      1 /*
      2  *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "webrtc/modules/desktop_capture/differ_block_sse2.h"
     12 
     13 #if defined(_MSC_VER)
     14 #include <intrin.h>
     15 #else
     16 #include <mmintrin.h>
     17 #include <emmintrin.h>
     18 #endif
     19 
     20 #include "webrtc/modules/desktop_capture/differ_block.h"
     21 
     22 namespace webrtc {
     23 
     24 extern bool BlockDifference_SSE2_W16(const uint8_t* image1,
     25                                      const uint8_t* image2,
     26                                      int stride) {
     27   __m128i acc = _mm_setzero_si128();
     28   __m128i v0;
     29   __m128i v1;
     30   __m128i sad;
     31   for (int y = 0; y < kBlockSize; ++y) {
     32     const __m128i* i1 = reinterpret_cast<const __m128i*>(image1);
     33     const __m128i* i2 = reinterpret_cast<const __m128i*>(image2);
     34     v0 = _mm_loadu_si128(i1);
     35     v1 = _mm_loadu_si128(i2);
     36     sad = _mm_sad_epu8(v0, v1);
     37     acc = _mm_adds_epu16(acc, sad);
     38     v0 = _mm_loadu_si128(i1 + 1);
     39     v1 = _mm_loadu_si128(i2 + 1);
     40     sad = _mm_sad_epu8(v0, v1);
     41     acc = _mm_adds_epu16(acc, sad);
     42     v0 = _mm_loadu_si128(i1 + 2);
     43     v1 = _mm_loadu_si128(i2 + 2);
     44     sad = _mm_sad_epu8(v0, v1);
     45     acc = _mm_adds_epu16(acc, sad);
     46     v0 = _mm_loadu_si128(i1 + 3);
     47     v1 = _mm_loadu_si128(i2 + 3);
     48     sad = _mm_sad_epu8(v0, v1);
     49     acc = _mm_adds_epu16(acc, sad);
     50 
     51     // This essential means sad = acc >> 64. We only care about the lower 16
     52     // bits.
     53     sad = _mm_shuffle_epi32(acc, 0xEE);
     54     sad = _mm_adds_epu16(sad, acc);
     55     int diff = _mm_cvtsi128_si32(sad);
     56     if (diff)
     57       return true;
     58     image1 += stride;
     59     image2 += stride;
     60   }
     61   return false;
     62 }
     63 
     64 extern bool BlockDifference_SSE2_W32(const uint8_t* image1,
     65                                      const uint8_t* image2,
     66                                      int stride) {
     67   __m128i acc = _mm_setzero_si128();
     68   __m128i v0;
     69   __m128i v1;
     70   __m128i sad;
     71   for (int y = 0; y < kBlockSize; ++y) {
     72     const __m128i* i1 = reinterpret_cast<const __m128i*>(image1);
     73     const __m128i* i2 = reinterpret_cast<const __m128i*>(image2);
     74     v0 = _mm_loadu_si128(i1);
     75     v1 = _mm_loadu_si128(i2);
     76     sad = _mm_sad_epu8(v0, v1);
     77     acc = _mm_adds_epu16(acc, sad);
     78     v0 = _mm_loadu_si128(i1 + 1);
     79     v1 = _mm_loadu_si128(i2 + 1);
     80     sad = _mm_sad_epu8(v0, v1);
     81     acc = _mm_adds_epu16(acc, sad);
     82     v0 = _mm_loadu_si128(i1 + 2);
     83     v1 = _mm_loadu_si128(i2 + 2);
     84     sad = _mm_sad_epu8(v0, v1);
     85     acc = _mm_adds_epu16(acc, sad);
     86     v0 = _mm_loadu_si128(i1 + 3);
     87     v1 = _mm_loadu_si128(i2 + 3);
     88     sad = _mm_sad_epu8(v0, v1);
     89     acc = _mm_adds_epu16(acc, sad);
     90     v0 = _mm_loadu_si128(i1 + 4);
     91     v1 = _mm_loadu_si128(i2 + 4);
     92     sad = _mm_sad_epu8(v0, v1);
     93     acc = _mm_adds_epu16(acc, sad);
     94     v0 = _mm_loadu_si128(i1 + 5);
     95     v1 = _mm_loadu_si128(i2 + 5);
     96     sad = _mm_sad_epu8(v0, v1);
     97     acc = _mm_adds_epu16(acc, sad);
     98     v0 = _mm_loadu_si128(i1 + 6);
     99     v1 = _mm_loadu_si128(i2 + 6);
    100     sad = _mm_sad_epu8(v0, v1);
    101     acc = _mm_adds_epu16(acc, sad);
    102     v0 = _mm_loadu_si128(i1 + 7);
    103     v1 = _mm_loadu_si128(i2 + 7);
    104     sad = _mm_sad_epu8(v0, v1);
    105     acc = _mm_adds_epu16(acc, sad);
    106 
    107     // This essential means sad = acc >> 64. We only care about the lower 16
    108     // bits.
    109     sad = _mm_shuffle_epi32(acc, 0xEE);
    110     sad = _mm_adds_epu16(sad, acc);
    111     int diff = _mm_cvtsi128_si32(sad);
    112     if (diff)
    113       return true;
    114     image1 += stride;
    115     image2 += stride;
    116   }
    117   return false;
    118 }
    119 
    120 }  // namespace webrtc
    121