Home | History | Annotate | Download | only in renderer
      1 #include "precompiled.h"
      2 //
      3 // Copyright (c) 2002-2012 The ANGLE Project Authors. All rights reserved.
      4 // Use of this source code is governed by a BSD-style license that can be
      5 // found in the LICENSE file.
      6 //
      7 
// ImageSSE2.cpp: Implements the SSE2-based functions of the rx::Image class.
// They live in a separate file because GCC can enable SSE usage only per file,
// not for individual code blocks that use SSE2 explicitly.
     11 
     12 #include "libGLESv2/Texture.h"
     13 #include "libGLESv2/renderer/Image.h"
     14 
     15 namespace rx
     16 {
     17 
     18 void Image::loadRGBAUByteDataToBGRASSE2(GLsizei width, GLsizei height,
     19                                         int inputPitch, const void *input, size_t outputPitch, void *output)
     20 {
     21     const unsigned int *source = NULL;
     22     unsigned int *dest = NULL;
     23     __m128i brMask = _mm_set1_epi32(0x00ff00ff);
     24 
     25     for (int y = 0; y < height; y++)
     26     {
     27         source = reinterpret_cast<const unsigned int*>(static_cast<const unsigned char*>(input) + y * inputPitch);
     28         dest = reinterpret_cast<unsigned int*>(static_cast<unsigned char*>(output) + y * outputPitch);
     29         int x = 0;
     30 
     31         // Make output writes aligned
     32         for (x = 0; ((reinterpret_cast<intptr_t>(&dest[x]) & 15) != 0) && x < width; x++)
     33         {
     34             unsigned int rgba = source[x];
     35             dest[x] = (_rotl(rgba, 16) & 0x00ff00ff) | (rgba & 0xff00ff00);
     36         }
     37 
     38         for (; x + 3 < width; x += 4)
     39         {
     40             __m128i sourceData = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&source[x]));
     41             // Mask out g and a, which don't change
     42             __m128i gaComponents = _mm_andnot_si128(brMask, sourceData);
     43             // Mask out b and r
     44             __m128i brComponents = _mm_and_si128(sourceData, brMask);
     45             // Swap b and r
     46             __m128i brSwapped = _mm_shufflehi_epi16(_mm_shufflelo_epi16(brComponents, _MM_SHUFFLE(2, 3, 0, 1)), _MM_SHUFFLE(2, 3, 0, 1));
     47             __m128i result = _mm_or_si128(gaComponents, brSwapped);
     48             _mm_store_si128(reinterpret_cast<__m128i*>(&dest[x]), result);
     49         }
     50 
     51         // Perform leftover writes
     52         for (; x < width; x++)
     53         {
     54             unsigned int rgba = source[x];
     55             dest[x] = (_rotl(rgba, 16) & 0x00ff00ff) | (rgba & 0xff00ff00);
     56         }
     57     }
     58 }
     59 
     60 void Image::loadAlphaDataToBGRASSE2(GLsizei width, GLsizei height,
     61                                     int inputPitch, const void *input, size_t outputPitch, void *output)
     62 {
     63     const unsigned char *source = NULL;
     64     unsigned int *dest = NULL;
     65     __m128i zeroWide = _mm_setzero_si128();
     66 
     67     for (int y = 0; y < height; y++)
     68     {
     69         source = static_cast<const unsigned char*>(input) + y * inputPitch;
     70         dest = reinterpret_cast<unsigned int*>(static_cast<unsigned char*>(output) + y * outputPitch);
     71 
     72         int x;
     73         // Make output writes aligned
     74         for (x = 0; ((reinterpret_cast<intptr_t>(&dest[x]) & 0xF) != 0 && x < width); x++)
     75         {
     76             dest[x] = static_cast<unsigned int>(source[x]) << 24;
     77         }
     78 
     79         for (; x + 7 < width; x += 8)
     80         {
     81             __m128i sourceData = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(&source[x]));
     82             // Interleave each byte to 16bit, make the lower byte to zero
     83             sourceData = _mm_unpacklo_epi8(zeroWide, sourceData);
     84             // Interleave each 16bit to 32bit, make the lower 16bit to zero
     85             __m128i lo = _mm_unpacklo_epi16(zeroWide, sourceData);
     86             __m128i hi = _mm_unpackhi_epi16(zeroWide, sourceData);
     87 
     88             _mm_store_si128(reinterpret_cast<__m128i*>(&dest[x]), lo);
     89             _mm_store_si128(reinterpret_cast<__m128i*>(&dest[x + 4]), hi);
     90         }
     91 
     92         // Handle the remainder
     93         for (; x < width; x++)
     94         {
     95             dest[x] = static_cast<unsigned int>(source[x]) << 24;
     96         }
     97     }
     98 }
     99 
    100 }
    101