Home | History | Annotate | Download | only in renderer
      1 #include "precompiled.h"
      2 //
      3 // Copyright (c) 2002-2012 The ANGLE Project Authors. All rights reserved.
      4 // Use of this source code is governed by a BSD-style license that can be
      5 // found in the LICENSE file.
      6 //
      7 
       8 // loadimage.cpp: Defines image loading functions. They live
       9 // in a separate file because GCC can enable SSE usage only per file,
      10 // not for individual code blocks that use SSE2 explicitly.
     11 
     12 #include "libGLESv2/renderer/loadimage.h"
     13 
     14 namespace rx
     15 {
     16 
     17     void loadAlphaDataToBGRASSE2(int width, int height, int depth,
     18         const void *input, unsigned int inputRowPitch, unsigned int inputDepthPitch,
     19         void *output, unsigned int outputRowPitch, unsigned int outputDepthPitch)
     20     {
     21         const unsigned char *source = NULL;
     22         unsigned int *dest = NULL;
     23         __m128i zeroWide = _mm_setzero_si128();
     24 
     25         for (int z = 0; z < depth; z++)
     26         {
     27             for (int y = 0; y < height; y++)
     28             {
     29                 source = static_cast<const unsigned char*>(input) + y * inputRowPitch + z * inputDepthPitch;
     30                 dest = reinterpret_cast<unsigned int*>(static_cast<unsigned char*>(output) + y * outputRowPitch + z * outputDepthPitch);
     31 
     32                 int x;
     33                 // Make output writes aligned
     34                 for (x = 0; ((reinterpret_cast<intptr_t>(&dest[x]) & 0xF) != 0 && x < width); x++)
     35                 {
     36                     dest[x] = static_cast<unsigned int>(source[x]) << 24;
     37                 }
     38 
     39                 for (; x + 7 < width; x += 8)
     40                 {
     41                     __m128i sourceData = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(&source[x]));
     42                     // Interleave each byte to 16bit, make the lower byte to zero
     43                     sourceData = _mm_unpacklo_epi8(zeroWide, sourceData);
     44                     // Interleave each 16bit to 32bit, make the lower 16bit to zero
     45                     __m128i lo = _mm_unpacklo_epi16(zeroWide, sourceData);
     46                     __m128i hi = _mm_unpackhi_epi16(zeroWide, sourceData);
     47 
     48                     _mm_store_si128(reinterpret_cast<__m128i*>(&dest[x]), lo);
     49                     _mm_store_si128(reinterpret_cast<__m128i*>(&dest[x + 4]), hi);
     50                 }
     51 
     52                 // Handle the remainder
     53                 for (; x < width; x++)
     54                 {
     55                     dest[x] = static_cast<unsigned int>(source[x]) << 24;
     56                 }
     57             }
     58         }
     59     }
     60 
     61     void loadRGBAUByteDataToBGRASSE2(int width, int height, int depth,
     62         const void *input, unsigned int inputRowPitch, unsigned int inputDepthPitch,
     63         void *output, unsigned int outputRowPitch, unsigned int outputDepthPitch)
     64     {
     65         const unsigned int *source = NULL;
     66         unsigned int *dest = NULL;
     67         __m128i brMask = _mm_set1_epi32(0x00ff00ff);
     68 
     69         for (int z = 0; z < depth; z++)
     70         {
     71             for (int y = 0; y < height; y++)
     72             {
     73                 source = reinterpret_cast<const unsigned int*>(static_cast<const unsigned char*>(input) + y * inputRowPitch + z * inputDepthPitch);
     74                 dest = reinterpret_cast<unsigned int*>(static_cast<unsigned char*>(output) + y * outputRowPitch + z * outputDepthPitch);
     75                 int x = 0;
     76 
     77                 // Make output writes aligned
     78                 for (x = 0; ((reinterpret_cast<intptr_t>(&dest[x]) & 15) != 0) && x < width; x++)
     79                 {
     80                     unsigned int rgba = source[x];
     81                     dest[x] = (_rotl(rgba, 16) & 0x00ff00ff) | (rgba & 0xff00ff00);
     82                 }
     83 
     84                 for (; x + 3 < width; x += 4)
     85                 {
     86                     __m128i sourceData = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&source[x]));
     87                     // Mask out g and a, which don't change
     88                     __m128i gaComponents = _mm_andnot_si128(brMask, sourceData);
     89                     // Mask out b and r
     90                     __m128i brComponents = _mm_and_si128(sourceData, brMask);
     91                     // Swap b and r
     92                     __m128i brSwapped = _mm_shufflehi_epi16(_mm_shufflelo_epi16(brComponents, _MM_SHUFFLE(2, 3, 0, 1)), _MM_SHUFFLE(2, 3, 0, 1));
     93                     __m128i result = _mm_or_si128(gaComponents, brSwapped);
     94                     _mm_store_si128(reinterpret_cast<__m128i*>(&dest[x]), result);
     95                 }
     96 
     97                 // Perform leftover writes
     98                 for (; x < width; x++)
     99                 {
    100                     unsigned int rgba = source[x];
    101                     dest[x] = (_rotl(rgba, 16) & 0x00ff00ff) | (rgba & 0xff00ff00);
    102                 }
    103             }
    104         }
    105     }
    106 
    107 }
    108