Home | History | Annotate | Download | only in detail
      1 ///////////////////////////////////////////////////////////////////////////////////
      2 /// OpenGL Mathematics (glm.g-truc.net)
      3 ///
      4 /// Copyright (c) 2005 - 2012 G-Truc Creation (www.g-truc.net)
      5 /// Permission is hereby granted, free of charge, to any person obtaining a copy
      6 /// of this software and associated documentation files (the "Software"), to deal
      7 /// in the Software without restriction, including without limitation the rights
      8 /// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      9 /// copies of the Software, and to permit persons to whom the Software is
     10 /// furnished to do so, subject to the following conditions:
     11 /// 
     12 /// The above copyright notice and this permission notice shall be included in
     13 /// all copies or substantial portions of the Software.
     14 /// 
     15 /// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16 /// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17 /// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     18 /// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19 /// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     20 /// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     21 /// THE SOFTWARE.
     22 ///
     23 /// @ref core
     24 /// @file glm/core/intrinsic_integer.inl
     25 /// @date 2009-05-08 / 2011-06-15
     26 /// @author Christophe Riccio
     27 ///////////////////////////////////////////////////////////////////////////////////
     28 
     29 namespace glm{
     30 namespace detail
     31 {
     32 	inline __m128i _mm_bit_interleave_si128(__m128i x)
     33 	{
     34 		__m128i const Mask4 = _mm_set1_epi32(0x0000FFFF);
     35 		__m128i const Mask3 = _mm_set1_epi32(0x00FF00FF);
     36 		__m128i const Mask2 = _mm_set1_epi32(0x0F0F0F0F);
     37 		__m128i const Mask1 = _mm_set1_epi32(0x33333333);
     38 		__m128i const Mask0 = _mm_set1_epi32(0x55555555);
     39 
     40 		__m128i Reg1;
     41 		__m128i Reg2;
     42 
     43 		// REG1 = x;
     44 		// REG2 = y;
     45 		//Reg1 = _mm_unpacklo_epi64(x, y);
     46 		Reg1 = x;
     47 
     48 		//REG1 = ((REG1 << 16) | REG1) & glm::uint64(0x0000FFFF0000FFFF);
     49 		//REG2 = ((REG2 << 16) | REG2) & glm::uint64(0x0000FFFF0000FFFF);
     50 		Reg2 = _mm_slli_si128(Reg1, 2);
     51 		Reg1 = _mm_or_si128(Reg2, Reg1);
     52 		Reg1 = _mm_and_si128(Reg1, Mask4);
     53 
     54 		//REG1 = ((REG1 <<  8) | REG1) & glm::uint64(0x00FF00FF00FF00FF);
     55 		//REG2 = ((REG2 <<  8) | REG2) & glm::uint64(0x00FF00FF00FF00FF);
     56 		Reg2 = _mm_slli_si128(Reg1, 1);
     57 		Reg1 = _mm_or_si128(Reg2, Reg1);
     58 		Reg1 = _mm_and_si128(Reg1, Mask3);
     59 
     60 		//REG1 = ((REG1 <<  4) | REG1) & glm::uint64(0x0F0F0F0F0F0F0F0F);
     61 		//REG2 = ((REG2 <<  4) | REG2) & glm::uint64(0x0F0F0F0F0F0F0F0F);
     62 		Reg2 = _mm_slli_epi32(Reg1, 4);
     63 		Reg1 = _mm_or_si128(Reg2, Reg1);
     64 		Reg1 = _mm_and_si128(Reg1, Mask2);
     65 
     66 		//REG1 = ((REG1 <<  2) | REG1) & glm::uint64(0x3333333333333333);
     67 		//REG2 = ((REG2 <<  2) | REG2) & glm::uint64(0x3333333333333333);
     68 		Reg2 = _mm_slli_epi32(Reg1, 2);
     69 		Reg1 = _mm_or_si128(Reg2, Reg1);
     70 		Reg1 = _mm_and_si128(Reg1, Mask1);
     71 
     72 		//REG1 = ((REG1 <<  1) | REG1) & glm::uint64(0x5555555555555555);
     73 		//REG2 = ((REG2 <<  1) | REG2) & glm::uint64(0x5555555555555555);
     74 		Reg2 = _mm_slli_epi32(Reg1, 1);
     75 		Reg1 = _mm_or_si128(Reg2, Reg1);
     76 		Reg1 = _mm_and_si128(Reg1, Mask0);
     77 
     78 		//return REG1 | (REG2 << 1);
     79 		Reg2 = _mm_slli_epi32(Reg1, 1);
     80 		Reg2 = _mm_srli_si128(Reg2, 8);
     81 		Reg1 = _mm_or_si128(Reg1, Reg2);
     82 	
     83 		return Reg1;
     84 	}
     85 
     86 	inline __m128i _mm_bit_interleave_si128(__m128i x, __m128i y)
     87 	{
     88 		__m128i const Mask4 = _mm_set1_epi32(0x0000FFFF);
     89 		__m128i const Mask3 = _mm_set1_epi32(0x00FF00FF);
     90 		__m128i const Mask2 = _mm_set1_epi32(0x0F0F0F0F);
     91 		__m128i const Mask1 = _mm_set1_epi32(0x33333333);
     92 		__m128i const Mask0 = _mm_set1_epi32(0x55555555);
     93 
     94 		__m128i Reg1;
     95 		__m128i Reg2;
     96 
     97 		// REG1 = x;
     98 		// REG2 = y;
     99 		Reg1 = _mm_unpacklo_epi64(x, y);
    100 
    101 		//REG1 = ((REG1 << 16) | REG1) & glm::uint64(0x0000FFFF0000FFFF);
    102 		//REG2 = ((REG2 << 16) | REG2) & glm::uint64(0x0000FFFF0000FFFF);
    103 		Reg2 = _mm_slli_si128(Reg1, 2);
    104 		Reg1 = _mm_or_si128(Reg2, Reg1);
    105 		Reg1 = _mm_and_si128(Reg1, Mask4);
    106 
    107 		//REG1 = ((REG1 <<  8) | REG1) & glm::uint64(0x00FF00FF00FF00FF);
    108 		//REG2 = ((REG2 <<  8) | REG2) & glm::uint64(0x00FF00FF00FF00FF);
    109 		Reg2 = _mm_slli_si128(Reg1, 1);
    110 		Reg1 = _mm_or_si128(Reg2, Reg1);
    111 		Reg1 = _mm_and_si128(Reg1, Mask3);
    112 
    113 		//REG1 = ((REG1 <<  4) | REG1) & glm::uint64(0x0F0F0F0F0F0F0F0F);
    114 		//REG2 = ((REG2 <<  4) | REG2) & glm::uint64(0x0F0F0F0F0F0F0F0F);
    115 		Reg2 = _mm_slli_epi32(Reg1, 4);
    116 		Reg1 = _mm_or_si128(Reg2, Reg1);
    117 		Reg1 = _mm_and_si128(Reg1, Mask2);
    118 
    119 		//REG1 = ((REG1 <<  2) | REG1) & glm::uint64(0x3333333333333333);
    120 		//REG2 = ((REG2 <<  2) | REG2) & glm::uint64(0x3333333333333333);
    121 		Reg2 = _mm_slli_epi32(Reg1, 2);
    122 		Reg1 = _mm_or_si128(Reg2, Reg1);
    123 		Reg1 = _mm_and_si128(Reg1, Mask1);
    124 
    125 		//REG1 = ((REG1 <<  1) | REG1) & glm::uint64(0x5555555555555555);
    126 		//REG2 = ((REG2 <<  1) | REG2) & glm::uint64(0x5555555555555555);
    127 		Reg2 = _mm_slli_epi32(Reg1, 1);
    128 		Reg1 = _mm_or_si128(Reg2, Reg1);
    129 		Reg1 = _mm_and_si128(Reg1, Mask0);
    130 
    131 		//return REG1 | (REG2 << 1);
    132 		Reg2 = _mm_slli_epi32(Reg1, 1);
    133 		Reg2 = _mm_srli_si128(Reg2, 8);
    134 		Reg1 = _mm_or_si128(Reg1, Reg2);
    135 	
    136 		return Reg1;
    137 	}
    138 }//namespace detail
    139 }//namespace glm
    140