///////////////////////////////////////////////////////////////////////////////////
/// OpenGL Mathematics (glm.g-truc.net)
///
/// Copyright (c) 2005 - 2012 G-Truc Creation (www.g-truc.net)
/// Permission is hereby granted, free of charge, to any person obtaining a copy
/// of this software and associated documentation files (the "Software"), to deal
/// in the Software without restriction, including without limitation the rights
/// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
/// copies of the Software, and to permit persons to whom the Software is
/// furnished to do so, subject to the following conditions:
///
/// The above copyright notice and this permission notice shall be included in
/// all copies or substantial portions of the Software.
///
/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
/// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
/// THE SOFTWARE.
///
/// @ref core
/// @file glm/core/intrinsic_integer.inl
/// @date 2009-05-08 / 2011-06-15
/// @author Christophe Riccio
///////////////////////////////////////////////////////////////////////////////////

namespace glm{
namespace detail
{
	/// Interleaves the bits of two 32-bit values held in a single SSE2 register
	/// (Morton-style bit interleaving).
	///
	/// @param x Packed operands: the low 64-bit lane carries the first value and
	/// the high 64-bit lane carries the second value. Only bits 0..31 of each
	/// lane are meaningful. Bits 32..63 of each lane are expected to be zero:
	/// the two byte shifts below act on the whole 128-bit register, so stray
	/// upper bits would be merged into the result.
	/// @return A register whose low 64 bits hold the interleaved value: bit i of
	/// the first value ends up at bit 2*i and bit i of the second value at bit
	/// 2*i+1. The high 64 bits are a by-product of the computation (the spread
	/// bits of the second value) and are not part of the interleaved result.
	inline __m128i _mm_bit_interleave_si128(__m128i x)
	{
		__m128i const Mask4 = _mm_set1_epi32(0x0000FFFF);
		__m128i const Mask3 = _mm_set1_epi32(0x00FF00FF);
		__m128i const Mask2 = _mm_set1_epi32(0x0F0F0F0F);
		__m128i const Mask1 = _mm_set1_epi32(0x33333333);
		__m128i const Mask0 = _mm_set1_epi32(0x55555555);

		// Each stage below applies, to both 64-bit lanes at once, one step of
		// the scalar "spread" sequence
		//     v = ((v << s) | v) & m
		// with (s, m) = (16, 0x0000FFFF0000FFFF), (8, 0x00FF00FF00FF00FF),
		// (4, 0x0F0F0F0F0F0F0F0F), (2, 0x3333333333333333),
		// (1, 0x5555555555555555).

		// s = 16: whole-register shift by 2 bytes; the mask discards whatever
		// moved out of the target 16-bit groups.
		__m128i Spread = _mm_or_si128(_mm_slli_si128(x, 2), x);
		Spread = _mm_and_si128(Spread, Mask4);

		// s = 8: whole-register shift by 1 byte.
		Spread = _mm_or_si128(_mm_slli_si128(Spread, 1), Spread);
		Spread = _mm_and_si128(Spread, Mask3);

		// s = 4, 2, 1: from here on the data never leaves its 32-bit word, so
		// per-word bit shifts are sufficient.
		Spread = _mm_or_si128(_mm_slli_epi32(Spread, 4), Spread);
		Spread = _mm_and_si128(Spread, Mask2);

		Spread = _mm_or_si128(_mm_slli_epi32(Spread, 2), Spread);
		Spread = _mm_and_si128(Spread, Mask1);

		Spread = _mm_or_si128(_mm_slli_epi32(Spread, 1), Spread);
		Spread = _mm_and_si128(Spread, Mask0);

		// Scalar equivalent: return first | (second << 1).
		// Move every spread bit to the odd positions, then bring the high lane
		// (second value) down onto the low lane before merging.
		__m128i const Odd = _mm_srli_si128(_mm_slli_epi32(Spread, 1), 8);
		return _mm_or_si128(Spread, Odd);
	}

	/// Interleaves the bits of two 32-bit values supplied in separate registers.
	///
	/// @param x Register whose low 64 bits carry the first value (bits 0..31
	/// meaningful, bits 32..63 expected to be zero).
	/// @param y Register whose low 64 bits carry the second value (same layout).
	/// @return Same as the single-register overload applied to the low 64 bits
	/// of x (low lane) combined with the low 64 bits of y (high lane).
	inline __m128i _mm_bit_interleave_si128(__m128i x, __m128i y)
	{
		// Pack both operands into one register and reuse the packed overload
		// instead of duplicating the whole sequence.
		return _mm_bit_interleave_si128(_mm_unpacklo_epi64(x, y));
	}
}//namespace detail
}//namespace glm