Home | History | Annotate | Download | only in ext
      1 // Optimizations for random number extensions, x86 version -*- C++ -*-
      2 
      3 // Copyright (C) 2012-2013 Free Software Foundation, Inc.
      4 //
      5 // This file is part of the GNU ISO C++ Library.  This library is free
      6 // software; you can redistribute it and/or modify it under the
      7 // terms of the GNU General Public License as published by the
      8 // Free Software Foundation; either version 3, or (at your option)
      9 // any later version.
     10 
     11 // This library is distributed in the hope that it will be useful,
     12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14 // GNU General Public License for more details.
     15 
     16 // Under Section 7 of GPL version 3, you are granted additional
     17 // permissions described in the GCC Runtime Library Exception, version
     18 // 3.1, as published by the Free Software Foundation.
     19 
     20 // You should have received a copy of the GNU General Public License and
     21 // a copy of the GCC Runtime Library Exception along with this program;
     22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     23 // <http://www.gnu.org/licenses/>.
     24 
/** @file ext/opt_random.h
 *  This is an internal header file, included by other library headers.
 *  Do not attempt to use it directly. @headername{ext/random}
 */
     29 
     30 #ifndef _EXT_OPT_RANDOM_H
     31 #define _EXT_OPT_RANDOM_H 1
     32 
     33 #pragma GCC system_header
     34 
     35 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
     36 
     37 #ifdef __SSE2__
     38 
     39 namespace __gnu_cxx _GLIBCXX_VISIBILITY(default)
     40 {
     41 _GLIBCXX_BEGIN_NAMESPACE_VERSION
     42 
     43   namespace {
     44 
     45     template<size_t __sl1, size_t __sl2, size_t __sr1, size_t __sr2,
     46 	     uint32_t __msk1, uint32_t __msk2, uint32_t __msk3, uint32_t __msk4>
     47       inline __m128i __sse2_recursion(__m128i __a, __m128i __b,
     48 				      __m128i __c, __m128i __d)
     49       {
     50 	__m128i __y = _mm_srli_epi32(__b, __sr1);
     51 	__m128i __z = _mm_srli_si128(__c, __sr2);
     52 	__m128i __v = _mm_slli_epi32(__d, __sl1);
     53 	__z = _mm_xor_si128(__z, __a);
     54 	__z = _mm_xor_si128(__z, __v);
     55 	__m128i __x = _mm_slli_si128(__a, __sl2);
     56 	__y = _mm_and_si128(__y, _mm_set_epi32(__msk4, __msk3, __msk2, __msk1));
     57 	__z = _mm_xor_si128(__z, __x);
     58 	return _mm_xor_si128(__z, __y);
     59       }
     60 
     61   }
     62 
     63 
     64 #define _GLIBCXX_OPT_HAVE_RANDOM_SFMT_GEN_READ	1
     65   template<typename _UIntType, size_t __m,
     66 	   size_t __pos1, size_t __sl1, size_t __sl2,
     67 	   size_t __sr1, size_t __sr2,
     68 	   uint32_t __msk1, uint32_t __msk2,
     69 	   uint32_t __msk3, uint32_t __msk4,
     70 	   uint32_t __parity1, uint32_t __parity2,
     71 	   uint32_t __parity3, uint32_t __parity4>
     72     void simd_fast_mersenne_twister_engine<_UIntType, __m,
     73 					   __pos1, __sl1, __sl2, __sr1, __sr2,
     74 					   __msk1, __msk2, __msk3, __msk4,
     75 					   __parity1, __parity2, __parity3,
     76 					   __parity4>::
     77     _M_gen_rand(void)
     78     {
     79       __m128i __r1 = _mm_load_si128(&_M_state[_M_nstate - 2]);
     80       __m128i __r2 = _mm_load_si128(&_M_state[_M_nstate - 1]);
     81 
     82       size_t __i;
     83       for (__i = 0; __i < _M_nstate - __pos1; ++__i)
     84 	{
     85 	  __m128i __r = __sse2_recursion<__sl1, __sl2, __sr1, __sr2,
     86 					 __msk1, __msk2, __msk3, __msk4>
     87 	    (_M_state[__i], _M_state[__i + __pos1], __r1, __r2);
     88 	  _mm_store_si128(&_M_state[__i], __r);
     89 	  __r1 = __r2;
     90 	  __r2 = __r;
     91 	}
     92       for (; __i < _M_nstate; ++__i)
     93 	{
     94 	  __m128i __r = __sse2_recursion<__sl1, __sl2, __sr1, __sr2,
     95 					 __msk1, __msk2, __msk3, __msk4>
     96 	    (_M_state[__i], _M_state[__i + __pos1 - _M_nstate], __r1, __r2);
     97 	  _mm_store_si128(&_M_state[__i], __r);
     98 	  __r1 = __r2;
     99 	  __r2 = __r;
    100 	}
    101 
    102       _M_pos = 0;
    103     }
    104 
    105 
    106 #define _GLIBCXX_OPT_HAVE_RANDOM_SFMT_OPERATOREQUAL	1
    107   template<typename _UIntType, size_t __m,
    108 	   size_t __pos1, size_t __sl1, size_t __sl2,
    109 	   size_t __sr1, size_t __sr2,
    110 	   uint32_t __msk1, uint32_t __msk2,
    111 	   uint32_t __msk3, uint32_t __msk4,
    112 	   uint32_t __parity1, uint32_t __parity2,
    113 	   uint32_t __parity3, uint32_t __parity4>
    114     bool
    115     operator==(const __gnu_cxx::simd_fast_mersenne_twister_engine<_UIntType,
    116 	       __m, __pos1, __sl1, __sl2, __sr1, __sr2,
    117 	       __msk1, __msk2, __msk3, __msk4,
    118 	       __parity1, __parity2, __parity3, __parity4>& __lhs,
    119 	       const __gnu_cxx::simd_fast_mersenne_twister_engine<_UIntType,
    120 	       __m, __pos1, __sl1, __sl2, __sr1, __sr2,
    121 	       __msk1, __msk2, __msk3, __msk4,
    122 	       __parity1, __parity2, __parity3, __parity4>& __rhs)
    123     {
    124       __m128i __res = _mm_cmpeq_epi8(__lhs._M_state[0], __rhs._M_state[0]);
    125       for (size_t __i = 1; __i < __lhs._M_nstate; ++__i)
    126 	__res = _mm_and_si128(__res, _mm_cmpeq_epi8(__lhs._M_state[__i],
    127 						    __rhs._M_state[__i]));
    128       return (_mm_movemask_epi8(__res) == 0xffff
    129 	      && __lhs._M_pos == __rhs._M_pos);
    130     }
    131 
    132 
    133 _GLIBCXX_END_NAMESPACE_VERSION
    134 } // namespace
    135 
    136 #endif // __SSE2__
    137 
    138 #endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    139 
    140 #endif // _EXT_OPT_RANDOM_H
    141