Home | History | Annotate | Download | only in test
      1 /*
      2  * This file derives from SFMT 1.3.3
      3  * (http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/index.html), which was
      4  * released under the terms of the following license:
      5  *
      6  *   Copyright (c) 2006,2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima
      7  *   University. All rights reserved.
      8  *
      9  *   Redistribution and use in source and binary forms, with or without
     10  *   modification, are permitted provided that the following conditions are
     11  *   met:
     12  *
     13  *       * Redistributions of source code must retain the above copyright
     14  *         notice, this list of conditions and the following disclaimer.
     15  *       * Redistributions in binary form must reproduce the above
     16  *         copyright notice, this list of conditions and the following
     17  *         disclaimer in the documentation and/or other materials provided
     18  *         with the distribution.
     19  *       * Neither the name of the Hiroshima University nor the names of
     20  *         its contributors may be used to endorse or promote products
     21  *         derived from this software without specific prior written
     22  *         permission.
     23  *
     24  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     25  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     26  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     27  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     28  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     29  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     30  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     31  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     32  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     33  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     34  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     35  */
     36 /**
     37  * @file  SFMT-sse2.h
     38  * @brief SIMD oriented Fast Mersenne Twister(SFMT) for Intel SSE2
     39  *
     40  * @author Mutsuo Saito (Hiroshima University)
     41  * @author Makoto Matsumoto (Hiroshima University)
     42  *
     43  * @note We assume LITTLE ENDIAN in this file
     44  *
     45  * Copyright (C) 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima
     46  * University. All rights reserved.
     47  *
     48  * The new BSD License is applied to this software, see LICENSE.txt
     49  */
     50 
     51 #ifndef SFMT_SSE2_H
     52 #define SFMT_SSE2_H
     53 
     54 /**
     55  * This function represents the recursion formula.
     56  * @param a a 128-bit part of the interal state array
     57  * @param b a 128-bit part of the interal state array
     58  * @param c a 128-bit part of the interal state array
     59  * @param d a 128-bit part of the interal state array
     60  * @param mask 128-bit mask
     61  * @return output
     62  */
     63 JEMALLOC_ALWAYS_INLINE __m128i mm_recursion(__m128i *a, __m128i *b,
     64 				   __m128i c, __m128i d, __m128i mask) {
     65     __m128i v, x, y, z;
     66 
     67     x = _mm_load_si128(a);
     68     y = _mm_srli_epi32(*b, SR1);
     69     z = _mm_srli_si128(c, SR2);
     70     v = _mm_slli_epi32(d, SL1);
     71     z = _mm_xor_si128(z, x);
     72     z = _mm_xor_si128(z, v);
     73     x = _mm_slli_si128(x, SL2);
     74     y = _mm_and_si128(y, mask);
     75     z = _mm_xor_si128(z, x);
     76     z = _mm_xor_si128(z, y);
     77     return z;
     78 }
     79 
     80 /**
     81  * This function fills the internal state array with pseudorandom
     82  * integers.
     83  */
     84 JEMALLOC_INLINE void gen_rand_all(sfmt_t *ctx) {
     85     int i;
     86     __m128i r, r1, r2, mask;
     87     mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1);
     88 
     89     r1 = _mm_load_si128(&ctx->sfmt[N - 2].si);
     90     r2 = _mm_load_si128(&ctx->sfmt[N - 1].si);
     91     for (i = 0; i < N - POS1; i++) {
     92 	r = mm_recursion(&ctx->sfmt[i].si, &ctx->sfmt[i + POS1].si, r1, r2,
     93 	  mask);
     94 	_mm_store_si128(&ctx->sfmt[i].si, r);
     95 	r1 = r2;
     96 	r2 = r;
     97     }
     98     for (; i < N; i++) {
     99 	r = mm_recursion(&ctx->sfmt[i].si, &ctx->sfmt[i + POS1 - N].si, r1, r2,
    100 	  mask);
    101 	_mm_store_si128(&ctx->sfmt[i].si, r);
    102 	r1 = r2;
    103 	r2 = r;
    104     }
    105 }
    106 
    107 /**
    108  * This function fills the user-specified array with pseudorandom
    109  * integers.
    110  *
    111  * @param array an 128-bit array to be filled by pseudorandom numbers.
    112  * @param size number of 128-bit pesudorandom numbers to be generated.
    113  */
    114 JEMALLOC_INLINE void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) {
    115     int i, j;
    116     __m128i r, r1, r2, mask;
    117     mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1);
    118 
    119     r1 = _mm_load_si128(&ctx->sfmt[N - 2].si);
    120     r2 = _mm_load_si128(&ctx->sfmt[N - 1].si);
    121     for (i = 0; i < N - POS1; i++) {
    122 	r = mm_recursion(&ctx->sfmt[i].si, &ctx->sfmt[i + POS1].si, r1, r2,
    123 	  mask);
    124 	_mm_store_si128(&array[i].si, r);
    125 	r1 = r2;
    126 	r2 = r;
    127     }
    128     for (; i < N; i++) {
    129 	r = mm_recursion(&ctx->sfmt[i].si, &array[i + POS1 - N].si, r1, r2,
    130 	  mask);
    131 	_mm_store_si128(&array[i].si, r);
    132 	r1 = r2;
    133 	r2 = r;
    134     }
    135     /* main loop */
    136     for (; i < size - N; i++) {
    137 	r = mm_recursion(&array[i - N].si, &array[i + POS1 - N].si, r1, r2,
    138 			 mask);
    139 	_mm_store_si128(&array[i].si, r);
    140 	r1 = r2;
    141 	r2 = r;
    142     }
    143     for (j = 0; j < 2 * N - size; j++) {
    144 	r = _mm_load_si128(&array[j + size - N].si);
    145 	_mm_store_si128(&ctx->sfmt[j].si, r);
    146     }
    147     for (; i < size; i++) {
    148 	r = mm_recursion(&array[i - N].si, &array[i + POS1 - N].si, r1, r2,
    149 			 mask);
    150 	_mm_store_si128(&array[i].si, r);
    151 	_mm_store_si128(&ctx->sfmt[j++].si, r);
    152 	r1 = r2;
    153 	r2 = r;
    154     }
    155 }
    156 
    157 #endif
    158