swh:1:snp:4fe623f11066498dcc10ad18e781b92aa0cfdd0f
Raw File
Tip revision: d4e8232dbce07919f500e306f2d771f22113e17c authored by Christopher McFarland on 04 July 2019, 14:14:36 UTC
for quality test
Tip revision: d4e8232
MT19937.h
#pragma once
#include <unistd.h>
#include <time.h>
#ifndef _XOPEN_SOURCE
#define _XOPEN_SOURCE 600
#endif

/** New functionality:
 *
 *  __cycle__ (def: 500): defines the size of random number array in bytes*16. 
 *  It should be larger than 382, otherwise will SEG-FAULT. 
 *
 *	rand64_t: 64-bit type for access of random bytes
 *	dw128_t	: 128-bit type for SIMD instructions
 *
 *  mt_init(): initializes PRN generator. Seeds from CPU time, PID, and parent 
 *  PID. Can be called multiple times without any issues or overhead. 
 *
 *  wide_uniform()              -> 128 random bits.
 *  uniform_double_PRN()        -> uniform double PRN on domain [0, 1).
 *  rand_long(unsigned long n)  -> uniform unsigned long PRN on [0, n).
 *  rand_long64()               -> uniform unsigned long PRN on [2, 2^64).
 *
 *  static rand64_t Rand: a public random variable that points to the next 
 *  unused element in the random number array. This pointer allows for more
 *  rapid access of these numbers, but can SEG FAULT if used improperly. 
 *  After 64 random bits are used, this pointer should be incremented. Call
 *  MT_FLUSH() every two times this pointer is incremented to avoid a SEG FAULT.
 *
 *	MT_FLUSH(): repopulates array of random numbers when Rand is near end of 
 *	array and moves Rand back to beginning of array.  
 * 
 *  Defining the variable REPORT_PRNS will execute a command that reports the
 *  number of PRNs used during execution at exit. 
 * 
 *	Lastly, this code now uses 19937 as the default Mersenne Prime.
 *
 **/

#define __cycle__		500	

/** 
 * Original Documentation: 
 *
 *
 * @file SFMT.h 
 *
 * @brief SIMD oriented Fast Mersenne Twister(SFMT) pseudorandom
 * number generator
 *
 * @author Mutsuo Saito (Hiroshima University)
 * @author Makoto Matsumoto (Hiroshima University)
 *
 * Copyright (C) 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima
 * University. All rights reserved.
 *
 * The new BSD License is applied to this software.
 * see LICENSE.txt
 *
 * @note We assume that your system has inttypes.h.  If your system
 * doesn't have inttypes.h, you have to typedef uint32_t and uint64_t,
 * and you have to define PRIu64 and PRIx64 in this file as follows:
 * @verbatim
 typedef unsigned int uint32_t
 typedef unsigned long long uint64_t  
 #define PRIu64 "llu"
 #define PRIx64 "llx"
@endverbatim
 * uint32_t must be exactly 32-bit unsigned integer type (no more, no
 * less), and uint64_t must be exactly 64-bit unsigned integer type.
 * PRIu64 and PRIx64 are used for printf function to print 64-bit
 * unsigned int and 64-bit unsigned int in hexadecimal format.
 */
 
#ifndef SFMT_H
#define SFMT_H

#define MEXP	19937

#include <stdio.h>

#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
  #include <inttypes.h>
#elif defined(_MSC_VER) || defined(__BORLANDC__)
  typedef unsigned int uint32_t;
  typedef unsigned __int64 uint64_t;
  #define inline __inline
#else
  #include <inttypes.h>
  #if defined(__GNUC__)
    #define inline __inline__
  #endif
#endif

#ifndef PRIu64
  #if defined(_MSC_VER) || defined(__BORLANDC__)
    #define PRIu64 "I64u"
    #define PRIx64 "I64x"
  #else
    #define PRIu64 "llu"
    #define PRIx64 "llx"
  #endif
#endif

#if defined(__GNUC__)
#define ALWAYSINLINE __attribute__((always_inline))
#else
#define ALWAYSINLINE
#endif

#if defined(_MSC_VER)
  #if _MSC_VER >= 1200
    #define PRE_ALWAYS __forceinline
  #else
    #define PRE_ALWAYS inline
  #endif
#else
  #define PRE_ALWAYS inline
#endif



#endif


/** 
 * @file  SFMT.c
 * @brief SIMD oriented Fast Mersenne Twister(SFMT)
 *
 * @author Mutsuo Saito (Hiroshima University)
 * @author Makoto Matsumoto (Hiroshima University)
 *
 * Copyright (C) 2006,2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima
 * University. All rights reserved.
 *
 * The new BSD License is applied to this software, see LICENSE.txt
 */
#include <string.h>
#include <assert.h>

#ifndef SFMT_PARAMS_H
#define SFMT_PARAMS_H

/*-----------------
  BASIC DEFINITIONS
  -----------------*/
/** Mersenne Exponent. The period of the sequence 
 *  is a multiple of 2^MEXP-1.
 * #define MEXP 19937 */
/** SFMT generator has an internal state array of 128-bit integers,
 * and iN is its size. */
#define iN (MEXP / 128 + 1)
/** N32 is the size of internal state array when regarded as an array
 * of 32-bit integers.*/
#define N32 (iN * 4)
/** N64 is the size of internal state array when regarded as an array
 * of 64-bit integers.*/
#define N64 (iN * 2)

/*----------------------
  the parameters of SFMT
  following definitions are in paramsXXXX.h file.
  ----------------------*/
/** the pick up position of the array.Fwd: Renting the Harvard Cabin
#define POS1 122 
*/

/** the parameter of shift left as four 32-bit registers.
#define SL1 18
 */

/** the parameter of shift left as one 128-bit register. 
 * The 128-bit integer is shifted by (SL2 * 8) bits. 
#define SL2 1 
*/

/** the parameter of shift right as four 32-bit registers.
#define SR1 11
*/

/** the parameter of shift right as one 128-bit register. 
 * The 128-bit integer is shifted by (SL2 * 8) bits. 
#define SR2 1 
*/

/** A bitmask, used in the recursion.  These parameters are introduced
 * to break symmetry of SIMD.
#define MSK1 0xdfffffefU
#define MSK2 0xddfecb7fU
#define MSK3 0xbffaffffU
#define MSK4 0xbffffff6U 
*/

/** These definitions are part of a 128-bit period certification vector.
#define PARITY1	0x00000001U
#define PARITY2	0x00000000U
#define PARITY3	0x00000000U
#define PARITY4	0xc98e126aU
*/

#ifndef SFMT_PARAMS19937_H
#define SFMT_PARAMS19937_H

#define POS1	122
#define SL1	18
#define SL2	1
#define SR1	11
#define SR2	1
#define MSK1	0xdfffffefU
#define MSK2	0xddfecb7fU
#define MSK3	0xbffaffffU
#define MSK4	0xbffffff6U
#define PARITY1	0x00000001U
#define PARITY2	0x00000000U
#define PARITY3	0x00000000U
#define PARITY4	0x13c9e684U


/* PARAMETERS FOR ALTIVEC */
#if defined(__APPLE__)	/* For OSX */
    #define ALTI_SL1	(vector unsigned int)(SL1, SL1, SL1, SL1)
    #define ALTI_SR1	(vector unsigned int)(SR1, SR1, SR1, SR1)
    #define ALTI_MSK	(vector unsigned int)(MSK1, MSK2, MSK3, MSK4)
    #define ALTI_MSK64 \
	(vector unsigned int)(MSK2, MSK1, MSK4, MSK3)
    #define ALTI_SL2_PERM \
	(vector unsigned char)(1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8)
    #define ALTI_SL2_PERM64 \
	(vector unsigned char)(1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0)
    #define ALTI_SR2_PERM \
	(vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14)
    #define ALTI_SR2_PERM64 \
	(vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14)
#else	/* For OTHER OSs(Linux?) */
    #define ALTI_SL1	{SL1, SL1, SL1, SL1}
    #define ALTI_SR1	{SR1, SR1, SR1, SR1}
    #define ALTI_MSK	{MSK1, MSK2, MSK3, MSK4}
    #define ALTI_MSK64	{MSK2, MSK1, MSK4, MSK3}
    #define ALTI_SL2_PERM	{1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8}
    #define ALTI_SL2_PERM64	{1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0}
    #define ALTI_SR2_PERM	{7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14}
    #define ALTI_SR2_PERM64	{15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14}
#endif	/* For OSX */
#define IDSTR	"SFMT-19937:122-18-1-11-1:dfffffef-ddfecb7f-bffaffff-bffffff6"

#endif /* SFMT_PARAMS19937_H */
#endif /* SFMT_PARAMS_H */

/*------------------------------------------------------
  128-bit SIMD data type for SSE2 or standard C
  ------------------------------------------------------*/
#if defined(__SSE2__)
  #include <emmintrin.h>

/** 128-bit data structure */
union W128_T {
    __m128i si;
    uint32_t u[4];
    double d[2];
};
/** 128-bit data type */
typedef union W128_T w128_t;

#else

/** 128-bit data structure */
struct W128_T {
    uint32_t u[4];
};
/** 128-bit data type */
typedef struct W128_T w128_t;

#endif

/* Container for random digits */

typedef union RAND64_t {
	uint8_t  s[8];
	uint32_t i[2];
	double d;
	float f[2];
	uint64_t l;
	signed long sl;
} rand64_t;

typedef union dW128_T {
    __m128i si;
    __m128d sd;
    uint64_t l[2];
    uint32_t i[4];
    double d[2];
} dw128_t;

/*--------------------------------------
  FILE GLOBAL VARIABLES
  internal state, index counter and flag 
  --------------------------------------*/
/** the 128-bit internal state array */
static w128_t sfmt[iN];
/** the 32bit integer pointer to the 128-bit internal state array */
static uint32_t *psfmt32 = &sfmt[0].u[0];
/** index counter to the 32-bit internal state array */
static int idx;
/** a flag: it is 0 if and only if the internal state is not yet
 * initialized. */
/** a parity check vector which certificate the period of 2^{MEXP} */
static uint32_t parity[4] = {PARITY1, PARITY2, PARITY3, PARITY4};

/*----------------
  STATIC FUNCTIONS
  ----------------*/
static void gen_rand_array(w128_t *array, int size);
inline static uint32_t func1(uint32_t x);
inline static uint32_t func2(uint32_t x);
static void period_certification(void);

static void mt_init(void);
static inline dw128_t wide_uniform(void);
static inline double uniform_double_PRN(void);
static inline unsigned long rand_long(unsigned long n);
static inline unsigned long rand_long64(void); 
void _report_PRN_total(void);
#if defined(__SSE2__)
/** 
 * @file  SFMT-sse2.h
 * @brief SIMD oriented Fast Mersenne Twister(SFMT) for Intel SSE2
 *
 * @author Mutsuo Saito (Hiroshima University)Fwd: Renting the Harvard Cabin
 * @author Makoto Matsumoto (Hiroshima University)
 *
 * @note We assume LITTLE ENDIAN in this file
 *
 * Copyright (C) 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima
 * University. All rights reserved.
 *
 * The new BSD License is applied to this software, see LICENSE.txt
 */

#ifndef SFMT_SSE2_H
#define SFMT_SSE2_H



PRE_ALWAYS static __m128i mm_recursion(__m128i *a, __m128i *b, __m128i c,
				   __m128i d, __m128i mask) ALWAYSINLINE;

/**
 * This function represents the recursion formula.
 * @param a a 128-bit part of the interal state array
 * @param b a 128-bit part of the interal state array
 * @param c a 128-bit part of the interal state array
 * @param d a 128-bit part of the interal state array
 * @param mask 128-bit mask
 * @return output
 */
PRE_ALWAYS static __m128i mm_recursion(__m128i *a, __m128i *b, 
				   __m128i c, __m128i d, __m128i mask) {
    __m128i v, x, y, z;
    
    x = _mm_load_si128(a);
    y = _mm_srli_epi32(*b, SR1);
    z = _mm_srli_si128(c, SR2);
    v = _mm_slli_epi32(d, SL1);
    z = _mm_xor_si128(z, x);
    z = _mm_xor_si128(z, v);
    x = _mm_slli_si128(x, SL2);
    y = _mm_and_si128(y, mask);
    z = _mm_xor_si128(z, x);
    z = _mm_xor_si128(z, y);
    return z;
}

/**
 * This function fills the user-specified array with pseudorandom
 * integers.
 *
 * @param array an 128-bit array to be filled by pseudorandom numbers.  
 * @param size number of 128-bit pesudorandom numbers to be generated.
 */
static void gen_rand_array(w128_t *array, int size) {
    int i, j;
    __m128i r, r1, r2, mask;
    mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1);

    r1 = _mm_load_si128(&sfmt[iN - 2].si);
    r2 = _mm_load_si128(&sfmt[iN - 1].si);
    for (i = 0; i < iN - POS1; i++) {
	r = mm_recursion(&sfmt[i].si, &sfmt[i + POS1].si, r1, r2, mask);
	_mm_store_si128(&array[i].si, r);
	r1 = r2;
	r2 = r;
    }
    for (; i < iN; i++) {
	r = mm_recursion(&sfmt[i].si, &array[i + POS1 - iN].si, r1, r2, mask);
	_mm_store_si128(&array[i].si, r);
	r1 = r2;
	r2 = r;
    }
    /* main loop */
    for (; i < size - iN; i++) {
	r = mm_recursion(&array[i - iN].si, &array[i + POS1 - iN].si, r1, r2,
			 mask);
	_mm_store_si128(&array[i].si, r);
	r1 = r2;
	r2 = r;
    }
    for (j = 0; j < 2 * iN - size; j++) {
	r = _mm_load_si128(&array[j + size - iN].si);
	_mm_store_si128(&sfmt[j].si, r);
    }
    for (; i < size; i++) {
	r = mm_recursion(&array[i - iN].si, &array[i + POS1 - iN].si, r1, r2,
			 mask);
	_mm_store_si128(&array[i].si, r);
	_mm_store_si128(&sfmt[j++].si, r);
	r1 = r2;
	r2 = r;
    }
}

#endif

#endif


/**
 * This function represents a function used in the initialization
 * by init_by_array
 * @param x 32-bit integer
 * @return 32-bit integer
 */
static uint32_t func1(uint32_t x) {
    return (x ^ (x >> 27)) * (uint32_t)1664525UL;
}

/**
 * This function represents a function used in the initialization
 * by init_by_array
 * @param x 32-bit integer
 * @return 32-bit integer
 */
static uint32_t func2(uint32_t x) {
    return (x ^ (x >> 27)) * (uint32_t)1566083941UL;
}

/**
 * This function certificate the period of 2^{MEXP}
 */
static void period_certification(void) {
    int inner = 0;
    int i, j;
    uint32_t work;union dW128_T {
    __m128i si;
    __m128d sd;
    uint64_t u[2];
    uint32_t u32[4];
    double d[2];
};


    for (i = 0; i < 4; i++)
	inner ^= psfmt32[i] & parity[i];
    for (i = 16; i > 0; i >>= 1)
	inner ^= inner >> i;
    inner &= 1;
    /* check OK */
    if (inner == 1) {
	return;
    }
    /* check NG, and modification */
    for (i = 0; i < 4; i++) {
	work = 1;
};

	for (j = 0; j < 32; j++) {
	    if ((work & parity[i]) != 0) {
		psfmt32[i] ^= work;
		return;
	    }
	    work = work << 1;
	}
}

/*----------------
  PUBLIC FUNCTIONS
  ----------------*/

/**
 * This function initializes the internal state array,
 * with an array of 32-bit integers used as the seeds
 * @param init_key the array of 32-bit integers, used as a seed.
 * @param key_length the length of init_key.
 */
#define __EXP_SET__		0x3ff0000000000000

static w128_t iRandS[__cycle__], *iRend = &iRandS[__cycle__-1]; 
rand64_t *Rand;

static __m128d sse2_double_m_one;
static __m128i sse2_int_set;

#ifdef REPORT_PRNS
static long __n_cycles__ = 0;

static void _report_PRN_total(void) {
    printf("Used ~%ld 64-bit uniform PRNs.\n", 2*__cycle__*__n_cycles__ + Rand - (rand64_t *)iRandS); /* __cycle__ is in dimensions of 128-bit SIMD */    
}
#endif 

static void mt_init(void) {
        /* Avoid initializing twice */
    static int old = 0;
    if (old==1) return;
    old = 1;
        /* Use Process ID, Parent Process ID, and current time to seed the PRNG */
#ifdef _WIN32
    int init_key[] = {(int)getpid(), (int)time(NULL)}, key_length = 2;
#else
    int init_key[] = {(int)getpid(), (int)time(NULL), (int)getppid()}, key_length = 3;
#endif
        /* See http://www.math.sci.hiroshima-u.ac.jp/~%20m-mat/MT/SFMT/index.html 
         * for the remainder. */
#ifdef REPORT_PRNS
    atexit(_report_PRN_total);
#endif
    sse2_double_m_one = _mm_set_pd(-1.0, -1.0);
    sse2_int_set = _mm_set_epi64((__m64)__EXP_SET__, (__m64)__EXP_SET__);
    
    int i, j, count;
    uint32_t r;
    int lag;
    int mid;
    int size = iN * 4;
    if (size >= 623) {
	  lag = 11;
    } else if (size >= 68) {
	  lag = 7;
    } else if (size >= 39) {
	  lag = 5;
    } else {
	  lag = 3;
    }
    mid = (size - lag) / 2;

    memset(sfmt, 0x8b, sizeof(sfmt));
    if (key_length + 1 > N32) {
	count = key_length + 1;
    } else {
	count = N32;
    }
    r = func1(psfmt32[0] ^ psfmt32[mid] 
	      ^ psfmt32[N32 - 1]);
    psfmt32[mid] += r;

    r += key_length;
    psfmt32[mid + lag] += r;
    psfmt32[0] = r;

    count--;
    for (i = 1, j = 0; (j < count) && (j < key_length); j++) {
	r = func1(psfmt32[i] ^ psfmt32[(i + mid) % N32] 
		  ^ psfmt32[(i + N32 - 1) % N32]);
	psfmt32[(i + mid) % N32] += r;
	r += init_key[j] + i;
	psfmt32[(i + mid + lag) % N32] += r;
	psfmt32[i] = r;
	i = (i + 1) % N32;
    }
    for (; j < count; j++) {
	r = func1(psfmt32[i] ^ psfmt32[(i + mid) % N32] 
		  ^ psfmt32[(i + N32 - 1) % N32]);
	psfmt32[(i + mid) % N32] += r;
	r += i;
	psfmt32[(i + mid + lag) % N32] += r;
	psfmt32[i] = r;
	i = (i + 1) % N32;
    }
    for (j = 0; j < N32; j++) {
	r = func2(psfmt32[i] + psfmt32[(i + mid) % N32] 
		  + psfmt32[(i + N32 - 1) % N32]);
	psfmt32[(i + mid) % N32] ^= r;
	r -= i;
	psfmt32[(i + mid + lag) % N32] ^= r;
	sse2_double_m_one = _mm_set_pd(-1.0, -1.0);
	psfmt32[i] = r;
	i = (i + 1) % N32;
    }

    idx = N32;
    period_certification();
	gen_rand_array(iRandS,__cycle__); 
	Rand = (rand64_t *)iRandS;
}

#ifdef REPORT_PRNS
#define INCREMENT_N_CYCLES() (__n_cycles__++);
#else
#define INCREMENT_N_CYCLES() ;
#endif

#define MT_FLUSH() { if (Rand > (rand64_t *)iRend) { \
gen_rand_array(iRandS,__cycle__); \
Rand = (rand64_t *) iRandS; \
INCREMENT_N_CYCLES() \
}; } 

static inline dw128_t wide_uniform(void) {
  MT_FLUSH();
  dw128_t W;
  W.si = _mm_set_epi64x(Rand[0].l, Rand[1].l);	
  Rand+=2;
  W.si = _mm_or_si128(_mm_srli_epi64(W.si, 2), sse2_int_set); 
  W.sd = _mm_add_pd(W.sd, sse2_double_m_one);
  return W;
}

static inline double uniform_double_PRN(void) {
  MT_FLUSH();
  Rand->l = (Rand->l >> 2) | __EXP_SET__;
  return Rand++->d - 1;
}

static inline unsigned long rand_long(unsigned long n){
  MT_FLUSH();
  return Rand++->l % n;
}

static inline unsigned long rand_long64(void){
  MT_FLUSH();
  return Rand++->l;
}

back to top