xoshiro / xoroshiro

Collection of psuedo Random Number Generators

Versions

64 bit

64 bit generator with 256 bits of state. It passes all statistical tests (which authors are aware of)

xoshiro256++

3 dimensionally equidistributed. Easily parallelized with Intels extended AVX2 instruction set, reducting time to 0.30ns for eight instances.

xoshiro256**

4 dimensionally equidistributed byte has lower linear complexity

xoshiro256+

For when the interest is in only generating floating point numbers. 3 dimensionally equidistributed, with lowest bits having low linear complexity. However since one needs just the upper 53 bits, the resulting floating point values have no linear bias. Can be parallelized bringing down time from 0.78ns to 0.22ns.

32 bit

These are generators for where space is an issue, and provides similar timings and properties to the 64 bit versions.

xoroshiro128++

xoroshiro128**

xoroshiro128+

xoroshiro256** C implementation

Simple implementation which uses splitmix64 algorithm to seed xoroshiro256**. Need to do some testing to verify the robustness of this implementation use with caution.

#include <stdint.h>

typedef struct _rng
{
  uint64_t  seed;
  uint64_t* state;
  uint64_t (*next)(void);
  double (*nextf64)(void);
} rng_t;

//------------------------------------------------------------------------------
// SplittableRandom generator

/*
 * fixed-increment version of Java 8's SplittableRandom generator
 *    - http://dx.doi.org/10.1145/2714064.2660195
 *    - http://docs.oracle.com/javase/8/docs/api/java/util/SplittableRandom.html
 *    Useful when 64 bits of state. Primarily used for /seeding/ other prng
 *    algorithms.
 */

static uint64_t split_random(uint64_t* x)
{
  uint64_t z = ((*x) += 0x9e3779b97f4a7c15);
  z          = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
  z          = (z ^ (z >> 27)) * 0x94d049bb133111eb;
  return z ^ (z >> 31);
}

// rotate bits
static inline uint64_t rotl(const uint64_t x, int k)
{
  return (x << k) | (x >> (64 - k));
}

//------------------------------------------------------------------------------
// xoshiro256**

static uint64_t xo256_state[4];

static uint64_t xo256_next(void)
{
  const uint64_t result = rotl(xo256_state[1] * 5, 7) * 9;

  const uint64_t t = xo256_state[1] << 17;

  xo256_state[2] ^= xo256_state[0];
  xo256_state[3] ^= xo256_state[1];
  xo256_state[1] ^= xo256_state[2];
  xo256_state[0] ^= xo256_state[3];

  xo256_state[2] ^= t;

  xo256_state[3] = rotl(xo256_state[3], 45);

  return result;
}

static double xo256_get_double(void)
{
  double r = (xo256_next() >> 11) * 0x1.0p-53;
  return r;
}

/*
 * equivalent to 2^128 calls to next(); it can be used to generate 2^128
 * non-overlapping subsequences for parallel computations.
 */

static void xo256_jump(void)
{
  static const uint64_t JUMP[] = {0x180ec6d33cfd0aba, 0xd5a61266f0c9392c,
                                  0xa9582618e03fc9aa, 0x39abdc4529b1661c};

  uint64_t s0 = 0;
  uint64_t s1 = 0;
  uint64_t s2 = 0;
  uint64_t s3 = 0;
  for (unsigned int i = 0; i < sizeof JUMP / sizeof *JUMP; i++)
    for (unsigned int b = 0; b < 64; b++) {
      if (JUMP[i] & UINT64_C(1) << b) {
        s0 ^= xo256_state[0];
        s1 ^= xo256_state[1];
        s2 ^= xo256_state[2];
        s3 ^= xo256_state[3];
      }
      xo256_next();
    }

  xo256_state[0] = s0;
  xo256_state[1] = s1;
  xo256_state[2] = s2;
  xo256_state[3] = s3;
}

/*
 * equivalent to 2^192 calls to next(); it can be used to generate 2^64 starting
 * points, from each of which jump() will generate 2^64 non-overlapping
 * subsequences for parallel distributed computations.
 */

static void xo256_long_jump(void)
{
  static const uint64_t LONG_JUMP[] = {0x76e15d3efefdcbbf, 0xc5004e441c522fb3,
                                       0x77710069854ee241, 0x39109bb02acbe635};

  uint64_t s0 = 0;
  uint64_t s1 = 0;
  uint64_t s2 = 0;
  uint64_t s3 = 0;
  for (unsigned int i = 0; i < sizeof LONG_JUMP / sizeof *LONG_JUMP; i++)
    for (unsigned int b = 0; b < 64; b++) {
      if (LONG_JUMP[i] & UINT64_C(1) << b) {
        s0 ^= xo256_state[0];
        s1 ^= xo256_state[1];
        s2 ^= xo256_state[2];
        s3 ^= xo256_state[3];
      }
      xo256_next();
    }

  xo256_state[0] = s0;
  xo256_state[1] = s1;
  xo256_state[2] = s2;
  xo256_state[3] = s3;
}

// State must be seeded so it is /not/ 0 everywhere. Seed with splitmix64.
static void rng_xoshiro256_seed(rng_t* r, uint64_t seed)
{
  uint64_t x = seed;
  r->seed    = seed;
  r->state   = xo256_state;

  r->state[0] = split_random(&x);
  r->state[1] = split_random(&x);
  r->state[2] = split_random(&x);
  r->state[3] = split_random(&x);

  r->next    = xo256_next;
  r->nextf64 = xo256_get_double;
}

#define rng_xoshiro256_init(rng) rng_xoshiro256_seed(rng, 0)

References:


No notes link to this note