from Utils import linConGen
import numpy as np
import matplotlib.pyplot as plt
from Utils import discrete_histogram

m,a,b = (2**32, 1103515245,12345)
seed = 1
np.array(linConGen(m,a,b,seed,10))/m

array([2.32830644e-10, 2.56935039e-01, 5.87870652e-01, 1.54325758e-01,
       7.67266943e-01, 9.73813963e-01, 5.85868151e-01, 8.51115584e-01,
       6.13215341e-01, 7.47386723e-01])

def random():
    """Generates one random sample from the uniform [0,1] distribution"""
    global seed
    seed = linConGen(m,a,b,seed,2)[1]
    return seed/m

unif_x = [random() for i in range(1000)]

_=plt.hist(unif_x,bins=10)

def unif_to_bernoulli(x,p):
    from math import floor
    return floor(x+p)

bernoulli_x = [unif_to_bernoulli(random(),0.9) for i in range(1000)]

discrete_histogram(bernoulli_x,normed=True)

import numpy as np

p = np.array([0.1,0.2,0.5,0.2])

p_cumsum = np.cumsum(p)

p_cumsum

array([0.1, 0.3, 0.8, 1. ])

uniform_x = np.array([random() for i in range(10000)])
#np.argmax(uniform_x-p_cumsum,)

xp = (uniform_x.reshape(-1,1)-p_cumsum.reshape(1,-1)) # (10000,1), (1,4) -> (10000,4)
xp.shape

(10000, 4)

xp <= 0
x - p1
x - p1 - p2
x - p1 - p2 -p3
...
(0,p1),(p1,p1+p2),...,(p1+p2+p3,1]
0, 1, ..., 3

from Utils import discrete_histogram
discrete_histogram(np.argmax(xp <= 0,axis=1),normed=True)

def randint(b):
    """Producing random integers between 0 and b inclusive"""
    u = random()
    from math import floor
    return floor(u*(b+1))

from Utils import plotEDF,makeEDF
random_samples_int = [randint(10) for i in range(10000)]
discrete_histogram(random_samples_int)
plotEDF(makeEDF(random_samples_int))

arr = np.arange(0,100)
n = len(arr)
for i in range(n-1,0,-1):
    j = randint(i) # Random integer between 0 and i inclusive
    tmp = arr[j]
    arr[j] = arr[i]
    arr[i] = tmp

arr

array([68, 42, 71, 11, 65, 75, 74, 85, 60, 91, 80, 56, 87, 30, 35, 89, 93,
       58, 17,  4, 13, 16, 39, 57,  2, 36, 37, 96, 19,  0, 62, 29,  3,  6,
       64, 66, 88, 77, 38, 24, 83, 90, 20, 81, 10, 84, 61, 86, 63, 95, 45,
       41, 98, 26, 44, 54, 22, 82, 14, 49, 76, 23, 21, 79, 51, 32, 28,  7,
       46, 97, 53, 43, 18,  1, 55,  9, 78, 48, 52, 59, 12,  8, 50, 31, 73,
       33, 70, 25,  5, 34, 92, 72, 94, 99, 67, 15, 27, 47, 69, 40])

def random_shuffle(arr):
    """Shuffles an array in place"""
    n = len(arr)
    for i in range(n-1,0,-1):
        j = randint(i) # Random integer between 0 and i inclusive
        tmp = arr[j]
        arr[j] = arr[i]
        arr[i] = tmp
    return arr

aa = np.arange(10)
random_shuffle(aa)
aa

array([3, 7, 4, 9, 5, 6, 8, 1, 2, 0])

from sklearn.datasets import make_classification
X,y = make_classification()

index = np.arange(0,len(y))
random_shuffle(index)
X_shuffle = X[index,:]
y_shuffle = y[index]

from math import floor
X = np.array([floor(random()+0.5) for i in range(100)])
Y = np.array([floor(random()+0.2) for i in range(100)])
XY = np.concatenate([X,Y])

def compute_diff(arr):
    return np.abs(np.mean(arr[:100])-np.mean(arr[100:]))

compute_diff(XY)

0.35

differences = np.array([compute_diff(random_shuffle(XY)) for i in range(10000)])

from Utils import plotEDF,makeEDF
plotEDF(makeEDF(differences))

uniform_sequence = np.array([random() for i in range(10000)])
re_unif = uniform_sequence.reshape(-1,2)
Z1 = np.sqrt(-2*np.log(re_unif[:,0]))*np.cos(2*np.pi*re_unif[:,1])
Z2 = np.sqrt(-2*np.log(re_unif[:,0]))*np.sin(2*np.pi*re_unif[:,1])
Z = np.column_stack([Z1,Z2])

import matplotlib.pyplot as plt
plt.scatter(Z1,Z2,alpha=0.1)

<matplotlib.collections.PathCollection at 0x15f458550>

from Utils import makeEDF,plotEDF
from scipy.stats import norm
x_plot = np.linspace(-3,3)
plotEDF(makeEDF(Z.flatten()),force_display=False)
plt.plot(x_plot,norm.cdf(x_plot),color='red')

[<matplotlib.lines.Line2D at 0x161e98dc0>]

_=plt.hist(Z.flatten())

samples = []
rejections = 0
while (len(samples) < 100000):
    # We know that the ratio is 1 when x is in the circle and 0 if it is outside
    (X1,X2) = (2*random()-1,2*random()-1)
    if (np.linalg.norm([X1,X2]) <= 1): # We are inside the circle
        samples.append((X1,X2))
    else:
        rejections+=1

100000/(100000+rejections)

0.7847137756503315

np.pi/4

0.7853981633974483

samp_arr = np.array(samples)

plt.scatter(samp_arr[:,0],samp_arr[:,1])

<matplotlib.collections.PathCollection at 0x10ef84460>

_=plt.hist2d(samp_arr[:,0],samp_arr[:,1])

f = np.array([0.1,0.2,0.5,0.2])
g = np.array([1/4,1/4,1/4,1/4])
M = 2
f-M*g

array([-0.275, -0.175,  0.125, -0.175])

samples = []

r = f/(M*g)
rejections = 0
while (len(samples) < 10000):
    x = randint(3)
    r[x]
    U = random()
    if (U <= r[x]):
        samples.append(x)
    else:
        rejections+=1

rejections/(10000+rejections)

0.4117647058823529

discrete_histogram(samples,normed=True)

def sample_poisson():
    from scipy.special import factorial
    Y = random()
    l = 2

    F = 0
    for i in range(1000):
        F = F + np.exp(-l)*np.power(l,i)/factorial(i)
        if (F > Y):
            break
    return i

poisson_samples = [sample_poisson() for i in range(1000)]
f = []
for i in range(13):
    f.append(np.exp(-l)*np.power(l,i)/factorial(i))
F = np.cumsum(f)
plt.plot(F,color='black')
plotEDF(makeEDF(poisson_samples))
discrete_histogram(poisson_samples)

Introduction to Data Science ¶

1MS041, 2024¶

Generating random variables in different forms¶

Generating Bernoulli rv.s¶

Generating integers with different probabilites¶

Shuffling an array¶

Permutation testing¶

Box Muller¶

Rejection sampling¶

Sampling from the Poisson¶

Introduction to Data Science¶

1MS041, 2024¶

Generating random variables in different forms¶

Generating Bernoulli rv.s¶

Generating integers with different probabilites¶

Shuffling an array¶

Permutation testing¶

Box Muller¶

Rejection sampling¶

Sampling from the Poisson¶

Introduction to Data Science ¶