Introduction to Data Science¶
1MS041, 2024¶
©2024 Raazesh Sainudiin, Benny Avelin. Attribution 4.0 International (CC BY 4.0)
from Utils import plotEMF
p = 0.1
# Two-point distribution: mass 1-p at the value 0 and mass p at the value 1,
# handed to plotEMF as (value, mass) pairs -- presumably the format plotEMF
# expects; confirm against Utils.
plotEMF([(0,1-p),(1,p)])
import numpy as np
# Draw 10 integers uniformly from {0, 1}; the upper bound 2 is exclusive.
np.random.randint(0,2,size=10)
array([0, 1, 0, 0, 1, 0, 1, 1, 0, 1])
from Utils import plotEDF,emfToEdf
# Convert the two-point masses (reusing p defined earlier) with emfToEdf and
# plot the result with plotEDF.
# NOTE(review): the leading (0,0) entry carries zero mass and repeats the
# value 0 -- confirm whether emfToEdf needs it or it is a leftover.
plotEDF(emfToEdf([(0,0),(0,1-p),(1,p)]))
Binomial random variable¶
If we do $n$ independent trials, each with success probability $p$, then the binomial random variable is the number of successes. The PMF is $$ f(x) = {n \choose x} p^x (1-p)^{n-x} $$ for $x \in \{0,1,\ldots,n\}$; the variable can only take the values $0,1,\ldots,n$.
from scipy.special import binom as binomial
n = 20
p = 0.5
# Binomial(n, p) puts mass on 0, 1, ..., n inclusive, so iterate over
# range(n + 1); range(n) would drop the outcome x = n and the masses would
# sum to 1 - p**n instead of 1.
# The list is built once so the mass plot and the distribution plot below
# are guaranteed to use the same values.
binomial_pmf = [
    (x, binomial(n, x) * (p ** x) * ((1 - p) ** (n - x)))
    for x in range(n + 1)
]
plotEMF(binomial_pmf)
# Ten independent draws from the same Binomial(n, p).
np.random.binomial(n, p, size=10)
plotEDF(emfToEdf(binomial_pmf))
Poisson random variable¶
A Pois($\lambda$) random variable, where $\lambda \in (0,\infty)$ is called the rate, has PMF $$ f(x) = \frac{\lambda^x e^{-\lambda}}{x!} $$ for $x = 0, 1, 2, \ldots$
from scipy.special import factorial
from math import exp
l = 2  # the rate lambda used in the mass formula below
# Poisson masses lambda^x e^{-lambda} / x! for x = 0..9 only: the support is
# unbounded, so this list is a truncation of the full mass function.
plotEMF([(i,l**i*exp(-l)/factorial(i)) for i in range(10)])
# Ten draws from a Poisson distribution with rate 2 (the same value as l,
# written here as a literal).
np.random.poisson(2,size=10)
# The same truncated masses, converted with emfToEdf before plotting.
plotEDF(emfToEdf([(i,l**i*exp(-l)/factorial(i)) for i in range(10)]))
Empirical means¶
from random import randint
def X() -> int:
    """Draw one observation from DeMoivre(1/3, 1/3, 1/3).

    Returns:
        0, 1 or 2, each with probability 1/3 (``randint`` includes both
        endpoints).
    """
    return randint(0, 2)
def empirical_mean(n=1):
    """Return the empirical mean of n independent observations of X.

    Args:
        n: Number of experiments to average over; must be at least 1.

    Returns:
        The sum of the n observations divided by n, as a float.

    Raises:
        ValueError: If n is smaller than 1, since an average over no
            observations is undefined.
    """
    # Reject n < 1 up front: n == 0 would otherwise divide by zero and a
    # negative n would silently average an empty sample.
    if n < 1:
        raise ValueError("n must be at least 1")
    observations = [X() for _ in range(n)]
    return sum(observations) / n
# Run this to get an observation of X and rerun for another
# (X must be called; the bare name only evaluates to the function object)
X()
# Run this to get an observation of the empirical mean of X
# when doing 10 experiments
empirical_mean(10)
1.3
Common continuous random variables¶
The uniform [0,1] random variable¶
In this case we have
$$ f(x) = \begin{cases} 1 & \text{if } 0 \leq x \leq 1 \\ 0 & \text{otherwise} \end{cases} $$
Also, for $x \in [0,1]$ we have
$$ F(x) = \int_{-\infty}^x f(v) dv = \int_0^x dv = x $$
import numpy as np
# Build a one-dimensional NumPy array from a Python list.
np.array([1,2,3])
array([1, 2, 3])
import matplotlib.pyplot as plt
# 100 draws from the uniform distribution on [0, 1), shown as a 3-bin
# histogram of raw counts (density=False).
x = np.random.uniform(0,1,size=100)
# The return value is bound to _ -- presumably so the notebook does not echo it.
_=plt.hist(x,density=False,bins=3)
from Utils import makeEDF,makeEMF
# A fresh sample of 100 uniform draws (not x above), passed through makeEMF
# and then plotted.
plotEMF(makeEMF(np.random.uniform(size=100)))
import numpy as np
from Utils import makeEDF,makeEMF,plotEDF
# Another fresh sample of 100 uniform draws, this time passed through makeEDF.
plotEDF(makeEDF(np.random.uniform(size=100)))
The Gaussian random variable (Normal)¶
In this case we have $$ f(x) = \frac{1}{\sigma \sqrt{2\pi}} e^{-\frac{1}{2} \left ( \frac{x-\mu}{\sigma}\right )^2} $$ Here we have two parameters, the mean $\mu$ and the standard deviation $\sigma$.
# Ten draws from the standard normal distribution (NumPy's defaults are
# mean 0 and standard deviation 1).
np.random.normal(size=10)
array([ 1.01386181, -0.56043332, 0.49921684, 0.88863697, -1.64044673,
0.37383609, -2.41185133, 1.21347747, 0.21825103, 0.97901204])
# Histogram of 100000 standard normal draws using 200 bins.
_=plt.hist(np.random.normal(size=100000),bins=200)