©2023 Raazesh Sainudiin, Benny Avelin. Attribution 4.0 International (CC BY 4.0)
from Utils import showURL
# Show the Wikipedia article on the continuous uniform distribution.
# showURL comes from the local Utils module; presumably it embeds the page
# in the notebook output -- confirm in Utils.
showURL("https://en.wikipedia.org/wiki/Continuous_uniform_distribution")
from sympy import integrate, var

# Symbols for the two interval endpoints and the integration variable.
a, b, x = var('a b x')

# Constant density 1/(b-a) of the uniform distribution.
f = 1 / (b - a)

# An antiderivative of f with respect to x; it is determined only up to an
# additive constant C.
F_prim = integrate(f, x)

# Fix C by requiring F(a) = 0:
#   F(a) = F_prim(a) + C = 0   =>   C = -F_prim(a)
F = F_prim - F_prim.subs(x, a)
F
To sample from this distribution we start by solving $$ F(x) = y $$ for $x$, which gives $x = a + (b-a)y$. This is solvable because $F$ is strictly increasing, at least on the interval $x \in (a,b)$.
import numpy as np

# Draw 10000 values from the uniform distribution on [0, 1).
y = np.random.uniform(low=0, high=1, size=10000)

# Map them through the inverse distribution function x = a + (b - a) * y
# with a = 10 and b = 20.
x = 10 + (20 - 10) * y
import matplotlib.pyplot as plt
# Histogram of the transformed samples x (default binning).
# Binding the return value to `_` keeps the notebook from echoing it.
_=plt.hist(x)
Inversion sampling starts with a distribution function $F$ that we want to simulate from. We first compute $F^{-1}(y)$, then we let $X$ be a uniform $(0,1)$ random variable and define $$ Y = F^{-1}(X). $$ Then $Y$ has distribution function $F$.
To see this, write $x = F(y)$. Since $F$ is increasing, $$ F_Y(y) = P(Y \leq y) = P(F^{-1}(X) \leq y) = P(X \leq F(y)) = P(X \leq x) = F_X(x) = F_X(F(y)) $$
Since $X$ is uniform $(0,1)$ we have that $F_X(x) = x$ if $x \in (0,1)$, and therefore $F_X(F(y)) = F(y)$.
The conclusion is that $F_Y(y) = F(y)$.
# Show the Wikipedia article on the exponential distribution.
# showURL comes from the local Utils module; presumably it embeds the page
# in the notebook output -- confirm in Utils.
showURL("https://en.wikipedia.org/wiki/Exponential_distribution")
from sympy import exp

# Symbols: the rate parameter (named 'lambda', bound to `lam` because
# `lambda` is a Python keyword) and the variable x.
lam = var('lambda')
x = var('x')

# Density lambda * exp(-lambda * x).
f = lam * exp(-lam * x)
f

# An antiderivative of the density with respect to x.
F_prim = integrate(f, x)
F_prim

# Shift by the constant 1; assuming sympy returns F_prim = -exp(-lambda*x),
# this makes F(0) = 0.
F = 1 + F_prim
F
# Draw 10000 values from the uniform distribution on [0, 1).
y = np.random.uniform(low=0, high=1, size=10000)

# Inverse of F(x) = 1 - exp(-lambda * x) with lambda = 1:
# x = -log(1 - y) / lambda.
x = -np.log(1 - y)
# Histogram of the samples, normalised to a density (100 bins).
_ = plt.hist(x, bins=100, density=True)

# Overlay the curve exp(-t) on 100 evenly spaced points of [0, 9] for
# comparison with the histogram.
x_plot = np.linspace(start=0, stop=9, num=100)
plt.plot(x_plot, np.exp(-x_plot))
%%bash
# List the contents of the data directory.
ls data
%%bash
# Print the first 100 lines of the CO2 data file.
head -n 100 data/co2_mm_mlo.txt
# Print the first line of the CO2 file that is not a '#' comment line.
with open("data/co2_mm_mlo.txt",mode="r") as f:
    current_line = f.readline()
    # readline() returns '' at end of file. startswith() is simply False for
    # the empty string, so a file made up only of comment lines (or an empty
    # file) ends the loop instead of raising IndexError on current_line[0].
    while current_line.startswith('#'):
        current_line = f.readline()
    print(current_line)
import csv

# Load every row after the leading '#' comment header of the CO2 file as a
# list of string fields.
data_raw = []
with open("data/co2_mm_mlo.txt",mode="r") as f:
    # Remember the offset at which each line starts, so that once the first
    # non-comment line has been read we can rewind to its beginning.
    current_pos = f.tell()
    current_line = f.readline()
    # startswith() is False for the '' that readline() returns at end of
    # file, so a comment-only or empty file no longer raises IndexError.
    while current_line.startswith('#'):
        current_pos = f.tell()
        current_line = f.readline()
    # Rewind to the start of the first data line and let csv take over.
    f.seek(current_pos)
    # Fields are separated by runs of spaces; skipinitialspace collapses the
    # padding that follows each delimiter.
    csv_reader = csv.reader(f,delimiter=' ',skipinitialspace=True)
    # csv.reader yields [] for blank lines; skip those so every stored row
    # actually carries fields.
    data_raw.extend(row for row in csv_reader if row)
data_raw[0]
len(data_raw)
# Converter for each column, in file order.
schema = [int, int, float, float, float, float, int]
type(int("123"))

# Apply the converters positionally to every raw row; zip() stops at the
# shorter of (row, schema), so fields beyond the schema are dropped.
data_parsed = [
    [convert(field) for field, convert in zip(row, schema)]
    for row in data_raw
]
data_parsed[0]

# Stack the parsed rows into a 2-D array for column-wise access.
data_array = np.array(data_parsed)
data_array.shape
data_array[0, 0]
# Basic stats
from Utils import basic_stats
# Summary statistics for column 4 of the data table, then a look at its
# first ten values.
basic_stats(data_array[:, 4])
data_array[:10, 4]

# For comparison: the same statistics on 10000 draws from a normal
# distribution with mean 0 and standard deviation 27.53.
x = np.random.normal(loc=0, scale=27.53, size=10000)
basic_stats(x)

# Histogram of column 4 with 50 bins.
_ = plt.hist(data_array[:, 4], bins=50)
# EDF
from Utils import makeEDF, plotEDF
# Pass column 4 of data_array through makeEDF and plot the result.
# makeEDF/plotEDF are helpers from the local Utils module; presumably "EDF"
# stands for empirical distribution function -- confirm in Utils.
plotEDF(makeEDF(data_array[:,4]))