def likelihoodBernoulli(theta, n, tStatistic):
    '''Evaluate the Bernoulli likelihood at theta.

    Param theta is the parameter value to evaluate the likelihood at; values outside [0,1] give 0.
    Param n is the number of observations.
    Param tStatistic is the sum of the n Bernoulli observations.
    Returns theta^tStatistic * (1-theta)^(n-tStatistic), computed on a Sage RR value.'''
    if not (theta >= 0 and theta <= 1): # theta is outside the parameter space
        return 0
    p = RR(theta) # make sure we use a Sage mpfr
    successPart = p^tStatistic
    failurePart = (1-p)^(n-tStatistic)
    return successPart*failurePart
    
def bernoulliFInverse(u, theta):
    '''Map a value u to a Bernoulli outcome by taking floor(u + theta).

    Param u is the value to evaluate the inverse CDF at.
    Param theta is the Bernoulli distribution parameter.
    Returns floor(u + theta), which is 1 when u + theta reaches 1 and 0 below that (for u, theta in [0,1)).'''

    shifted = u + theta
    return floor(shifted)
    
def bernoulliSample(n, theta, simSeed=None):
    '''Simulate n draws from a Bernoulli(theta) distribution.

    Param n is the number of samples to simulate.
    Param theta is the bernoulli distribution parameter.
    Param simSeed is a seed for the random number generator, defaulting to None.
    Returns a simulated Bernoulli sample as a list.'''

    set_random_seed(simSeed) # seed first, so the n uniforms below are drawn under simSeed
    uniforms = [random() for _ in range(n)]
    set_random_seed(None) # re-seed with None once the uniforms have been drawn
    return list(map(lambda u: bernoulliFInverse(u, theta), uniforms))
    
def bernoulliSampleSecretTheta(n, theta=0.30, simSeed=30):
    '''Simulate n Bernoulli draws using the built-in ("secret") default theta.

    Param n is the number of samples to simulate.
    Param theta is the bernoulli distribution parameter, defaulting to 0.30.
    Param simSeed is a seed for the random number generator, defaulting to 30.
    Returns a simulated Bernoulli sample as a list.'''

    # identical procedure to bernoulliSample, only the defaults differ, so reuse it
    return bernoulliSample(n, theta, simSeed)

def bernoulliRunningMeans(n, myTheta, mySeed = None):
    '''Return the n running means of a simulated Bernoulli(myTheta) sample.

    Param n is the number of running means to generate.
    Param myTheta is the theta for the Bernoulli distribution
    Param mySeed is a value for the seed of the random number generator, defaulting to None.
    Returns a list whose k-th entry (k = 1..n) is the mean of the first k draws.'''

    from pylab import cumsum # we can import in the middle of code
    draws = bernoulliSample(n, theta=myTheta, simSeed = mySeed)
    partialSums = list(cumsum(draws))
    # pair each cumulative sum with its sample size 1..n
    return [RR(total)/size for total, size in zip(partialSums, range(1, n+1,1))]
    
#return a plot object for BernoulliLikelihood using the secret theta bernoulli generator
def plotBernoulliLikelihoodSecretTheta(n):
    '''Simulate n secret-theta Bernoulli draws and return a line plot of their likelihood function.

    Param n is the number of simulated samples to generate and do likelihood plot for.
    Returns a line graphics object of likelihood against theta on the grid 0, 0.01, ..., 0.99.'''

    from pylab import arange
    simulated = bernoulliSampleSecretTheta(n) # make sample
    successes = sum(simulated) # summary statistic
    thetaGrid = arange(0,1,0.01) # get some values to plot against
    likelihoods = [likelihoodBernoulli(th,n,successes) for th in thetaGrid]
    red = 1*n/1000 # line colour depends on the sample size
    return line(zip(thetaGrid, likelihoods), rgbcolor = (red, 0, 1 - red))
    
def cauchyFInverse(u):
    '''Evaluate the inverse CDF of a standard Cauchy distribution.

    Param u is the value to evaluate the inverse CDF at.
    Returns tan(pi*(u - 0.5)) as a Sage RR value.'''

    angle = pi*(u-0.5)
    return RR(tan(angle))
    
def cauchySample(n):
    '''Simulate n draws from a standard Cauchy distribution.

    Param n is the number of samples to simulate.
    Returns a list of n values, each cauchyFInverse applied to one random() draw.'''

    return [cauchyFInverse(random()) for _ in range(n)]

def cauchyRunningMeans(n):
    '''Return the n running means of a simulated standard Cauchy sample.

    Param n is the number of running means to generate.
    Returns a list whose k-th entry (k = 1..n) is the mean of the first k draws.'''

    from pylab import cumsum
    draws = cauchySample(n)
    partialSums = list(cumsum(draws))
    # pair each cumulative sum with its sample size 1..n
    return [RR(total)/size for total, size in zip(partialSums, range(1, n+1,1))]

def paretoFInverse(u,a=1.5):
    '''Evaluate the inverse CDF of a Pareto type II (Lomax) distribution with parameter a and scale 1.

    Param u is the value to evaluate the inverse CDF at.
    Param a is the distribution parameter, defaulting to 1.5.
    Returns (1-u)^(-1/a) - 1 as a Sage RR value.'''
    exponent = -1/a
    return RR((1-u)^exponent - 1)

def paretoSample(n,a=1.5):
    '''Simulate n draws from a Pareto type II (Lomax) distribution with parameter a and scale 1.

    Param n is the number of samples to simulate.
    Param a is the distribution parameter, defaulting to 1.5.
    Returns a list of n values, each paretoFInverse applied to one random() draw.'''

    return [paretoFInverse(random(),a) for _ in range(n)]

def paretoRunningMeans(n,a=1.5):
    '''Return the n running means of a simulated Pareto type II (Lomax) sample with parameter a and scale 1.

    Param n is the number of running means to generate.
    Param a is the distribution parameter, defaulting to 1.5.
    Returns a list whose k-th entry (k = 1..n) is the mean of the first k draws.'''

    from pylab import cumsum
    draws = paretoSample(n,a)
    partialSums = list(cumsum(draws))
    # pair each cumulative sum with its sample size 1..n
    return [RR(total)/size for total, size in zip(partialSums, range(1, n+1,1))]

def twoRunningMeansPlot(nToPlot, iters):
    '''Function to return a graphics array containing plots of running means for Bernoulli and Standard Cauchy.

    Param nToPlot is the number of running means to simulate for each iteration.
    Param iters is the number of iterations or sequences of running means or lines on each plot to draw.
    Returns a graphics array object containing both plots with titles.
    Raises ValueError if iters is less than 1, since there would be nothing to plot.'''
    if iters < 1:
        raise ValueError("iters must be at least 1")
    xvalues = range(1, nToPlot+1,1)
    p1 = p2 = cauchyTitleMax = None
    for i in range(iters):
        shade = 0.5*(iters - 1 - i)/iters # to get different colours for the lines
        bRunningMeans = bernoulliSecretThetaRunningMeans(nToPlot)
        cRunningMeans = cauchyRunningMeans(nToPlot)
        bLine = line(zip(xvalues, bRunningMeans), rgbcolor = (shade, 0, 1))
        cLine = line(zip(xvalues, cRunningMeans), rgbcolor = (1-shade, 0, shade))
        cMax = max(cRunningMeans)
        if p1 is None: # first sequence creates the plots
            p1, p2, cauchyTitleMax = bLine, cLine, cMax
        else: # later sequences are overlaid
            p1 += bLine
            p2 += cLine
            if cMax > cauchyTitleMax: cauchyTitleMax = cMax # for placement of cauchy title
    # titles are centred using the nToPlot argument, not a notebook-level global
    titleText1 = "Bernoulli running means" # make title text
    t1 = text(titleText1, (nToPlot/2,1), rgbcolor='blue',fontsize=10)
    titleText2 = "Standard Cauchy running means" # make title text
    t2 = text(titleText2, (nToPlot/2,ceil(cauchyTitleMax)+1), rgbcolor='red',fontsize=10)
    return graphics_array((p1+t1,p2+t2))

def pmfPointMassPlot(theta):
    '''Returns a pmf plot for a point mass function with parameter theta.

    Param theta is the location of the point mass.
    Returns a graphics object: a filled point at (theta,1), a hollow point at (theta,0),
    a dotted vertical connector, the zero line either side of theta and a title.'''

    ptsize = 10
    linethick = 2
    pmf = points((theta,1), rgbcolor="blue", pointsize=ptsize)
    pmf += line([(theta,0),(theta,1)], rgbcolor="blue", linestyle=':')
    pmf += points((theta,0), rgbcolor = "white", faceted = True, pointsize=ptsize)
    # zero line either side of theta, leaving a small gap around the hollow point
    pmf += line([(min(theta-2,-2),0),(theta-0.05,0)], rgbcolor="blue",thickness=linethick)
    pmf += line([(theta+.05,0),(theta+2,0)], rgbcolor="blue",thickness=linethick)
    pmf+= text("Point mass f", (theta,1.1), rgbcolor='blue',fontsize=10)
    pmf.axes_color('grey')
    return pmf
    
def cdfPointMassPlot(theta):
    '''Returns a cdf plot for a point mass function with parameter theta.

    Param theta is the location of the point mass.
    Returns a graphics object: height 0 to the left of theta, a jump to height 1 at theta
    (filled point at (theta,1), hollow point at (theta,0)) and a title.'''

    ptsize = 10
    linethick = 2
    cdf = line([(min(theta-2,-2),0),(theta-0.05,0)], rgbcolor="blue",thickness=linethick) # padding
    cdf += points((theta,1), rgbcolor="blue", pointsize=ptsize)
    cdf += line([(theta,0),(theta,1)], rgbcolor="blue", linestyle=':')
    cdf += line([(theta,1),(theta+2,1)], rgbcolor="blue", thickness=linethick) # padding
    cdf += points((theta,0), rgbcolor = "white", faceted = True, pointsize=ptsize)
    cdf+= text("Point mass F", (theta,1.1), rgbcolor='blue',fontsize=10)
    cdf.axes_color('grey')
    return cdf
    
def uniformFInverse(u, theta1, theta2):
    '''Evaluate the inverse CDF of a uniform(theta1, theta2) distribution.

    u, u should be 0 <= u <= 1, is the value to evaluate the inverse CDF at.
    theta1, theta2, theta2 > theta1, are the uniform distribution parameters.
    Returns theta1 + (theta2 - theta1)*u.'''

    width = theta2 - theta1
    return theta1 + width*u

def uniformSample(n, theta1, theta2):
    '''Simulate n draws from a uniform(theta1, theta2) distribution.

    n > 0 is the number of samples to simulate.
    theta1, theta2 (theta2 > theta1) are the uniform distribution parameters.
    Returns a list of n values, each uniformFInverse applied to one random() draw.'''

    return [uniformFInverse(random(), theta1, theta2) for _ in range(n)]

def exponentialFInverse(u, lam):
    '''Evaluate the inverse CDF of an exponential distribution.

    u is the value to evaluate the inverse CDF at.
    lam is the exponential distribution parameter.
    Returns (-1/lam)*log(1 - u).'''

    scale = -1.0/lam
    # log without a base is the natural logarithm
    return scale*log(1 - u)
    
def exponentialSample(n, lam):
    '''Simulate n draws from an exponential distribution.

    n is the number of samples to simulate.
    lam is the exponential distribution parameter.
    Returns a list of n values, each exponentialFInverse applied to one random() draw.'''

    return [exponentialFInverse(random(), lam) for _ in range(n)]


def bernoulliSecretThetaRunningMeans(n, mySeed = None):
    '''Return the n running means of a simulated Bernoulli sample with unknown theta.

    Param n is the number of running means to generate.
    Param mySeed is a value for the seed of the random number generator, defaulting to None
    Note: the unknown theta parameter for the Bernoulli process is defined in bernoulliSampleSecretTheta
    Return a list of n running means.'''

    from pylab import cumsum # we can import in the middle of code
    draws = bernoulliSampleSecretTheta(n, simSeed = mySeed)
    partialSums = list(cumsum(draws))
    # pair each cumulative sum with its sample size 1..n
    return [RR(total)/size for total, size in zip(partialSums, range(1, n+1,1))]


# Overlay several independent running-mean paths from the secret-theta Bernoulli sampler.
nToGenerate = 1500 # length of each running-mean path
iterations = 5 # number of paths to overlay
xvalues = range(1, nToGenerate+1,1) # sample sizes 1..nToGenerate for the horizontal axis
for i in range(iterations):
    redshade = 0.5*(iterations - 1 - i)/iterations # to get different colours for the lines
    bRunningMeans = bernoulliSecretThetaRunningMeans(nToGenerate)
    pts = zip(xvalues,bRunningMeans)
    if (i == 0):
        p = line(pts, rgbcolor = (redshade,0,1)) # the first path creates the plot object
    else:
        p += line(pts, rgbcolor = (redshade,0,1)) # later paths are added to it
show(p, figsize=[5,3], axes_labels=['n','sample mean'])


# Side-by-side running means for Bernoulli and standard Cauchy samples.
nToGenerate = 15000 # number of running means per path
iterations = 5 # number of paths on each plot
g = twoRunningMeansPlot(nToGenerate, iterations) # uses above function to make plot
show(g,figsize=[10,5])


# Plot the sequence term 1/i^p for p = 1 as a function of i.
var('i, p')
f = 1/(i^p)
# make and show plot, note we can use f in the label
# the plotting range is over i, the variable f.subs(p=1) actually depends on
plot(f.subs(p=1), (i, 0.1, 3), axes_labels=('i',f)).show(figsize=[6,3])


# Plot i^(1/i) against i together with a dotted reference line at height 1.
var('i')
f = i^(1/i)
n=500
# f has no parameter p, so it is plotted directly, over its own variable i
p=plot(f, (i, 0, n), axes_labels=('i',f)) # main plot
p+=line([(0,1),(n,1)],linestyle=':') # add a dotted line at height 1
p.show(figsize=[6,3]) # show the plot

x

x


# x is defined as a symbolic variable by default by Sage so we do not need var('x')
# f has 1/x in the exponent, so it cannot be evaluated directly at x = 0
f = (1+x)^(1/x)
# uncomment and try evaluating next line
#f.subs(x=0) # this will give you an error message


# Plot (1+x)^(1/x) at two scales; the small-scale plot has a dotted reference line at height e.
f = (1+x)^(1/x)
n1=5
# f has no parameter p, so it is plotted directly
p1=plot(f, (x, 0.001, n1), axes_labels=('x',f)) # main plot
t1 = text("Large scale plot", (n1/2,e), rgbcolor='blue',fontsize=10)
n2=0.1
p2=plot(f, (x, 0.0000001, n2), axes_labels=('x',f)) # main plot
p2+=line([(0,e),(n2,e)],linestyle=':') # add a dotted line at height e
t2 = text("Small scale plot", (n2/2,e+.01), rgbcolor='blue',fontsize=10)
show(graphics_array((p1+t1,p2+t2)),figsize=[6,3]) # show the plot


# pmf and cdf of a point mass at theta = 2, drawn side by side
theta = 2.0
show(
    graphics_array((pmfPointMassPlot(theta), cdfPointMassPlot(theta))),
    figsize=[8,2],
) # show the plots


# mock up a picture of a sequence of point mass rvs converging on theta = 0
ptsize = 20
i = 1
theta_i = 1/i
p = points((theta_i,1), rgbcolor="blue", pointsize=ptsize)
p += line([(theta_i,0),(theta_i,1)], rgbcolor="blue", linestyle=':')
while theta_i > 0.01: # keep adding point masses at 1/i until 1/i is no longer above 0.01
    i+=1
    theta_i = 1/i
    p += points((theta_i,1), rgbcolor="blue", pointsize=ptsize)
    p += line([(theta_i,0),(theta_i,1)], rgbcolor="blue", linestyle=':')
# the limiting point mass at 0 is drawn in red
p += points((0,1), rgbcolor="red", pointsize=ptsize)
p += line([(0,0),(0,1)], rgbcolor="red", linestyle=':')
p.show(xmin=-1, xmax = 2, ymin=0, ymax = 1.1, axes=False, gridlines=[None,[0]], \
       figsize=[7,2])


@interact
def _(my_mu=input_box(0, label='mu') ,my_sigma=input_box(1,label='sigma')):
    '''Interactive function to plot the Normal(mu, sigma^2) pdf for the chosen mu and sigma.

    The pdf is plotted over [mu - 3*sigma - 2, mu + 3*sigma + 2]; a message is printed instead if sigma is not positive.'''

    if not (my_sigma > 0): # nothing to plot without a positive sigma
        print( "sigma must be greater than 0")
        return
    html('<h4>Normal('+str(my_mu)+','+str(my_sigma)+'<sup>2</sup>)</h4>')
    var('mu sigma')
    normalPdf = (1/(sigma*sqrt(2.0*pi)))*exp(-1.0/(2*sigma^2)*(x - mu)^2)
    xLow = my_mu - 3*my_sigma - 2
    xHigh = my_mu + 3*my_sigma + 2
    pdfPlot = plot(normalPdf.subs(mu=my_mu,sigma=my_sigma), (x, xLow, xHigh),
                   axes_labels=('x','f(x)'))
    show(pdfPlot,figsize=[8,3])


# mock up a picture of a sequence of converging normal distributions (pdfs)
my_mu = 0
upper = my_mu + 5; lower = -upper;     # limits for plot
var('mu sigma')
stop_i = 12
html('<h4>N(0,1) to N(0, 1/'+str(stop_i)+')</h4>')
f = (1/(sigma*sqrt(2.0*pi)))*exp(-1.0/(2*sigma^2)*(x - mu)^2)
p=plot(f.subs(mu=my_mu,sigma=1.0), (x, lower, upper), rgbcolor = (0,0,1))
for i in range(2, stop_i, 1): # just do a few of them
    # NOTE(review): 1-11/i lies outside [0,1] for i < 11; presumably Sage wraps colour
    # components into range - confirm this shade formula is what was intended
    shade = 1-11/i # make them different colours
    p+=plot(f.subs(mu=my_mu,sigma=1/i), (x, lower, upper), rgbcolor = (1-shade, 0, shade))
textOffset = -0.2 # offset for placement of text -  may need adjusting
# hand-placed labels at 0 and at both ends of the plotted range, since the axes are hidden
p+=text("0",(0,textOffset),fontsize = 10, rgbcolor='grey')
p+=text(str(upper.n(digits=2)),(upper,textOffset),fontsize = 10, rgbcolor='grey')
p+=text(str(lower.n(digits=2)),(lower,textOffset),fontsize = 10, rgbcolor='grey')
p.show(axes=False, gridlines=[None,[0]], figsize=[7,3])


# mock up a picture of a sequence of converging normal distribution functions (cdfs)
my_mu = 0
upper = my_mu + 5; lower = -upper;     # limits for plot
var('mu sigma')
stop_i = 12
html('<h4>N(0,1) to N(0, 1/'+str(stop_i)+')</h4>')
f = (1/2)*(1+erf((x - mu)/(sqrt(2)*sigma)))
p=plot(f.subs(mu=my_mu,sigma=1.0), (x, lower, upper), rgbcolor = (0,0,1))
for i in range(2, stop_i, 1): # just do a few of them
    # NOTE(review): 1-11/i lies outside [0,1] for i < 11; presumably Sage wraps colour
    # components into range - confirm this shade formula is what was intended
    shade = 1-11/i # make them different colours
    p+=plot(f.subs(mu=my_mu,sigma=1/i), (x, lower, upper), rgbcolor = (1-shade, 0, shade))
textOffset = -0.2 # offset for placement of text -  may need adjusting
# hand-placed labels at 0 and at both ends of the plotted range, since the axes are hidden
p+=text("0",(0,textOffset),fontsize = 10, rgbcolor='grey')
p+=text(str(upper.n(digits=2)),(upper,textOffset),fontsize = 10, rgbcolor='grey')
p+=text(str(lower.n(digits=2)),(lower,textOffset),fontsize = 10, rgbcolor='grey')
p.show(axes=False, gridlines=[None,[0]], figsize=[7,3])


# pmf and cdf of a point mass at theta = 0, drawn side by side
theta = 0.0
# show the plots
show(graphics_array((pmfPointMassPlot(theta),cdfPointMassPlot(theta))),figsize=[8,2])


# mock up a picture of a sequence of converging normal distributions
# (zoomed in on [-0.2, 0.2], starting part way into the sequence)
my_mu = 0
var('mu sigma')
upper = 0.2; lower = -upper
i = 20 # start part way into the sequence
lim = 100 # how far to go
stop_i = 12 # not used by this cell's plot
html('<h4>N(0,1/'+str(i)+') to N(0, 1/'+str(lim)+')</h4>')
f = (1/(sigma*sqrt(2.0*pi)))*exp(-1.0/(2*sigma^2)*(x - mu)^2)
p=plot(f.subs(mu=my_mu,sigma=1.0/i), (x, lower, upper), rgbcolor = (0,0,1))
for j in range(i, lim+1, 4): # just do a few of them
    shade = 1-(j-i)/(lim-i) # make them different colours
    p+=plot(f.subs(mu=my_mu,sigma=1/j), (x, lower,upper), rgbcolor = (1-shade, 0, shade))
textOffset = -1.5 # offset for placement of text -  may need adjusting
# hand-placed labels at 0 and at both ends of the plotted range, since the axes are hidden
p+=text("0",(0,textOffset),fontsize = 10, rgbcolor='grey')
p+=text(str(upper.n(digits=2)),(upper,textOffset),fontsize = 10, rgbcolor='grey')
p+=text(str(lower.n(digits=2)),(lower,textOffset),fontsize = 10, rgbcolor='grey')
p.show(axes=False, gridlines=[None,[0]], figsize=[7,3])


def showURL(url, ht=500):
    """Build an IFrame that shows the page at url inside the notebook.

    url is the address to embed; ht is the frame height, defaulting to 500.
    Returns the IFrame object, which is 95% of the available width."""
    from IPython.display import IFrame
    frame = IFrame(url, width='95%', height=ht)
    return frame
showURL('https://en.wikipedia.org/wiki/Convergence_of_random_variables') # also check out 'almost sure convergence'


@interact
def _(nToGen=slider(1,1500,1,100,label='n'),my_theta=input_box(0.3,label='theta'),rSeed=input_box(1234,label='random seed')):
    '''Interactive function to plot the running mean of a Bernoulli sample for the chosen n, theta and random number seed.

    A dotted grey line marks theta; a message is printed instead if theta is outside [0,1].'''

    if not (my_theta >= 0 and my_theta <= 1): # theta must be a probability
        print ('Theta must be between 0 and 1')
        return
    html('<h4>Bernoulli('+str(my_theta.n(digits=2))+')</h4>')
    sampleSizes = range(1, nToGen+1,1)
    runningMeans = bernoulliRunningMeans(nToGen, myTheta=my_theta, mySeed=rSeed)
    meansPlot = line(zip(sampleSizes, runningMeans), rgbcolor = (0,0,1))
    meansPlot += line([(0,my_theta),(nToGen,my_theta)],linestyle=':',rgbcolor='grey')
    show(meansPlot, figsize=[5,3], axes_labels=['n','sample mean'],ymax=1)


# Example of Pareto and Weak law of large numbers
# 100 independent replicates, each a list of 10000 running means of Pareto samples with a=1.9
running_means = [paretoRunningMeans(10000,a=1.9) for i in range(100)]

@interact
def _(n=slider(1,9999,100,999,label='n'),rSeed=input_box(1234,label='random seed')):
    '''Interactive function to plot the distribution, across the replicates in running_means, of the Pareto running mean at sample size n.

    Param n is the sample size whose running mean is taken from every replicate.
    Param rSeed is kept so the interact controls stay the same, but it is not used:
    running_means is simulated once, outside this function.'''

    # entry k of a running-means list is the mean of the first k+1 samples,
    # so the mean of the first n samples sits at index n-1
    n_th_means = [rm[n-1] for rm in running_means]
    p=histogram(n_th_means)
    show(p)


# Simulate several Bernoulli(theta) samples of size n and collect their sample means as exact rationals.
theta, n, samples = 0.6, 10, 5 # concise way to set some variable values
sampleMeans=[] # empty list
for i in range(0, samples, 1):  # loop 
    thisMean = QQ(sum(bernoulliSample(n, theta)))/n # get a sample and find the mean
    sampleMeans.append(thisMean) # add mean to the list of means
sampleMeans    # disclose the sample means

[3/5, 4/5, 7/10, 2/5, 3/5]


import pylab
@interact
def _(replicates=slider(1,3000,1,100,label='replicates'), \
      nToGen=slider(1,1500,1,100,label='sample size n'),\
      my_theta=input_box(0.3,label='theta'),Bins=5):
    '''Interactive function to plot distribution of replicates of sample means for n IID Bernoulli trials.

    Param replicates is the number of sample means to simulate.
    Param nToGen is the size n of each simulated sample.
    Param my_theta is the Bernoulli parameter, which must lie in [0,1].
    Param Bins is passed to pylab.hist as its bins argument.
    The histogram is also saved to the file 'myHist'.'''

    if my_theta >= 0 and my_theta <= 1 and replicates > 0:
        # one sample mean per replicate
        sampleMeans = [RR(sum(bernoulliSample(nToGen, my_theta)))/nToGen
                       for rep in range(0, replicates, 1)]
        pylab.clf() # clear current figure
        pylab.hist(sampleMeans, Bins, density=True)
        pylab.ylabel('normalised count')
        pylab.title('Normalised histogram for Bernoulli sample means')
        pylab.savefig('myHist') # to actually display the figure
        pylab.show()
    else:
        print ('Theta must be between 0 and 1, and replicates > 0')


import pylab
@interact
def _(replicates=input_box(100,label='replicates'), \
      nToGen=slider(1,1500,1,100,label='sample size n'),\
      my_theta1=input_box(2,label='theta1'),\
      my_theta2=input_box(4,label='theta2'),Bins=5):
    '''Interactive function to plot distribution of 
    sample means for n IID Uniform(theta1, theta2) trials.

    Param replicates is the number of sample means to simulate.
    Param nToGen is the size n of each simulated sample.
    Param my_theta1 and my_theta2 are the uniform parameters, with my_theta1 < my_theta2.
    Param Bins is passed to pylab.hist as its bins argument.
    The histogram is also saved to the file 'myHist'.'''

    if (my_theta1 < my_theta2) and replicates > 0:
        # one sample mean per replicate
        sampleMeans = [RR(sum(uniformSample(nToGen, my_theta1, my_theta2)))/nToGen
                       for rep in range(0, replicates, 1)]
        pylab.clf() # clear current figure
        pylab.hist(sampleMeans, Bins, density=True)
        pylab.ylabel('normalised count')
        pylab.title('Normalised histogram for Uniform sample means')
        pylab.savefig('myHist') # to actually display the figure
        pylab.show()
    else:
        print ('theta1 must be less than theta2, and replicates > 0')


import pylab
@interact
def _(replicates=input_box(100,label='replicates'), \
      nToGen=slider(1,1500,1,100,label='sample size n'),\
      my_lambda=input_box(0.1,label='lambda'),Bins=5):
    '''Interactive function to plot distribution of \
    sample means for an Exponential(lambda) process.

    Param replicates is the number of sample means to simulate.
    Param nToGen is the size n of each simulated sample.
    Param my_lambda is the exponential rate parameter, which must be positive.
    Param Bins is passed to pylab.hist as its bins argument.
    The histogram is also saved to the file 'myHist'.'''

    if my_lambda > 0 and replicates > 0:
        # one sample mean per replicate
        sampleMeans = [RR(sum(exponentialSample(nToGen, my_lambda)))/nToGen
                       for rep in range(0, replicates, 1)]
        pylab.clf() # clear current figure
        pylab.hist(sampleMeans, Bins, density=True)
        pylab.ylabel('normalised count')
        pylab.title('Normalised histogram for Exponential sample means')
        pylab.savefig('myHist') # to actually display the figure
        pylab.show()
    else:
        print ('lambda must be greater than 0, and replicates > 0')


# Running means of secret-theta Bernoulli samples, each with a 95% confidence interval
# drawn as a vertical bar just to the right of the paths.
nToGenerate = 100
replicates = 20
xvalues = range(1, nToGenerate+1,1)
for i in range(replicates):
    redshade = 0.5*(replicates - 1 - i)/replicates # to get different colours for the lines
    bRunningMeans = bernoulliSecretThetaRunningMeans(nToGenerate)
    pts = zip(xvalues,bRunningMeans)
    if (i == 0):
        p = line(pts, rgbcolor = (redshade,0,1))
    else:
        p += line(pts, rgbcolor = (redshade,0,1))
    mle=bRunningMeans[nToGenerate-1] # the final running mean is the MLE from all nToGenerate draws
    se95Correction=2.0*sqrt(mle*(1-mle)/nToGenerate) # two estimated standard errors
    lower95CI = mle-se95Correction
    upper95CI = mle+se95Correction
    # offset each interval horizontally by i so the bars do not overlap
    p += line([(nToGenerate+i,lower95CI),(nToGenerate+i,upper95CI)], rgbcolor = (redshade,0,1), thickness=0.5)
# horizontal reference line at 0.3, the default theta of bernoulliSampleSecretTheta
p += line([(1,0.3),(nToGenerate+replicates,0.3)], rgbcolor='black', thickness=2)
p += text('sample mean up to n='+str(nToGenerate)+' and their 95% confidence intervals',(nToGenerate/1.5,1),fontsize=16)
show(p, figsize=[10,6])


# Sample Exam Problem 5
# Only replace the XXX below, do not change the function names or parameters
import numpy as np
# observed waiting times passed to SampleExamProblem5 below
sampleWaitingTimes = np.array([8,3,7,18,18,3,7,9,9,25,0,0,25,6,10,0,10,8,16,9,1,5,16,6,4,1,3,21,0,28,3,8,6,6,11,\
                               8,10,15,0,8,7,11,10,9,12,13,8,10,11,8,7,11,5,9,11,14,13,5,8,9,12,10,13,6,11,13,0,\
                               0,11,1,9,5,14,16,2,10,21,1,14,2,10,24,6,1,14,14,0,14,4,11,15,0,10,2,13,2,22,10,5,\
                               6,13,1,13,10,11,4,7,9,12,8,16,15,14,5,10,12,9,8,0,5,13,13,6,8,4,13,15,7,11,6,23,1])

def SampleExamProblem5(exponentialSamples):
    '''return the 95% confidence interval as a 2-tuple for the unknown rate parameter lambda* 
    from n IID Exponential(lambda*) trials in the input numpy array called exponentialSamples'''
    # exercise template: each XXX is a placeholder to be replaced with the student's own code,
    # so this cell raises an error until they are filled in
    XXX
    XXX
    XXX
    lower95CI=XXX
    upper95CI=XXX
    return (lower95CI,upper95CI)

# do NOT change anything below
# unpack the (lower, upper) tuple returned for the observed waiting times and print it
lowerCISampleExamProblem5,upperCISampleExamProblem5 = SampleExamProblem5(sampleWaitingTimes)
print ("The 95% CI for lambda in the Orbiter Waiting time experiment = ")
print (lowerCISampleExamProblem5,upperCISampleExamProblem5)


# Sample Exam Problem 5 Solution
# solution is straightforward by following these steps symbolically
# or you can do it by hand with pen/paper or do both to be safe

## STEP 1 - define the variables you need
lam,x,n = var('lam','x','n')

## STEP 2 - get symbolic expression for the log-likelihood of one sample,
##          using the Exponential(lam) PDF f(x) = lam*exp(-lam*x)
logfx = log(lam*exp(-lam*x)).full_simplify()
print ("logfx = ", logfx)

## STEP 3 - find second derivative of expression from STEP 2 w.r.t. parameter
d2logfx = logfx.diff(lam,2).full_simplify()
print ("d2logfx = ", d2logfx)

## STEP 4 - to get Fisher Information of one sample
##          integrate d2logfx * f(x) over x in [0,Infinity), f(x) is the PDF lam*exp(-lam*x),
##          and negate the result
assume(lam>0) # usually you need make such assume's for integrate to work - see suggestions in error messages
FisherInformation1 = -integrate(d2logfx*lam*exp(-lam*x),x,0,Infinity)
print ("FisherInformation1 = ",FisherInformation1)

## STEP 5 - get Standard Error from FisherInformation1
StdErr = 1/sqrt(n*FisherInformation1)
print ("StdErr = ",StdErr)

## STEP 6 - get Estimated Standard Error from Standard Error and MLE or lamHat
# lamHat = 1/xBar = 1/sampleMean; know from before
lamHat,sampMean = var('lamHat','sampMean')
lamHat = 1/sampMean
EstStdErr = StdErr.subs(lam=lamHat)
print ("EstStdErr = ",EstStdErr)

## STEP 7 - Get lower and upper 95% CI
(lamHat-2*EstStdErr, lamHat+2*EstStdErr)

logfx =  log(lam^x*e^(-lam)/factorial(x))
d2logfx =  -x/lam^2
FisherInformation1 =  lam^(-3)
StdErr =  1/sqrt(n/lam^3)
EstStdErr =  1/sqrt(n*sampMean^3)

(-2/sqrt(n*sampMean^3) + 1/sampMean, 2/sqrt(n*sampMean^3) + 1/sampMean)


# Sample Exam Problem 5 Solution
# Only replace the XXX below, do not change the function names or parameters
import numpy as np
# observed waiting times passed to SampleExamProblem5 below
sampleWaitingTimes = np.array([8,3,7,18,18,3,7,9,9,25,0,0,25,6,10,0,10,8,16,9,1,5,16,6,4,1,3,21,0,28,3,8,6,6,11,\
                               8,10,15,0,8,7,11,10,9,12,13,8,10,11,8,7,11,5,9,11,14,13,5,8,9,12,10,13,6,11,13,0,\
                               0,11,1,9,5,14,16,2,10,21,1,14,2,10,24,6,1,14,14,0,14,4,11,15,0,10,2,13,2,22,10,5,\
                               6,13,1,13,10,11,4,7,9,12,8,16,15,14,5,10,12,9,8,0,5,13,13,6,8,4,13,15,7,11,6,23,1])

def SampleExamProblem5(exponentialSamples):
    '''Return the 95% confidence interval as a 2-tuple for the unknown rate parameter lambda*
    from n IID Exponential(lambda*) trials in the input numpy array called exponentialSamples.

    The interval is 1/sampleMean plus or minus 2/(sqrt(n)*sampleMean).'''
    numSamples = len(exponentialSamples)
    xBar = exponentialSamples.mean()
    halfWidth = RR(2/(sqrt(numSamples)*xBar)) # you can also replace RR by float here or you get expressions
    lamHat = 1.0/xBar # point estimate of the rate
    return (lamHat - halfWidth, lamHat + halfWidth)

# do NOT change anything below
# unpack the (lower, upper) tuple returned for the observed waiting times and print it
lowerCISampleExamProblem5,upperCISampleExamProblem5 = SampleExamProblem5(sampleWaitingTimes)
print ("The 95% CI for lambda in the Orbiter Waiting time experiment = ")
print (lowerCISampleExamProblem5,upperCISampleExamProblem5)

The 95% CI for lambda in the Orbiter Waiting time experiment = 
0.09100312972775282 0.12936414907024382


import numpy as np
# do a live simulation ... to implement this test...
# simulate from Bernoulli(theta0) n samples
# make mle
# construct Wald test
# make a decision - i.e., decide if you will reject or fail to reject the H0: theta0=0.5
trueTheta=0.45 # parameter used to simulate the data
n=20
myBernSamples=np.array([floor(random()+trueTheta) for i in range(0,n)])
#myBernSamples
mle=myBernSamples.mean() # the sample mean is used as the MLE of theta
mle
NullTheta=0.5
se=sqrt(mle*(1.0-mle)/n) # estimated standard error of the MLE
W=(mle-NullTheta)/se # Wald statistic
print (abs(W))
alpha = 0.05 # size of the test; the cutoff 2 below is hard-coded rather than derived from alpha
abs(W) > 2 # alpha=0.05, so z_{alpha/2} =1.96 approx=2

0.4494665749754946

False


# Sample Exam Problem 6 Problem
# exercise template: each XXX is a placeholder to be replaced with the student's own code

## STEP 1: get the MLE thetaHat
thetaHat=XXX 
print ("mle thetaHat = ",thetaHat)

## STEP 2: get the NullTheta or theta0
NullTheta=XXX
print ("Null value of theta under H0 = ", NullTheta)

## STEP 3: get estimated standard error
seTheta=XXX # for Bernoulli trials from earlier in 10.ipynb
print ("estimated standard error",seTheta)

# STEP 4: get Wald Statistic
W=XXX
print ("Wald staatistic = ",W)

# STEP 5: conduct the size alpha=0.05 Wald test
# do NOT change anything below
rejectNullSampleExamProblem6 = abs(W) > 2.0 # alpha=0.05, so z_{alpha/2} =1.96 approx=2.0
if (rejectNullSampleExamProblem6):
    print ("we reject the null hypothesis that theta_0=0.5")
else:
    print ("we fail to reject the null hypothesis that theta_0=0.5")


# Sample Exam Problem 6 Solution

## STEP 1: get the MLE thetaHat
n=1114 # sample size
thetaHat=546/n # MLE is sample mean for IID Bernoulli trials
print ("mle thetaHat = ",thetaHat)

## STEP 2: get the NullTheta or theta0
NullTheta=0.5
print ("Null value of theta under H0 = ", NullTheta)

## STEP 3: get estimated standard error
seTheta=sqrt(thetaHat*(1.0-thetaHat)/n) # for Bernoulli trials from earlier in 10.ipynb
print ("estimated standard error",seTheta)

# STEP 4: get Wald Statistic
W=(thetaHat-NullTheta)/seTheta # distance of the MLE from the null value, in estimated standard errors
print ("Wald staatistic = ",W)

# STEP 5: conduct the size alpha=0.05 Wald test
rejectNullSampleExamProblem6 = abs(W) > 2.0 # alpha=0.05, so z_{alpha/2} =1.96 approx=2.0
if (rejectNullSampleExamProblem6):
    print ("we reject the null hypothesis that theta_0=0.5")
else:
    print ("we fail to reject the null hypothesis that theta_0=0.5")

mle thetaHat =  273/557
Null value of theta under H0 =  0.500000000000000
estimated standard error 0.0149776163832414
Wald staatistic =  -0.659272243178650
we fail to reject the null hypothesis that theta_0=0.5


# Hand-drawn diagram relating the reject/not-reject decision to the size of the test and the p-value.
# text labels
p = text('Reject $H_0$?',(12,12))
p += text('No',(30,10))
p += text('Yes',(30,15))
p += text('p-value',(70,10))
p += text('size',(65,4))
p += text('$0$',(40,4))
p += text('$1$',(90,4))
# red marker on the horizontal axis
p += points((59,5),rgbcolor='red',size=50)
# the two axes, then the step from 'No' up to 'Yes'
p += line([(40,17),(40,5),(95,5)])
p += line([(40,10),(59,10),(59,15),(90,15)])
# red pointer from the 'p-value' label towards the marker
p += line([(68,9.5),(59.5,5.5)],rgbcolor='red')
p.show(axes=False)


# draw a random sample from the integers 1..100
popltn = range(1, 101, 1) # make a population
sample(popltn, 10) # sample 10 elements from it at random

[15, 59, 92, 29, 6, 99, 56, 20, 84, 77]


# the values 1..10 repeated four times, giving 40 elements
popltnWithDuplicates = list(range(1, 11, 1))*4 # make a population with repeated elements
print(popltnWithDuplicates)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]


# draw five separate samples of size 10 from the population with repeated elements
for i in range (5):
    print( sample(popltnWithDuplicates, 10))

[8, 3, 10, 6, 4, 6, 1, 8, 9, 1]
[7, 5, 9, 7, 2, 10, 9, 8, 4, 8]
[4, 6, 1, 2, 5, 4, 7, 10, 2, 10]
[7, 2, 7, 1, 9, 2, 4, 6, 4, 2]
[8, 3, 2, 3, 1, 9, 7, 5, 10, 1]


#?sample


#?shuffle


#?choice

Introduction to Data Science: A Comp-Math-Stat Approach¶

1MS041, 2021¶

10. Convergence of Limits of Random Variables, Confidence Set Estimation and Testing¶

Inference and Estimation: The Big Picture¶

Limits¶

Preparation: Let's just evaluate the next cell and focus on concepts.¶

Limits of a Sequence of Real Numbers¶

YouTry¶

Limits of Functions¶

Limit of a Sequence of Random Variables¶

The $Gaussian(\mu, \sigma^2)$ or $Normal(\mu, \sigma^2)$ RV?¶

Convergence in Distribution¶

There is an interesting point to note about this convergence:¶

Convergence in Probability¶

Markov's inequality¶

Proof¶

Convergence in probability of our sequence¶

Some Basic Limit Laws in Statistics¶

Weak Law of Large Numbers¶

Central Limit Theorem¶

YouTry¶

Properties of the MLE¶

1. The MLE is asymptotically consistent¶

2. The MLE is equivariant¶

3. The MLE is asymptotically normal¶

Confidence Interval and Set Estimation from MLE¶

Example of Confidence Interval for IID $Bernoulli(\theta)$ Trials¶

Sample Exam Problem 5¶

Sample Exam Problem 5 Solution¶

Hypothesis Testing¶

Introduction¶

Power, Size and Level of a Test¶

Power Function¶

Size of a test¶

Wald test¶

Definition¶

Asymptotic $\mathsf{size}$ of a Wald test¶

Asymptotic power of a Wald test¶

The $\mathsf{size}$ Wald test¶

Example: Wald test for the mean waiting times at our Orbiter bus-stop¶

A Live Example: Simulating Bernoulli Trials to understand Wald Tests¶

Sample Exam Problem 6¶

P-value¶

Definition of p-value¶

Understanding p-value¶

The p-value of a hypothesis test¶

Example: p-value for the parametric Orbiter bus waiting times experiment¶

Concentration Inequalities¶

Preparation for Nonparametric Estimation and Testing¶

YouTry Later¶

Introduction to Data Science: A Comp-Math-Stat Approach ¶