import numpy as np 
import matplotlib.pyplot as plt

def estimate_coefficients(x, y):
    """Fit the simple linear regression y = b0 + b1*x by least squares.

    Parameters
    ----------
    x, y : array-like
        Covariate and response observations, one entry per data point.

    Returns
    -------
    tuple
        ``(b0_hat, b1_hat, sigma_hat)``: estimated intercept, estimated
        slope and estimated residual standard deviation.  For more than two
        observations the residual variance is rescaled by ``n/(n-2)``.

    Notes
    -----
    If every x is the same, ``SS_xx`` is zero (up to rounding) and the slope
    is undefined; no check is made for that case.
    """
    # accept plain sequences as well as numpy arrays
    x = np.asarray(x)
    y = np.asarray(y)
    # size of the dataset
    n = np.size(x)
    # mean of x and y
    mean_x, mean_y = np.mean(x), np.mean(y)
    # xy cross-deviation and xx deviation
    SS_xy = np.sum(y*x) - n*mean_y*mean_x
    SS_xx = np.sum(x*x) - n*mean_x*mean_x
    # least squares estimates of the regression coefficients
    b1_hat = SS_xy / SS_xx
    b0_hat = mean_y - b1_hat*mean_x
    # `**` is exponentiation in both plain Python and SageMath; `^` is only a
    # power under the SageMath preparser and is bitwise XOR in plain Python.
    sigma_hat2 = np.mean((y - (b0_hat + b1_hat * x))**2)
    if n > 2:
        # two coefficients were estimated, so divide by n-2 instead of n
        sigma_hat2 = sigma_hat2*n/(n-2)
    sigma_hat = np.sqrt(sigma_hat2)
    return (b0_hat, b1_hat, sigma_hat)

def standard_errors(x,y):
    """Estimate the standard errors of the least squares intercept and slope.

    Parameters
    ----------
    x, y : array-like
        Covariate and response observations, one entry per data point.

    Returns
    -------
    tuple
        ``(se_b0, se_b1)``: estimated standard errors of the intercept and
        of the slope, built from the residual standard deviation returned by
        ``estimate_coefficients``.
    """
    # accept plain sequences as well as numpy arrays
    x = np.asarray(x)
    n = np.size(x)
    # only the residual standard deviation is needed from the fit
    _, _, s_hat = estimate_coefficients(x, y)
    mean_x = np.mean(x)
    # `**` is exponentiation in both plain Python and SageMath; `^` is only a
    # power under the SageMath preparser and is bitwise XOR in plain Python.
    s2X = np.mean((x - mean_x)**2)
    se_b1 = s_hat/np.sqrt(s2X*n)
    se_b0 = se_b1*np.sqrt(np.mean(x**2))
    return (se_b0, se_b1)

def plot_regression_line(x, y, b):
    """Scatter the observations, overlay the line b[0] + b[1]*x and show the figure."""
    # observations as small magenta circles
    plt.scatter(x, y, s=10, marker="o", color="m")
    # fitted values along the observed x
    intercept, slope = b[0], b[1]
    fitted = intercept + slope*x
    # fitted regression line in blue
    plt.plot(x, fitted, color="b")
    # generic axis labels
    plt.xlabel('x')
    plt.ylabel('y')
    # display the figure
    plt.show()

def SimpleLinearRegression(x,y):
    """Fit y on x, print the three estimates and plot the data with the fitted line."""
    # (b0_hat, b1_hat, sigma_hat) from the least squares fit
    fit = estimate_coefficients(x, y)
    report = "Estimated coefficients:\nb0_hat = {} \nb1_hat = {}\nsigma_hat = {}"
    print(report.format(fit[0], fit[1], fit[2]))
    # data points together with the fitted regression line
    plot_regression_line(x, y, fit)


# Datasets: LSAT is used as the covariate x and GPA as the response y (15 paired observations)
LSAT=np.array([576, 635, 558, 578, 666, 580, 555, 661, 651, 605, 653, 575, 545, 572, 594]) # LSAT data
GPA=np.array([3.39, 3.30, 2.81, 3.03, 3.44, 3.07, 3.00, 3.43, 3.36, 3.13, 3.12, 2.74, 2.76, 2.88, 3.96]) # GPA data

# estimate the coefficients, print them and plot the fitted line over the data
SimpleLinearRegression(LSAT,GPA)

Estimated coefficients:
b0_hat = 0.5998612958803515 
b1_hat = 0.004267223518635576
sigma_hat = 0.28404612916999544


# fitted values, using the b0_hat and b1_hat printed by the fit above
predictedGPA = 0.5998612958803515 + 0.004267223518635576*LSAT
# residuals: observed GPA minus fitted GPA
residuals = GPA - predictedGPA
plt.scatter(LSAT, residuals, color = "k",marker = "o", s = 10) 
plt.axhline()  # draw a y=0 line
# putting labels on the y and x axes
plt.ylabel('$\\epsilon_i$') 
plt.xlabel('LSAT')
plt.show()
# in general we want the residuals to be Normally distributed about 0 with the same variance


# Point estimates and their estimated standard errors for the LSAT/GPA fit
b0_hat, b1_hat, s_hat = estimate_coefficients(LSAT,GPA)
se_b0,se_b1 = standard_errors(LSAT,GPA)
print ("Estimated standard errors for beta_0_hat and beta_1_hat are:")
print (se_b0,se_b1 )
# approximate 95% intervals: estimate plus or minus 2 standard errors
print ("and the approximate 95% confidence intervals for beta_0_hat is:")
print ("          [ ", b0_hat-2*se_b0," , ", b0_hat+2*se_b0, " ]")
print ("and the approximate 95% confidence intervals for beta_1_hat is:")
print ("          [ ", b1_hat-2*se_b1," , ", b1_hat+2*se_b1, " ]")
print ("The Wald test for the null hypothesis H0 that beta_1 = 0 is:")
W = (b1_hat-0)/se_b1
# Two-sided test: compare |W| with 2.  The comparison must sit outside abs();
# abs(W > 2) takes the absolute value of a boolean and never rejects for a
# strongly negative W.
if abs(W) > 2:
    print ("Reject H0 that beta_1=0 at alpha=0.05, since W = ",W)
else:
    print ("fail to reject H0 that beta_1=0 at alpha=0.05, since W = ",W)

Estimated standard errors for beta_0_hat and beta_1_hat are:
1.0927735237389298 0.0018163754932013147
and the approximate 95% confidence intervals for beta_0_hat is:
          [  -1.585685751597508  ,  2.785408343358211  ]
and the approximate 95% confidence intervals for beta_1_hat is:
          [  0.0006344725322329466  ,  0.007899974505038206  ]
The Wald test for the null hypothesis H0 that beta_1 = 0 is:
Reject H0 that beta_1=0 at alpha=0.05, since W =  2.34930692172834


from scipy.linalg import lstsq
import matplotlib.pyplot as plt
import numpy as np

# suppose we have the following data
x = np.array([1, 2.5, 3.5, 4, 5, 7, 8.5])
y = np.array([0.3, 1.1, 1.5, 2.0, 3.2, 6.6, 8.6])

# We want to fit a line of the form y = a + b*x to this data. We first form the
# "design matrix" M1, with a constant column of 1s (x**0) and a column containing x (x**1).
# `**` is exponentiation in both plain Python and SageMath; `^` is only a power
# under the SageMath preparser and is bitwise XOR in plain Python.
M1 = x[:, np.newaxis]**[0, 1]
M1

array([[1. , 1. ],
       [1. , 2.5],
       [1. , 3.5],
       [1. , 4. ],
       [1. , 5. ],
       [1. , 7. ],
       [1. , 8.5]])


# We want to find the least-squares solution to
# M1.dot(p) = y, where p is a vector with length 2 that holds the parameters a and b.
# Besides the solution p, lstsq also returns the residues, the effective rank
# of M1 and the singular values of M1.
p, res, rnk, s = lstsq(M1, y)
p

array([-1.93080357,  1.16875   ])


# observed data as points
plt.plot(x, y, 'o', label='data')
# evaluate the fitted line a + b*x on a grid of 101 points from 0 to 9
xx = np.linspace(0, 9, 101)
yy = p[0] + p[1]*xx
plt.plot(xx, yy, label='least squares fit, $y = a + bx$')
plt.xlabel('x')
plt.ylabel('y')
plt.legend(framealpha=1, shadow=True)
plt.grid(alpha=0.25)
plt.show()


# Design matrix for the fit y = a + b*x^2: a column of 1s (x**0) and a column of x**2.
# `**` is exponentiation in both plain Python and SageMath; `^` is only a power
# under the SageMath preparser and is bitwise XOR in plain Python.
M2 = x[:, np.newaxis]**[0, 2]
M2

array([[ 1.  ,  1.  ],
       [ 1.  ,  6.25],
       [ 1.  , 12.25],
       [ 1.  , 16.  ],
       [ 1.  , 25.  ],
       [ 1.  , 49.  ],
       [ 1.  , 72.25]])


# least squares solution with M2, i.e. for the model y = a + b*x^2
p, res, rnk, s = lstsq(M2, y)
plt.plot(x, y, 'o', label='data')
# evaluate the fitted curve on a grid of 101 points from 0 to 9
xx = np.linspace(0, 9, 101)
# `**` is exponentiation in both plain Python and SageMath (`^` is XOR in plain Python)
yy = p[0] + p[1]*xx**2
# the legend states the model that was actually fitted
plt.plot(xx, yy, label='least squares fit, $y = a + bx^2$')
plt.xlabel('x')
plt.ylabel('y')
plt.legend(framealpha=1, shadow=True)
plt.grid(alpha=0.25)
plt.show()


# Fitting a cubic polynomial is the same idea: the columns are x**0, x**1, x**2 and x**3.
# `**` is exponentiation in both plain Python and SageMath; `^` is only a power
# under the SageMath preparser and is bitwise XOR in plain Python.
M3 = x[:, np.newaxis]**[0, 1, 2, 3]
M3

array([[  1.   ,   1.   ,   1.   ,   1.   ],
       [  1.   ,   2.5  ,   6.25 ,  15.625],
       [  1.   ,   3.5  ,  12.25 ,  42.875],
       [  1.   ,   4.   ,  16.   ,  64.   ],
       [  1.   ,   5.   ,  25.   , 125.   ],
       [  1.   ,   7.   ,  49.   , 343.   ],
       [  1.   ,   8.5  ,  72.25 , 614.125]])


# least squares solution with M3, i.e. for the cubic model
p, res, rnk, s = lstsq(M3, y)
plt.plot(x, y, 'o', label='data')
# evaluate the fitted cubic on a grid of 101 points from 0 to 9
xx = np.linspace(0, 9, 101)
# `**` is exponentiation in both plain Python and SageMath (`^` is XOR in plain Python)
yy = p[0] + p[1]*xx + p[2]*xx**2 + p[3]*xx**3
# the legend states the model that was actually fitted
plt.plot(xx, yy, label='least squares fit, $y = a + bx + cx^2 + dx^3$')
plt.xlabel('x')
plt.ylabel('y')
plt.legend(framealpha=1, shadow=True)
plt.grid(alpha=0.25)
plt.show()


# Sample Exam Problem 8 
# do not change this import and data block ########################
from scipy.linalg import lstsq
import matplotlib.pyplot as plt
import numpy as np
logLightIntens_logSurfTemp=[(4.37,5.23),(4.56,5.74),
(4.26,4.93),(4.56,5.74),(4.30,5.19),(4.46,5.46),(3.84,4.65),(4.57,5.27),(4.26,5.57),(4.37,5.12),(3.49,5.73),
(4.43,5.45),(4.48,5.42),(4.01,4.05),(4.29,4.26),(4.42,4.58),(4.23,3.94),(4.42,4.18),(4.23,4.18),(3.49,5.89),
(4.29,4.38),(4.29,4.22),(4.42,4.42),(4.49,4.85),(4.38,5.02),(4.42,4.66),(4.29,4.66),(4.38,4.90),(4.22,4.39),
(3.48,6.05),(4.38,4.42),(4.56,5.10),(4.45,5.22),(3.49,6.29),(4.23,4.34),(4.62,5.62),(4.53,5.10),(4.45,5.22),
(4.53,5.18),(4.43,5.57),(4.38,4.62),(4.45,5.06),(4.50,5.34),(4.45,5.34),(4.55,5.54),(4.45,4.98),(4.42,4.50)]
CleanedlogLightIntens_logSurfTemp=\
np.array([yx for yx in logLightIntens_logSurfTemp if yx[1]<5.9 and yx[0]>4]) # data range constraint
x=CleanedlogLightIntens_logSurfTemp[:,1]
y=CleanedlogLightIntens_logSurfTemp[:,0]
########### end of import and data block ##########################

# Exercise template: ZZZ is a placeholder, so this cell is not runnable until
# every ZZZ has been filled in.
# Replace only ZZZ by the right values
M1 = ZZZ # design matrix M1
b, res, rnk, s = lstsq(ZZZ,ZZZ)
plt.plot(x, y, 'o', label='data')
xx = np.linspace(ZZZ, ZZZ, 101)
yy = ZZZ *xx
plt.plot(xx, yy, label='least squares fit')
plt.xlabel('log light intensity (X)')
plt.ylabel('log surface temperature (Y)')
plt.legend(framealpha=1, shadow=True)
plt.grid(alpha=0.25)
# annotate the figure with the fitted coefficients b[0] and b[1], to three decimals
plt.text(4, 4.7, r'$\widehat{r}(x) = \widehat{\beta}_0 + \widehat{\beta}_1 x, \quad \
\widehat{\beta}_0 = $ %(b0)0.3f , $\widehat{\beta}_1 = $ %(b1)0.3f' % {'b0': b[0], 'b1': b[1]} )
plt.show()


# Sample Exam Problem 8 Solution
logLightIntens_logSurfTemp=[(4.37,5.23),(4.56,5.74),
(4.26,4.93),(4.56,5.74),(4.30,5.19),(4.46,5.46),(3.84,4.65),(4.57,5.27),(4.26,5.57),(4.37,5.12),(3.49,5.73),
(4.43,5.45),(4.48,5.42),(4.01,4.05),(4.29,4.26),(4.42,4.58),(4.23,3.94),(4.42,4.18),(4.23,4.18),(3.49,5.89),
(4.29,4.38),(4.29,4.22),(4.42,4.42),(4.49,4.85),(4.38,5.02),(4.42,4.66),(4.29,4.66),(4.38,4.90),(4.22,4.39),
(3.48,6.05),(4.38,4.42),(4.56,5.10),(4.45,5.22),(3.49,6.29),(4.23,4.34),(4.62,5.62),(4.53,5.10),(4.45,5.22),
(4.53,5.18),(4.43,5.57),(4.38,4.62),(4.45,5.06),(4.50,5.34),(4.45,5.34),(4.55,5.54),(4.45,4.98),(4.42,4.50)]
# keep only the pairs whose first coordinate is above 4 and whose second coordinate is below 5.9
CleanedlogLightIntens_logSurfTemp=\
np.array([yx for yx in logLightIntens_logSurfTemp if yx[1]<5.9 and yx[0]>4]) # data range constraint
# x is the second coordinate of each pair and y is the first
# NOTE(review): the plot below labels x as log light intensity and y as log surface
# temperature, which is the reverse of the order suggested by the variable name - confirm
x=CleanedlogLightIntens_logSurfTemp[:,1]
y=CleanedlogLightIntens_logSurfTemp[:,0]

from scipy.linalg import lstsq
import matplotlib.pyplot as plt
import numpy as np
# Design matrix for the line y = b0 + b1*x: a column of 1s (x**0) and a column of x (x**1).
# `**` is exponentiation in both plain Python and SageMath; `^` is only a power
# under the SageMath preparser and is bitwise XOR in plain Python.
M1 = x[:, np.newaxis]**[0, 1]
# least squares solution b = (b0, b1)
b, res, rnk, s = lstsq(M1, y)
plt.plot(x, y, 'o', label='data')
# evaluate the fitted line on a grid of 101 points from 3.9 to 5.8
xx = np.linspace(3.9, 5.8, 101)
yy = b[0] + b[1]*xx
plt.plot(xx, yy, label='least squares fit')
plt.xlabel('log light intensity (X)')
plt.ylabel('log surface temperature (Y)')
plt.legend(framealpha=1, shadow=True)
plt.grid(alpha=0.25)
# annotate the figure with the fitted coefficients b[0] and b[1], to three decimals
plt.text(4, 4.7, r'$\widehat{r}(x) = \widehat{\beta}_0 + \widehat{\beta}_1 x, \quad \
\widehat{\beta}_0 = $ %(b0)0.3f , $\widehat{\beta}_1 = $ %(b1)0.3f' % {'b0': b[0], 'b1': b[1]} )
plt.show()


import numpy as np
import matplotlib.pyplot as plt
# Simulate noisy observations of the line y = 2x + 1 at 10 equally spaced points on [-1, 1].
np.random.seed(1)  # fixed seed so the same noise is drawn on every run
x = np.linspace(-1, 1, num=10)
# one Normal(0, 1) noise term per design point
epsilon = np.random.normal(0, 1, len(x))
y = 2*x + 1 + epsilon


# Candidate slopes b: an evenly spaced grid of 100 values between 0 and 4
prop_b = np.linspace(0, 4, num=100)

# Residuals y - b*x with the intercept held at 0: one row per observation and
# one column per candidate slope. For a fixed b the intercept a can be taken
# to be the mean of that column, so it is recovered after the search.
unadj_residual = y[:, np.newaxis] - prop_b[np.newaxis, :]*x[:, np.newaxis]

# Variance of the unadjusted residuals, separately for each candidate slope
variance_residual = np.std(unadj_residual, axis=0)**2

# Variance as a function of the proposed b
plt.plot(prop_b, variance_residual)

# Position of the smallest variance on the grid
min_index = np.argmin(variance_residual)

# Slope at the minimum, and the matching intercept
b = prop_b[min_index]
a = np.mean(unadj_residual[:, min_index])

print("Optimal a: %f, optimal b: %f" % (a,b))

Optimal a: 0.902859, optimal b: 1.818182


def L(a,b):
    """Return the mean squared residual of the line a + b*x on the module-level data x and y."""
    # `**` is exponentiation in both plain Python and SageMath; `^` is only a
    # power under the SageMath preparser and is bitwise XOR in plain Python.
    return np.mean((y-(a+b*x))**2)


# SageMath: surface plot of the loss L for a between 0.5 and 1.5 and b between 1.5 and 2.5
plot3d(L,(a,0.5,1.5),(b,1.5,2.5))


# SageMath function-definition syntax (not valid plain Python): G is L(a,b) as a symbolic function of a and b
G(a,b) = L(a,b) # Create a symbolic expression


@interact
def gradient_descent_3d(x_start=input_box(0,type=float,label='x_start'), y_start=input_box(0,type=float,label='y_start'), n_steps=(1,(1,30))):
    """Draw n_steps gradient descent steps on the loss surface L as red 3D arrows.

    x_start is the starting value of a and y_start the starting value of b.
    new_a and new_b are only assigned inside the loop, so at least one step
    is needed for the plotting code after the loop to run.
    """
    # start from an empty graphics object and add the arrows to it
    P=points([])

    # step size of the descent
    lr = 0.1
    # current iterate, stored as (a, b)
    point = (x_start,y_start)
    # NOTE(review): levels is never read in this function
    levels = []
    for i in range(0,n_steps):
        # partial derivatives of G evaluated at the current (a, b)
        # NOTE(review): .n(10) presumably asks for a 10-bit numerical approximation - confirm
        dLdb = G.diff(b).subs(b=point[1]).subs(a=point[0]).n(10)
        dLda = G.diff(a).subs(b=point[1]).subs(a=point[0]).n(10)
        # move against the gradient
        new_a = point[0] - lr*dLda
        new_b = point[1] - lr*dLdb

        # arrow from the current point on the surface to the next one
        P+=arrow((point[0],point[1],L(point[0],point[1])),(new_a,new_b,L(new_a,new_b)),color='red',width=0.5)
        #plt.arrow(point[0],point[1],-lr*dLda*0.7,-lr*dLdb*0.7,head_width=0.05)
        point = (new_a,new_b)
    # plotting window: spans the start and the final iterate, padded by 0.1 on each side
    x_min = min(x_start,new_a)-0.1
    x_max = max(x_start,new_a)+0.1
    y_min = min(y_start,new_b)-0.1
    y_max = max(y_start,new_b)+0.1
    # add the loss surface over that window
    P+=plot3d(L,(a,x_min,x_max),(b,y_min,y_max))
    P.show() # increase the number of steps by toggling n_steps or play with initial values x_start and y_start


import numpy as np

@interact
def gradient_descent_contours(n_steps=(1,(1,30))):
    """Draw n_steps gradient descent steps, starting at (a, b) = (0.3, 1.2), as red arrows on a contour plot of G."""
    # make a contour plot
    P=contour_plot(G, (a, 0.3, 1.5), (b, 1.2, 2.5),fill=False, aspect_ratio=1, contours=20)
    # step size of the descent
    lr = 0.2
    # current iterate, stored as (a, b)
    point = (0.3,1.2)

    for i in range(0,n_steps):
        # partial derivatives of G evaluated at the current (a, b)
        # NOTE(review): .n(10) presumably asks for a 10-bit numerical approximation - confirm
        dLdb = G.diff(b).subs(b=point[1]).subs(a=point[0]).n(10)
        dLda = G.diff(a).subs(b=point[1]).subs(a=point[0]).n(10)
        # move against the gradient
        new_a = point[0] - lr*dLda
        new_b = point[1] - lr*dLdb

        # arrow from the current iterate to the next one
        P+=arrow((point[0],point[1]),(new_a,new_b),color='red',width=0.3)
        #plt.arrow(point[0],point[1],-lr*dLda*0.7,-lr*dLdb*0.7,head_width=0.05)
        point = (new_a,new_b)

    P.show()


# this is x and y available as numpy arrays in SageMath/Python
logLightIntens_logSurfTemp=[(4.37,5.23),(4.56,5.74),
(4.26,4.93),(4.56,5.74),(4.30,5.19),(4.46,5.46),(3.84,4.65),(4.57,5.27),(4.26,5.57),(4.37,5.12),(3.49,5.73),
(4.43,5.45),(4.48,5.42),(4.01,4.05),(4.29,4.26),(4.42,4.58),(4.23,3.94),(4.42,4.18),(4.23,4.18),(3.49,5.89),
(4.29,4.38),(4.29,4.22),(4.42,4.42),(4.49,4.85),(4.38,5.02),(4.42,4.66),(4.29,4.66),(4.38,4.90),(4.22,4.39),
(3.48,6.05),(4.38,4.42),(4.56,5.10),(4.45,5.22),(3.49,6.29),(4.23,4.34),(4.62,5.62),(4.53,5.10),(4.45,5.22),
(4.53,5.18),(4.43,5.57),(4.38,4.62),(4.45,5.06),(4.50,5.34),(4.45,5.34),(4.55,5.54),(4.45,4.98),(4.42,4.50)]
# keep only the pairs whose first coordinate is above 4 and whose second coordinate is below 5.9
CleanedlogLightIntens_logSurfTemp=\
np.array([yx for yx in logLightIntens_logSurfTemp if yx[1]<5.9 and yx[0]>4]) # data range constraint
# x is the second coordinate of each pair and y is the first
x=CleanedlogLightIntens_logSurfTemp[:,1]
y=CleanedlogLightIntens_logSurfTemp[:,0]
# print both arrays so the values can be copied into the R cell
print (x)
print (y)

[5.23 5.74 4.93 5.74 5.19 5.46 5.27 5.57 5.12 5.45 5.42 4.05 4.26 4.58
 3.94 4.18 4.18 4.38 4.22 4.42 4.85 5.02 4.66 4.66 4.9  4.39 4.42 5.1
 5.22 4.34 5.62 5.1  5.22 5.18 5.57 4.62 5.06 5.34 5.34 5.54 4.98 4.5 ]
[4.37 4.56 4.26 4.56 4.3  4.46 4.57 4.26 4.37 4.43 4.48 4.01 4.29 4.42
 4.23 4.42 4.23 4.29 4.29 4.42 4.49 4.38 4.42 4.29 4.38 4.22 4.38 4.56
 4.45 4.23 4.62 4.53 4.45 4.53 4.43 4.38 4.45 4.5  4.45 4.55 4.45 4.42]


%%r
x <- c(5.23,  5.74,  4.93,  5.74,  5.19,  5.46,  5.27,  5.57,  5.12,
         5.45,  5.42,  4.05,  4.26,  4.58,  3.94,  4.18,  4.18,  4.38,
         4.22,  4.42,  4.85,  5.02,  4.66,  4.66,  4.9 ,  4.39,  4.42,
         5.1 ,  5.22,  4.34,  5.62,  5.1 ,  5.22,  5.18,  5.57,  4.62,
         5.06,  5.34,  5.34,  5.54,  4.98,  4.5)
y <- c(4.37,  4.56,  4.26,  4.56,  4.3 ,  4.46,  4.57,  4.26,  4.37,
         4.43,  4.48,  4.01,  4.29,  4.42,  4.23,  4.42,  4.23,  4.29,
         4.29,  4.42,  4.49,  4.38,  4.42,  4.29,  4.38,  4.22,  4.38,
         4.56,  4.45,  4.23,  4.62,  4.53,  4.45,  4.53,  4.43,  4.38,
         4.45,  4.5 ,  4.45,  4.55,  4.45,  4.42)

 [1] 4.37 4.56 4.26 4.56 4.30 4.46 4.57 4.26 4.37 4.43 4.48 4.01 4.29 4.42 4.23
[16] 4.42 4.23 4.29 4.29 4.42 4.49 4.38 4.42 4.29 4.38 4.22 4.38 4.56 4.45 4.23
[31] 4.62 4.53 4.45 4.53 4.43 4.38 4.45 4.50 4.45 4.55 4.45 4.42


%%r
# regress y on x and x^2; I() makes ^ mean arithmetic squaring inside the formula
linearRegressionModel <- lm(formula = y ~ x + I(x^2))

# print the coefficient table, residual summary and fit statistics
summary(linearRegressionModel)

Call:
lm(formula = y ~ x + I(x^2))

Residuals:
     Min       1Q   Median       3Q      Max 
-0.22916 -0.05145  0.01121  0.06263  0.16072 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)  1.87480    1.44471   1.298    0.202
x            0.87443    0.59759   1.463    0.151
I(x^2)      -0.07272    0.06128  -1.187    0.243

Residual standard error: 0.09108 on 39 degrees of freedom
Multiple R-squared:  0.484,	Adjusted R-squared:  0.4576 
F-statistic: 18.29 on 2 and 39 DF,  p-value: 2.491e-06


%%r
# there will be further dependencies, you may need to recursively install...
#install.packages("Flury")
#library(Flury)
#data(dead.beetles)

<rpy2.rinterface_lib.sexp.NULLType object at 0x7f29b952dd08> [RTYPES.NILSXP]

Introduction to Data Science: A Comp-Math-Stat Approach ¶

1MS041, 2021¶

12. Linear Regression¶

Introduction¶

Simple Linear Regression¶

Interactive Animations for Regression¶

Least Squares and Maximum Likelihood¶

Theorem [MLE is LSE]¶

Properties of the Least Squares Estimator (LSE)¶

Conditional Mean and Variance of LSE¶

Estimated Standard Errors¶

Four Asymptotic Properties of the LSE¶

1. Asymptotic Consistency¶

2. Asymptotic Normality¶

3. Approximate $1-\alpha$ Confidence Interval¶

4. The Wald Test¶

Implementing Simple Linear Regression from Scratch¶

Residual Analysis¶

Multiple Regression¶

This is just as simple, except we have more than one covariate¶

Solving Least Squares Using Numerical Linear Algebra Routine in scipy¶

Example 1: Fitting a Line is Simple Linear Regression¶

Example 2: Fitting a Quadratic is also Simple Linear Regression¶

Example 3: Fitting a 3rd Order Polynomial is Multiple Linear Regression¶

Sample Exam Problem 8¶

Prediction¶

Multiple Regression on 2018 Swedish Election Data¶

Prelude to Statistical Machine Learning¶

Loss functions and gradient descent¶

Introduction to R in SageMath Jupyter IPython Notebook¶

Running R in SageMath is "easy as":¶

Assigning to `x` and `y` in SageMath/R¶

Doing Linear Regression in SageMath/R¶

Running R in SageMath is "easy as":¶

Additional Packages¶

SageMath/R docs¶

Introduction to Data Science: A Comp-Math-Stat Approach¶

1MS041, 2021¶

12. Linear Regression¶

Introduction¶

Simple Linear Regression¶

Interactive Animations for Regression¶

Least Squares and Maximum Likelihood¶

Theorem [MLE is LSE]¶

Properties of the Least Squares Estimator (LSE)¶

Conditional Mean and Variance of LSE¶

Estimated Standard Errors¶

Four Asymptotic Properties of the LSE¶

1. Asymptotic Consistency¶

2. Asymptotic Normality¶

3. Approximate $1-\alpha$ Confidence Interval¶

4. The Wald Test¶

Implementing Simple Linear Regression from Scratch¶

Residual Analysis¶

Multiple Regression¶

This is just as simple, except we have more than one covariate¶

Solving Least Squares Using Numerical Linear Algebra Routine in scipy¶

Example 1: Fitting a Line is Simple Linear Regression¶

Example 2: Fitting a Quadratic is also Simple Linear Regression¶

Example 3: Fitting a 3rd Order Polynomial is Multiple Linear Regression¶

Sample Exam Problem 8¶

Prediction¶

Multiple Regression on 2018 Swedish Election Data¶

Prelude to Statistical Machine Learning¶

Loss functions and gradient descent¶

Introduction to R in SageMath Jupyter IPython Notebook¶

Running R in SageMath is "easy as":¶

Assigning to x and y in SageMath/R¶

Doing Linear Regression in SageMath/R¶

Running R in SageMath is "easy as":¶

Additional Packages¶

SageMath/R docs¶

Introduction to Data Science: A Comp-Math-Stat Approach ¶

Assigning to `x` and `y` in SageMath/R¶