Introduction to Data Science

1MS041, 2023

©2023 Raazesh Sainudiin, Benny Avelin. Attribution 4.0 International (CC BY 4.0)

Problem 5, Chap 1

In [28]:
import numpy as np
In [29]:
def true_event(sequence):
    """Indicator of "the entries sum to 2 and the final entry equals 1".

    Parameters
    ----------
    sequence : indexable collection of numbers (the notebook passes 0/1 arrays).

    Returns
    -------
    int
        1 when ``np.sum(sequence) == 2`` and ``sequence[-1] == 1``, otherwise 0.
    """
    # Guard clause: a total other than 2 rules the event out immediately.
    if np.sum(sequence) != 2:
        return 0
    # The total is 2, so only the last entry decides the outcome.
    return 1 if sequence[-1] == 1 else 0
In [30]:
# Single trial: draw k random entries from {0, 1} (randint's upper bound 2 is
# exclusive), show them, and evaluate the indicator on that one draw.
k = 4
x = np.random.randint(0,2,size=k)
print(x)
true_event(x)
[0 0 0 1]
Out[30]:
0
In [31]:
def true_event_vector(sequence):
    """Row-wise indicator of "the row sums to 2 and its last entry equals 1".

    Parameters
    ----------
    sequence : 2-D array-like
        One trial per row; columns are the entries of that trial.

    Returns
    -------
    numpy.ndarray of bool, one element per row
        True for rows whose entries sum to 2 and whose last column is 1.
    """
    # Work on the argument itself. The previous body read the notebook-level
    # variable ``x`` and ignored ``sequence``, so the result depended on
    # whatever ``x`` happened to hold in the kernel.
    sequence = np.asarray(sequence)
    # Product of two boolean arrays acts as an element-wise logical AND.
    return (np.sum(sequence, axis=1) == 2) * (sequence[:, -1] == 1)
In [ ]:
# Number of entries per simulated sequence (the row length of the sample matrix).
k = 4
In [39]:
# 100 simulated sequences, one per row, each with k random entries from {0, 1}.
x = np.random.randint(0,2,size=(100,k))
In [40]:
# Fraction of rows for which the indicator holds: the empirical estimate of the
# event's probability.
np.mean(true_event_vector(x))
Out[40]:
0.21
In [36]:
# Exact value for comparison, assuming fair independent 0/1 entries: k-1 sequences
# of length k have exactly two ones with a one in the last place (the other one
# can sit in any of the first k-1 positions), each with probability (1/2)**k.
(k-1)*((1/2)**k)
Out[36]:
0.1875

Problem 10, Chap 1

In [ ]:
import numpy as np
In [ ]:
# 1000 simulated games: one door number drawn uniformly from {1, 2, 3} per game
# (randint's upper bound 4 is exclusive).
door = np.random.randint(low=1, high=4, size=1000)
(door == 1).mean()  # Probability when not switching
In [ ]:
from random import randint

# Door opened by host, one entry per game.
# When the drawn door is 1 the host opens 2 or 3 at random; when it is 2 or 3
# the host has a single forced choice (the other of the two).
# Any other value contributes no entry.
host = [
    randint(2, 3) if d == 1 else {2: 3, 3: 2}[d]
    for d in door
    if d in (1, 2, 3)
]
In [ ]:
# If switching was a success: 1 per game when it was, 0 otherwise.
# Switching from 1 to 3 succeeds when the host opened 2 and the drawn door is 3;
# switching from 1 to 2 succeeds when the host opened 3 and the drawn door is 2.
switch_success = [
    1 if ((h == 2 and d == 3) or (h == 3 and d == 2)) else 0
    for d, h in zip(door, host)
]
In [ ]:
# Fraction of simulated games in which switching was a success.
np.mean(switch_success)

Some fun with text

In [ ]:
import numpy as np
# Read the whole text file into a single string.
# NOTE(review): no encoding is passed, so open() uses the platform default —
# confirm that it matches the file.
with open('data/pride_and_prejudice.txt',mode='r') as f:
    txt = f.read()
In [ ]:
# Peek at the first 1000 characters of the text.
print(txt[:1000])
In [ ]:
# Keep only alphabetic characters of the text, each converted to lower case.
letters = list(map(str.lower, filter(str.isalpha, txt)))
In [ ]:
# Let's convert everything to numbers for processing
In [ ]:
# Integer code point of the character 'h'.
ord('h')
In [ ]:
# Character whose code point is 104 (chr is the inverse of ord).
chr(104)
In [ ]:
# Integer code point of every retained letter, in text order.
ascii_ = list(map(ord, letters))
In [ ]:
from Utils import makeEMF
In [ ]:
# Summarise the letter codes with the Utils helper. Later cells read emf[:,0] as
# the distinct codes; presumably the remaining column holds their relative
# frequencies — TODO confirm against Utils.makeEMF.
emf = makeEMF(ascii_)
In [ ]:
# Plot the summary of the plain-text letter codes.
from Utils import plotEMF
plotEMF(emf,force_display=False)
import matplotlib.pyplot as plt
# Label each x tick with the letter for its code instead of the numeric code;
# the return value is bound to _ so the cell shows no repr output.
_=plt.xticks(emf[:,0],[chr(int(i)) for i in emf[:,0]])
In [ ]:
# 26 reference relative frequencies, written as counts per 1000
# (presumably typical English letter frequencies for a..z — TODO confirm source).
alphaRelFreqs = [
    per_mille/1000
    for per_mille in (
        73, 9, 30, 44, 130, 28, 16, 35, 74,
        2, 3, 35, 25, 78, 74, 27, 3, 77, 63,
        93, 27, 13, 16, 5, 19, 1,
    )
]
# Plot the reference values at the x positions taken from emf's first column,
# then label the ticks with the corresponding letters.
plotEMF(list(zip(emf[:, 0], alphaRelFreqs)), force_display=False)
_ = plt.xticks(emf[:, 0], [chr(int(code)) for code in emf[:, 0]])
In [ ]:
# Draw the text's summary and the reference frequencies in the same cell so the
# two can be compared, with letters as x tick labels.
plotEMF(emf,force_display=False)
plotEMF(list(zip(emf[:,0],alphaRelFreqs)),force_display=False)
_=plt.xticks(emf[:,0],[chr(int(i)) for i in emf[:,0]])
In [ ]:
# Distinct letters of the text in sorted order, plus a separately shuffled copy
# that serves as the substitution alphabet.
alphabet = sorted(set(letters))
shuffled_alphabet = list(alphabet)
np.random.shuffle(shuffled_alphabet)  # permutes the copy in place
In [ ]:
# Substitution table: each letter of the sorted alphabet maps to the letter at
# the same position in the shuffled alphabet.
encryption = {plain: cipher for plain, cipher in zip(alphabet, shuffled_alphabet)}
In [ ]:
# Substitute every character whose lower-case form is in the table (so upper-case
# letters come out as lower-case cipher letters); all other characters are
# passed through unchanged.
encrypted_text = ''.join(encryption.get(ch.lower(), ch) for ch in txt)
print(encrypted_text[:200])
In [ ]:
# Alphabetic characters of the encrypted text, each converted to lower case.
letters_2 = list(map(str.lower, filter(str.isalpha, encrypted_text)))
In [ ]:
# Integer code point of every letter of the encrypted text, in text order.
ascii_2 = list(map(ord, letters_2))
In [ ]:
# Same summary and plot as for the plain text, now for the encrypted letters.
emf2 = makeEMF(ascii_2)
from Utils import plotEMF
plotEMF(emf2,force_display=False)
import matplotlib.pyplot as plt
_=plt.xticks(emf2[:,0],[chr(int(i)) for i in emf2[:,0]])
# 26 reference relative frequencies, written as fractions of 1000
# (presumably typical English letter frequencies for a..z — TODO confirm source).
alphaRelFreqs = [73/1000,9/1000,30/1000,44/1000,130/1000,28/1000,16/1000,35/1000,74/1000,
                 2/1000,3/1000,35/1000, 25/1000,78/1000,74/1000,27/1000,3/1000,77/1000,63/1000,
                 93/1000,27/1000,13/1000,16/1000,5/1000,19/1000,1/1000]
# NOTE(review): the reference curve and the final tick labels use emf (the
# plain-text summary), not emf2, and this call replaces the ticks set above —
# confirm this is intended.
plotEMF(list(zip(emf[:,0],alphaRelFreqs)),force_display=False)
_=plt.xticks(emf[:,0],[chr(int(i)) for i in emf[:,0]])
In [ ]:
from Utils import makeFreq
# Adjacent-letter pairs of the plain text from position 10000 onwards, passed
# through makeFreq and ordered by the integer value of each entry's second field
# (presumably the count — TODO confirm against Utils.makeFreq), largest first.
sorted(
    makeFreq([first + second for first, second in zip(letters[10000:-1], letters[10001:])]),
    key=lambda entry: int(entry[1]),
    reverse=True,
)
In [ ]:
from Utils import makeFreq
# Adjacent-letter pairs within the first 10000 letters of the encrypted text,
# passed through makeFreq and ordered by the integer value of each entry's second
# field (presumably the count — TODO confirm against Utils.makeFreq), largest first.
sorted(
    makeFreq([first + second for first, second in zip(letters_2[:10000], letters_2[1:10000])]),
    key=lambda entry: int(entry[1]),
    reverse=True,
)