def showURL(url, ht=500, width='95%'):
    """Return an IFrame that embeds the page at `url` so it can be shown in the notebook.

    url   -- address of the page to embed
    ht    -- height handed to the IFrame (default 500)
    width -- width handed to the IFrame (default '95%', the value that used to be hard-coded)
    """
    # imported inside the function so the helper works in a fresh notebook session
    from IPython.display import IFrame
    return IFrame(url, width=width, height=ht)
showURL('https://en.wikipedia.org/wiki/Earthquake',500)


showURL('https://terrytao.wordpress.com/2010/01/01/254a-notes-0-a-review-of-probability-theory/')


%%sh
### print working directory
pwd

/home/user/datascience-intro/raaz/1MS041/master/jp


%%sh
#ls ## uncomment to `ls` or list contents of working directory


%%sh
### after download you should have the following files in the directory named data
ls data

CORIS.csv
NYPowerBall.csv
co2_mm_mlo.txt
digits.csv
earthquakes.csv
earthquakes.csv.zip
earthquakes.tgz
earthquakes_small.csv
final.csv
final.csv.zip
final.tgz
leukemia.csv
portland.csv
pride_and_prejudice.txt
rainfallInChristchurch.csv
ratings.csv


%%sh  
### first three lines
head -3 data/earthquakes_small.csv

publicid,eventtype,origintime,modificationtime,longitude, latitude, magnitude, depth,magnitudetype,depthtype,evaluationmethod,evaluationstatus,evaluationmode,earthmodel,usedphasecount,usedstationcount,magnitudestationcount,minimumdistance,azimuthalgap,originerror,magnitudeuncertainty
2018p371534,,2018-05-18T11:13:48.826Z,2018-05-18T11:15:55.741Z,176.469659,-38.10063545,2.123583253,93.125,M,,NonLinLoc,,automatic,nz3drx,18,18,11,0.3996779802,94.08602902,1.036195008,0
2018p371524,,2018-05-18T11:08:07.588Z,2018-05-18T11:11:14.319Z,176.4213445,-38.63584892,2.570467678,35.9375,M,,NonLinLoc,,automatic,nz3drx,22,22,11,0.3208135882,89.12864378,1.012353739,0


%%sh 
### last three lines by `tail`
tail -3 data/earthquakes_small.csv

2018p352775,,2018-05-11T12:38:54.732Z,2018-05-11T12:40:28.518Z,175.6063627,-40.81585537,1.835272336,13.671875,M,,NonLinLoc,,automatic,nz3drx,22,22,12,0.1097369199,84.14006379,0.3314536834,0
2018p352725,,2018-05-11T12:12:36.343Z,2018-05-11T12:14:42.372Z,176.0372811,-38.78743116,2.103529946,76.25,M,,NonLinLoc,,automatic,nz3drx,17,17,4,0.4257033383,244.4056741,1.445270768,0
2018p352684,,2018-05-11T11:50:06.019Z,2018-05-11T11:51:41.163Z,176.5437111,-40.07042442,1.503468463,13.671875,M,,NonLinLoc,,automatic,nz3drx,13,13,7,0.079302248,81.46123042,0.4485324555,0


%%sh  
### number of lines in the file; mnemonic from `man wc`: wc = word count, and the option -l is for lines
wc -l  data/earthquakes_small.csv

411 data/earthquakes_small.csv


#%%sh
#man wc


### Read the entire small earthquakes file into one string
with open("data/earthquakes_small.csv") as csvFile:
    reader = csvFile.read()

### Cut the text at every newline: one list entry per line of the file
dataList = reader.split('\n')


len(dataList)

412


dataList[0]

'publicid,eventtype,origintime,modificationtime,longitude, latitude, magnitude, depth,magnitudetype,depthtype,evaluationmethod,evaluationstatus,evaluationmode,earthmodel,usedphasecount,usedstationcount,magnitudestationcount,minimumdistance,azimuthalgap,originerror,magnitudeuncertainty'


dataList[-1] ## Remember last line is empty

''


### Build a list of (longitude, latitude, magnitude) float triples, one per data row.
### dataList[0] is the header row and is skipped; blank entries (such as the empty
### string left after the file's final newline) are skipped explicitly, so that
### no real data row is dropped by slicing too much off the end.
myDataAccumulatorList = []
for data in dataList[1:]:
    if not data.strip():
        continue  ## nothing to parse on an empty line
    dataRow = data.split(',')
    ## columns 4, 5 and 6 of the header are longitude, latitude and magnitude
    myData = [dataRow[4], dataRow[5], dataRow[6]]
    myFloatData = tuple(float(x) for x in myData)
    myDataAccumulatorList.append(myFloatData)


points(myDataAccumulatorList)


showURL('https://en.wikipedia.org/wiki/Borel_set')


%%sh
cd data
### if you don't see final.csv in data/ below
### then either uncomment and try the next line in linux/Mac OSX
#tar -zxvf final.tgz
### or try the next line after uncommenting it to extract final.csv
### unzip final.csv.zip
ls -al

total 10258
drwxr-xr-x  2 user user      18 Sep 16 14:48 .
drwxr-xr-x 20 user user      73 Oct  5 06:49 ..
-rw-r--r--  1 user user   22701 Feb 12  2021 CORIS.csv
-rw-r--r--  1 user user   29002 Sep 16 14:52 NYPowerBall.csv
-rw-r--r--  1 user user   50555 Feb 12  2021 co2_mm_mlo.txt
-rw-r--r--  1 user user  495375 Feb 12  2021 digits.csv
-rw-r--r--  1 user user 4085555 Feb 12  2021 earthquakes.csv
-rw-r--r--  1 user user 1344114 Feb 12  2021 earthquakes.csv.zip
-rw-r--r--  1 user user 1344959 Feb 12  2021 earthquakes.tgz
-rw-r--r--  1 user user   77786 Feb 12  2021 earthquakes_small.csv
-rw-r--r--  1 user user 4894689 Nov  9  2018 final.csv
-rw-r--r--  1 user user  467572 Feb 12  2021 final.csv.zip
-rw-r--r--  1 user user  467611 Feb 12  2021 final.tgz
-rw-r--r--  1 user user 2185909 Feb 12  2021 leukemia.csv
-rw-r--r--  1 user user     730 Feb 12  2021 portland.csv
-rw-r--r--  1 user user  724725 Feb 12  2021 pride_and_prejudice.txt
-rw-r--r--  1 user user  376954 Feb 12  2021 rainfallInChristchurch.csv
-rw-r--r--  1 user user 2483723 Feb 12  2021 ratings.csv


%%sh
wc data/final.csv
head data/final.csv

  82226  461865 4894689 data/final.csv
region,municipality,district,party,votes
Blekinge län,Karlshamn,0 - Centrala Asarum,S,519
Blekinge län,Karlshamn,0 - Centrala Asarum,SD,311
Blekinge län,Karlshamn,0 - Centrala Asarum,M,162
Blekinge län,Karlshamn,0 - Centrala Asarum,V,82
Blekinge län,Karlshamn,0 - Centrala Asarum,KD,53
Blekinge län,Karlshamn,0 - Centrala Asarum,C,37
Blekinge län,Karlshamn,0 - Centrala Asarum,L,37
Blekinge län,Karlshamn,0 - Centrala Asarum,MP,32
Blekinge län,Karlshamn,0 - Centrala Asarum,BLANK,13


import csv, sys

### Sum the votes of every party over all rows of the election file.
filename = 'data/final.csv'
linesAlreadyRead = 0  ## number of data rows processed (the header is not counted)
partyVotesDict = {}   ## maps party abbreviation -> total votes over all districts
## The file contains non-ASCII text (e.g. "Blekinge län", "ÖVR"), so it is decoded
## explicitly as UTF-8 rather than with the platform default encoding;
## newline='' is how the csv module asks for its files to be opened.
with open(filename, 'r', encoding='utf-8', newline='') as f:
    reader = csv.reader(f, delimiter=',', quotechar='"')
    headers = next(reader)  ## skip first line of header
    try:
        for row in reader:
            linesAlreadyRead += 1
            party = row[3]       ## 4th column: party
            votes = int(row[4])  ## 5th column: votes, read as an integer
            ## add this row's votes to the party's running total (0 if not seen before)
            partyVotesDict[party] = partyVotesDict.get(party, 0) + votes
    except csv.Error as e:
        ## stop and report the file and line at which the csv parser failed
        sys.exit('file %s, line %d: %s' % (filename, reader.line_num, e))

print("lines read = ", linesAlreadyRead)

lines read =  82225


### print every party with its vote total (party names may be non-ASCII strings)
for partyName, totalVotes in partyVotesDict.items():
    print("party ", partyName, "\thas a total of votes =\t", totalVotes)

party  S 	has a total of votes =	 1830386
party  SD 	has a total of votes =	 1135627
party  M 	has a total of votes =	 1284698
party  V 	has a total of votes =	 518454
party  KD 	has a total of votes =	 409478
party  C 	has a total of votes =	 557500
party  L 	has a total of votes =	 355546
party  MP 	has a total of votes =	 285899
party  BLANK 	has a total of votes =	 53084
party  ÖVR 	has a total of votes =	 69472
party  FI 	has a total of votes =	 29665
party  OGEJ 	has a total of votes =	 2120
party  OG 	has a total of votes =	 3342


### list the parties from the most votes to the fewest
votesDescending = sorted(partyVotesDict.items(), key=lambda kv: kv[1], reverse=True)
for party, total in votesDescending:
    print(party, "\t", total)

S 	 1830386
M 	 1284698
SD 	 1135627
C 	 557500
V 	 518454
KD 	 409478
L 	 355546
MP 	 285899
ÖVR 	 69472
BLANK 	 53084
FI 	 29665
OG 	 3342
OGEJ 	 2120


### To get a dictionary back with the top K=3 most popular parties
top3PartiesDict = {}  ## make an empty dict
## sort the parties by descending vote total and keep only the first 3 of them
for party in sorted(partyVotesDict, key=partyVotesDict.get, reverse=True)[:3]:
    top3PartiesDict[party] = partyVotesDict[party]
top3PartiesDict

{'S': 1830386, 'M': 1284698, 'SD': 1135627}


import requests
import json

### Overpass API endpoint, reached over HTTPS
overpass_url = "https://overpass-api.de/api/interpreter"
### Query: nodes, ways and relations tagged amenity=biergarten inside the area
### tagged ISO3166-1=DE with admin_level=2; the result is requested as JSON
### with `out center`
overpass_query = """
[out:json];
area["ISO3166-1"="DE"][admin_level=2];
(node["amenity"="biergarten"](area);
 way["amenity"="biergarten"](area);
 rel["amenity"="biergarten"](area);
);
out center;
"""
### timeout: do not hang forever if the server stops answering
response = requests.get(overpass_url,
                        params={'data': overpass_query},
                        timeout=180)
### fail with a clear HTTP error instead of a confusing JSON decoding error
### when the server answers with an error status
response.raise_for_status()
data = response.json()


import numpy as np

### Gather one (lon, lat) pair per element of the query result
coords = []
for element in data['elements']:
    if element['type'] == 'node':
        ## a node carries its own position
        lon, lat = element['lon'], element['lat']
    elif 'center' in element:
        ## any other element is used only when it comes with a 'center' position
        center = element['center']
        lon, lat = center['lon'], center['lat']
    else:
        ## no position available for this element
        continue
    coords.append((lon, lat))

### Turn the list of pairs into a numpy array, one row per pair
X = np.array(coords)

### Plot the coordinates: column 0 of X (longitude) against column 1 (latitude).
### NOTE(review): `points` and `text` are not defined or imported in this file -
### presumably SageMath plotting built-ins; confirm the notebook kernel provides them.
p = points(zip(X[:, 0], X[:, 1]))
### add a caption at plot position (12, 56)
p += text('Biergarten in Germany',(12,56))
p.axes_labels(['Longitude','Latitude'])
#plt.axis('equal')
p.show()


import requests
import json

### Overpass API endpoint, reached over HTTPS
overpass_url = "https://overpass-api.de/api/interpreter"
### Query: nodes, ways and relations tagged amenity=pub inside the area
### tagged ISO3166-1=SE with admin_level=2; the result is requested as JSON
### with `out center`
overpass_query = """
[out:json];
area["ISO3166-1"="SE"][admin_level=2];
(node["amenity"="pub"](area);
 way["amenity"="pub"](area);
 rel["amenity"="pub"](area);
);
out center;
"""
### timeout: do not hang forever if the server stops answering
response = requests.get(overpass_url,
                        params={'data': overpass_query},
                        timeout=180)
### fail with a clear HTTP error instead of a confusing JSON decoding error
### when the server answers with an error status
response.raise_for_status()
data = response.json()

import numpy as np

### Gather one (lon, lat) pair per element of the query result
coords = []
for element in data['elements']:
    if element['type'] == 'node':
        ## a node carries its own position
        lon, lat = element['lon'], element['lat']
    elif 'center' in element:
        ## any other element is used only when it comes with a 'center' position
        center = element['center']
        lon, lat = center['lon'], center['lat']
    else:
        ## no position available for this element
        continue
    coords.append((lon, lat))

### Turn the list of pairs into a numpy array, one row per pair
X = np.array(coords)

### Plot the coordinates: column 0 of X (longitude) against column 1 (latitude).
### NOTE(review): `points` and `text` are not defined or imported in this file -
### presumably SageMath plotting built-ins; confirm the notebook kernel provides them.
p = points(zip(X[:, 0], X[:, 1]))
### add a caption at plot position (14, 68)
p += text('Pubar i Sverige',(14,68))
p.axes_labels(['Longitude','Latitude'])
#plt.axis('equal')
p.show()

Introduction to Data Science: A Comp-Math-Stat Approach ¶

1MS041, 2021¶

06. Data and Statistics: New Zealand Earthquakes, 2018 Swedish National Election and Pubs in Open Street Maps of DE & SE¶

Earthquakes¶

Live Data-fetching Exercise Now¶

What should you do now?¶

Let's analyse the measured earthquakes in `data/earthquakes.csv`¶

Exercise:¶

More on Data and Statistics¶

Let's Play Live with other datasets, shall we?¶

Swedish 2018 National Election Data¶

Swedish Election Outcomes 2018¶

Counting total votes per party¶

Geospatial adventures: Pubs in Open Street Maps of DE & SE¶

Pubs in Sweden¶

Introduction to Data Science: A Comp-Math-Stat Approach¶

1MS041, 2021¶

06. Data and Statistics: New Zealand Earthquakes, 2018 Swedish National Election and Pubs in Open Street Maps of DE & SE¶

Earthquakes¶

Live Data-fetching Exercise Now¶

What should you do now?¶

Let's analyse the measured earthquakes in data/earthquakes.csv¶

Exercise:¶

More on Data and Statistics¶

Let's Play Live with other datasets, shall we?¶

Swedish 2018 National Election Data¶

Swedish Election Outcomes 2018¶

Counting total votes per party¶

Geospatial adventures: Pubs in Open Street Maps of DE & SE¶

Pubs in Sweden¶

Introduction to Data Science: A Comp-Math-Stat Approach ¶

Let's analyse the measured earthquakes in `data/earthquakes.csv`¶