Python ITB
Makers Institute
13 February 2018
Since 12 January 2016
First open class: September 2016
You can also view this repository from: https://github.com/arisbw/open-class-makers
Say hello to dat snake. 🐍
print("Hello Python!")
Hello Python!
If you forgot what the simple math looks like...
1 + 1
2
8 - 1
7
10 * 2
20
35 / 5
7.0
5 // 3
1
5.0 // 3.0
1.0
2**4
16
s = "ayam"
len(s)
4
s[0]
'a'
Or, we can do this!
s[-1]
'm'
s[-2]
'a'
We can also slice that string
s[0:3]
'aya'
s[1:]
'yam'
s[:5]
'ayam'
s[:-1]
'aya'
Or concatenate it with another string
s + ' ' +'euy'
'ayam euy'
s*4
'ayamayamayamayam'
Find and replace substring
s.find('yam')
1
s.replace('ayam', 'bebek')
'bebek'
line = 'aaa,bbb,ccccc,dd'
line.split(',')
['aaa', 'bbb', 'ccccc', 'dd']
s
'ayam'
s.upper()
'AYAM'
s.isalpha()
True
L = [123, 'spam', 1.23]
L[0]
123
L[:-1]
[123, 'spam']
L + [4, 5, 6]
[123, 'spam', 1.23, 4, 5, 6]
L
[123, 'spam', 1.23]
L.append('hehe')
L
[123, 'spam', 1.23, 'hehe']
L.pop(1)
'spam'
L
[123, 1.23, 'hehe']
D = {'food': 'Spam', 'quantity': 4, 'color': 'pink'}
D['food']
'Spam'
D['quantity']
4
D = {}
D['name'] = 'Bob'# Create keys by assignment
D['job'] = 'dev'
D['age'] = 40
D
{'age': 40, 'job': 'dev', 'name': 'Bob'}
# Two-way branch: prints one message when `a` is greater than 50 and the
# other message for every remaining value (including exactly 50).
a = 90
if a > 50:
    print("Lebih dari 50")
else:
    print("Kurang dari 50")
Lebih dari 50
a = 3
"seeep" if a > 2 else 2
'seeep'
Now, what ifs...
# Three-way branch on `a`: below 100, between 100 and 500 (inclusive),
# or above 500.
a = 400
if a < 100:
    print("Kurang dari 100")
elif 100 <= a <= 500:
    print("Antara 100 dan 500")
else:
    print("Lebih dari 500")
Antara 100 dan 500
... say that again?
# Run the same comparison `a` times. The test looks at `a`, not at the loop
# counter `i`, so every iteration prints the same message.
a = 5
for i in range(a):
    if a > 10:
        print("Lebih dari 10")
    else:
        print("Kurang dari 10")
Kurang dari 10 Kurang dari 10 Kurang dari 10 Kurang dari 10 Kurang dari 10
or
# Count `a` up from 0, printing once per iteration, until it reaches 5.
a = 0
while a < 5:
    print("Gampang euy")
    a += 1
Gampang euy Gampang euy Gampang euy Gampang euy Gampang euy
Functions!
def tambah(a=0, b=0):
    """Return ``a + b``.

    Both parameters default to 0, so calling ``tambah()`` with no arguments
    returns 0 and calling it with one argument returns that argument plus 0.
    """
    return a + b
tambah(2,3)
5
Initialize libraries that we will use later.
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
First, let's download the data we need.
!curl https://raw.githubusercontent.com/fivethirtyeight/data/master/bob-ross/elements-by-episode.csv -O elements-by-episode.csv
% Total % Received % Xferd Average Speed Time Time Time Current Dload Upload Total Spent Left Speed 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0 100 66316 100 66316 0 0 121k 0 --:--:-- --:--:-- --:--:-- 121k 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0
Or you can use wget if you are on Linux.
df = pd.read_csv("elements-by-episode.csv")
Let's peek at the data.
df.head()
EPISODE | TITLE | APPLE_FRAME | AURORA_BOREALIS | BARN | BEACH | BOAT | BRIDGE | BUILDING | BUSHES | ... | TOMB_FRAME | TREE | TREES | TRIPLE_FRAME | WATERFALL | WAVES | WINDMILL | WINDOW_FRAME | WINTER | WOOD_FRAMED | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | S01E01 | "A WALK IN THE WOODS" | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | ... | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 | S01E02 | "MT. MCKINLEY" | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
2 | S01E03 | "EBONY SUNSET" | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
3 | S01E04 | "WINTER MIST" | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | ... | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
4 | S01E05 | "QUIET STREAM" | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
5 rows × 69 columns
df.columns
Index(['EPISODE', 'TITLE', 'APPLE_FRAME', 'AURORA_BOREALIS', 'BARN', 'BEACH', 'BOAT', 'BRIDGE', 'BUILDING', 'BUSHES', 'CABIN', 'CACTUS', 'CIRCLE_FRAME', 'CIRRUS', 'CLIFF', 'CLOUDS', 'CONIFER', 'CUMULUS', 'DECIDUOUS', 'DIANE_ANDRE', 'DOCK', 'DOUBLE_OVAL_FRAME', 'FARM', 'FENCE', 'FIRE', 'FLORIDA_FRAME', 'FLOWERS', 'FOG', 'FRAMED', 'GRASS', 'GUEST', 'HALF_CIRCLE_FRAME', 'HALF_OVAL_FRAME', 'HILLS', 'LAKE', 'LAKES', 'LIGHTHOUSE', 'MILL', 'MOON', 'MOUNTAIN', 'MOUNTAINS', 'NIGHT', 'OCEAN', 'OVAL_FRAME', 'PALM_TREES', 'PATH', 'PERSON', 'PORTRAIT', 'RECTANGLE_3D_FRAME', 'RECTANGULAR_FRAME', 'RIVER', 'ROCKS', 'SEASHELL_FRAME', 'SNOW', 'SNOWY_MOUNTAIN', 'SPLIT_FRAME', 'STEVE_ROSS', 'STRUCTURE', 'SUN', 'TOMB_FRAME', 'TREE', 'TREES', 'TRIPLE_FRAME', 'WATERFALL', 'WAVES', 'WINDMILL', 'WINDOW_FRAME', 'WINTER', 'WOOD_FRAMED'], dtype='object')
df.shape
(403, 69)
Summary of the data:
df.describe()
APPLE_FRAME | AURORA_BOREALIS | BARN | BEACH | BOAT | BRIDGE | BUILDING | BUSHES | CABIN | CACTUS | ... | TOMB_FRAME | TREE | TREES | TRIPLE_FRAME | WATERFALL | WAVES | WINDMILL | WINDOW_FRAME | WINTER | WOOD_FRAMED | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 403.000000 | 403.000000 | 403.000000 | 403.000000 | 403.000000 | 403.000000 | 403.000000 | 403.000000 | 403.000000 | 403.000000 | ... | 403.000000 | 403.000000 | 403.000000 | 403.000000 | 403.000000 | 403.000000 | 403.000000 | 403.000000 | 403.000000 | 403.000000 |
mean | 0.002481 | 0.004963 | 0.042184 | 0.066998 | 0.004963 | 0.017370 | 0.002481 | 0.297767 | 0.171216 | 0.009926 | ... | 0.002481 | 0.895782 | 0.836228 | 0.002481 | 0.096774 | 0.084367 | 0.002481 | 0.002481 | 0.171216 | 0.002481 |
std | 0.049814 | 0.070359 | 0.201258 | 0.250328 | 0.070359 | 0.130807 | 0.049814 | 0.457845 | 0.377166 | 0.099255 | ... | 0.049814 | 0.305923 | 0.370528 | 0.049814 | 0.296018 | 0.278283 | 0.049814 | 0.049814 | 0.377166 | 0.049814 |
min | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
25% | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 0.000000 | 1.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
50% | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 0.000000 | 1.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
75% | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | ... | 0.000000 | 1.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
max | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
8 rows × 67 columns
Transpose the data
df.T.head()
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 401 | 402 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
EPISODE | S01E01 | S01E02 | S01E03 | S01E04 | S01E05 | S01E06 | S01E07 | S01E08 | S01E09 | S01E10 | ... | S31E04 | S31E05 | S31E06 | S31E07 | S31E08 | S31E09 | S31E10 | S31E11 | S31E12 | S31E13 |
TITLE | "A WALK IN THE WOODS" | "MT. MCKINLEY" | "EBONY SUNSET" | "WINTER MIST" | "QUIET STREAM" | "WINTER MOON" | "AUTUMN MOUNTAINS" | "PEACEFUL VALLEY" | "SEASCAPE" | "MOUNTAIN LAKE" | ... | "TRANQUILITY COVE" | "CABIN IN THE HOLLOW" | "VIEW FROM CLEAR CREEK" | "BRIDGE TO AUTUMN" | "TRAIL'S END" | "EVERGREEN VALLEY" | "BALMY BEACH" | "LAKE AT THE RIDGE" | "IN THE MIDST OF WINTER" | "WILDERNESS DAY" |
APPLE_FRAME | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
AURORA_BOREALIS | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
BARN | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
5 rows × 403 columns
Sort the data
df.sort_values(by='TREES',ascending=False).head()
EPISODE | TITLE | APPLE_FRAME | AURORA_BOREALIS | BARN | BEACH | BOAT | BRIDGE | BUILDING | BUSHES | ... | TOMB_FRAME | TREE | TREES | TRIPLE_FRAME | WATERFALL | WAVES | WINDMILL | WINDOW_FRAME | WINTER | WOOD_FRAMED | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | S01E01 | "A WALK IN THE WOODS" | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | ... | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
263 | S21E04 | "SERENITY" | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 1 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
261 | S21E02 | "TRANQUIL DAWN" | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
260 | S21E01 | "VALLEY VIEW" | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
259 | S20E13 | "DOUBLE TAKE" | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | ... | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
5 rows × 69 columns
Select and Slice
df.EPISODE[1:5]
1 S01E02 2 S01E03 3 S01E04 4 S01E05 Name: EPISODE, dtype: object
df['EPISODE'][1:5]
1 S01E02 2 S01E03 3 S01E04 4 S01E05 Name: EPISODE, dtype: object
df.loc[0:2]
EPISODE | TITLE | APPLE_FRAME | AURORA_BOREALIS | BARN | BEACH | BOAT | BRIDGE | BUILDING | BUSHES | ... | TOMB_FRAME | TREE | TREES | TRIPLE_FRAME | WATERFALL | WAVES | WINDMILL | WINDOW_FRAME | WINTER | WOOD_FRAMED | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | S01E01 | "A WALK IN THE WOODS" | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | ... | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 | S01E02 | "MT. MCKINLEY" | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
2 | S01E03 | "EBONY SUNSET" | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
3 rows × 69 columns
df.loc[100:106,['WATERFALL','TREE']]
WATERFALL | TREE | |
---|---|---|
100 | 0 | 0 |
101 | 0 | 1 |
102 | 0 | 1 |
103 | 0 | 1 |
104 | 0 | 1 |
105 | 0 | 0 |
106 | 0 | 1 |
df[(df.BRIDGE>0) & (df.WINTER>0)]
EPISODE | TITLE | APPLE_FRAME | AURORA_BOREALIS | BARN | BEACH | BOAT | BRIDGE | BUILDING | BUSHES | ... | TOMB_FRAME | TREE | TREES | TRIPLE_FRAME | WATERFALL | WAVES | WINDMILL | WINDOW_FRAME | WINTER | WOOD_FRAMED | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
240 | S19E07 | "COVERED BRIDGE OVAL" | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | ... | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
1 rows × 69 columns
df1 = df.iloc[:,3:6]
df1.head()
AURORA_BOREALIS | BARN | BEACH | |
---|---|---|---|
0 | 0 | 0 | 0 |
1 | 0 | 0 | 0 |
2 | 0 | 0 | 0 |
3 | 0 | 0 | 0 |
4 | 0 | 0 | 0 |
df1.plot.hist()
<matplotlib.axes._subplots.AxesSubplot at 0x1feccad0cc0>
df1.iloc[30:100,:].plot(figsize=(8, 8))
<matplotlib.axes._subplots.AxesSubplot at 0x1fecde85748>
Machine Learning: ... a method of teaching computers to make and improve predictions or behaviors based on some data.
Basically, there are 3* main tasks in machine learning:
I mean... why now?
There are so many algorithms that are really good right now:
DISCLAIMER: This is the oversimplified version
A classification and regression method. In this case, we will explore the classification problem.
from sklearn import neighbors
import numpy as np
%matplotlib inline
import seaborn
Create dataset:
training_data = pd.DataFrame()
training_data['test_1'] = [0.3051,0.4949,0.6974,0.3769,0.2231,0.341,0.4436,0.5897,0.6308,0.5]
training_data['test_2'] = [0.5846,0.2654,0.2615,0.4538,0.4615,0.8308,0.4962,0.3269,0.5346,0.6731]
training_data['outcome'] = ['win','win','win','win','win','loss','loss','loss','loss','loss']
training_data.head()
test_1 | test_2 | outcome | |
---|---|---|---|
0 | 0.3051 | 0.5846 | win |
1 | 0.4949 | 0.2654 | win |
2 | 0.6974 | 0.2615 | win |
3 | 0.3769 | 0.4538 | win |
4 | 0.2231 | 0.4615 | win |
Plot the data
# x and y are passed by keyword: newer seaborn releases no longer accept them positionally.
seaborn.lmplot(x='test_1', y='test_2', data=training_data, fit_reg=False, hue="outcome", scatter_kws={"marker": "D", "s": 100})
<seaborn.axisgrid.FacetGrid at 0x1fece93a748>
Convert data to np.arrays
# DataFrame.as_matrix() was deprecated and later removed from pandas; selecting
# the two feature columns and taking .values gives the same 2-D NumPy array.
X = training_data[['test_1', 'test_2']].values
# Class labels ('win' / 'loss') as a 1-D array aligned with the rows of X.
y = np.array(training_data['outcome'])
Training!
clf = neighbors.KNeighborsClassifier(3, weights = 'uniform')
trained_model = clf.fit(X, y)
drum rolls 🥁
trained_model.score(X, y)
0.80000000000000004
Check the model's prediction for a new data point
x_test = np.array([[.4,.6]])
trained_model.predict(x_test)
array(['loss'], dtype=object)
Check the predicted class probabilities
trained_model.predict_proba(x_test)
array([[ 0.66666667, 0.33333333]])
Load libraries
from sklearn.neighbors import KNeighborsClassifier
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.model_selection import GridSearchCV
Load Iris Flower Data
iris = datasets.load_iris()
X = iris.data
y = iris.target
Standardize Data
standardizer = StandardScaler()
X_std = standardizer.fit_transform(X)
Create knn model
knn = KNeighborsClassifier(n_neighbors=5, metric='euclidean', n_jobs=-1).fit(X_std, y)
Create the search space of possible values of k
pipe = Pipeline([('standardizer', standardizer), ('knn', knn)])
search_space = [{'knn__n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}]
Search!
# Fit on the raw features X: the pipeline's own 'standardizer' step rescales
# the training part of every CV split, so feeding it pre-standardized data
# (scaled using all rows, including each held-out fold) is redundant.
clf = GridSearchCV(pipe, search_space, cv=5, verbose=0).fit(X, y)
Show best k
clf.best_estimator_.get_params()['knn__n_neighbors']
6
%mkdir image\paintings
%cd image\paintings
D:\Google Drive\Python ITB\Kelas Makers\image\paintings
#Credit to: Jeff Thompson (https://gist.github.com/jeffThompson/6d4c45f89dc907925775972e72d9cf00)
# Download painting images 1..num_images into the current working directory,
# saving each one under a zero-padded name (001.png, 002.png, ...).
num_images = 403
import urllib.request
from tqdm import *
for i in tqdm(range(1, num_images + 1)):
    url = 'http://www.twoinchbrush.com/images/painting' + str(i) + '.png'
    filename = ('%03d' % (i,)) + '.png'
    try:
        # Only the network/file operation can fail, so only it is guarded.
        urllib.request.urlretrieve(url, filename)
    except OSError as err:
        # urllib's URLError / HTTPError / ContentTooShortError and local file
        # errors all derive from OSError. Catching just these (instead of a
        # bare `except:`) lets Ctrl+C still stop the long-running loop, and
        # the message now says which download failed and why.
        print('- ERROR!', url, err)
100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 403/403 [14:12<00:00, 2.37s/it]