The Manga Guide to Machine Learning

From Hidden Wiki
Jump to navigation Jump to search
Mathematics Animal intelligence Biological neural network Web development Security
Statistics Animal cognition Neural circuit Darknet web development Security
Messenger AI ANN VPS Cryptocurrency
Session Artificial intelligence Artificial neural network Virtual private server Cryptocurrency wallet

The Manga Guide to Machine Learning (マンガでわかる機械学習) is a manga guide that explains machine learning, one of the fields of artificial intelligence, including its algorithms and real-life applications.


It's one of The Manga Guides.

Source code

1

# Ordinary least-squares regression on the Boston housing data: fit the model,
# then print one coefficient per feature and the sum of squared coefficients.
from sklearn.linear_model import LinearRegression

try:
    # load_boston ships with scikit-learn releases before 1.2.
    from sklearn.datasets import load_boston
except ImportError:
    # scikit-learn 1.2 removed load_boston. Rebuild the same arrays from the
    # original StatLib file (requires network access), following the recipe
    # given in scikit-learn's deprecation notice.
    import numpy as np
    import pandas as pd
    from sklearn.utils import Bunch

    def load_boston():
        """Return the Boston housing data with data/target/feature_names."""
        raw = pd.read_csv(
            "http://lib.stat.cmu.edu/datasets/boston",
            sep=r"\s+",
            skiprows=22,  # descriptive text precedes the numeric records
            header=None,
        ).values
        # Each record spans two physical lines: 11 features on the first,
        # then 2 more features and the target (MEDV) on the second.
        return Bunch(
            data=np.hstack([raw[::2, :], raw[1::2, :2]]),
            target=raw[1::2, 2],
            feature_names=np.array([
                "CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
                "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT",
            ]),
        )

boston = load_boston()
X = boston.data
y = boston.target

lr1 = LinearRegression()
lr1.fit(X, y)

print("Linear Regression")
for f, w in zip(boston.feature_names, lr1.coef_):
    print("{0:7s}: {1:6.2f}".format(f, w))
# Sum of squared weights, for comparing coefficient magnitude between models.
print("coef = {0:4.2f}".format(sum(lr1.coef_**2)))


2

# Ridge (L2-regularised) regression on the Boston housing data: fit the model,
# then print one coefficient per feature and the sum of squared coefficients.
from sklearn.linear_model import Ridge

try:
    # load_boston ships with scikit-learn releases before 1.2.
    from sklearn.datasets import load_boston
except ImportError:
    # scikit-learn 1.2 removed load_boston. Rebuild the same arrays from the
    # original StatLib file (requires network access), following the recipe
    # given in scikit-learn's deprecation notice.
    import numpy as np
    import pandas as pd
    from sklearn.utils import Bunch

    def load_boston():
        """Return the Boston housing data with data/target/feature_names."""
        raw = pd.read_csv(
            "http://lib.stat.cmu.edu/datasets/boston",
            sep=r"\s+",
            skiprows=22,  # descriptive text precedes the numeric records
            header=None,
        ).values
        # Each record spans two physical lines: 11 features on the first,
        # then 2 more features and the target (MEDV) on the second.
        return Bunch(
            data=np.hstack([raw[::2, :], raw[1::2, :2]]),
            target=raw[1::2, 2],
            feature_names=np.array([
                "CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
                "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT",
            ]),
        )

boston = load_boston()
X = boston.data
y = boston.target

# alpha is the regularisation strength passed to Ridge.
lr2 = Ridge(alpha=10.0)
lr2.fit(X, y)

print("Ridge")
for f, w in zip(boston.feature_names, lr2.coef_):
    print("{0:7s}: {1:6.2f}".format(f, w))
# Sum of squared weights, for comparing coefficient magnitude between models.
print("coef = {0:4.2f}".format(sum(lr2.coef_**2)))


3

# Lasso (L1-regularised) regression on the Boston housing data: fit the model,
# then print one coefficient per feature and the sum of squared coefficients.
from sklearn.linear_model import Lasso

try:
    # load_boston ships with scikit-learn releases before 1.2.
    from sklearn.datasets import load_boston
except ImportError:
    # scikit-learn 1.2 removed load_boston. Rebuild the same arrays from the
    # original StatLib file (requires network access), following the recipe
    # given in scikit-learn's deprecation notice.
    import numpy as np
    import pandas as pd
    from sklearn.utils import Bunch

    def load_boston():
        """Return the Boston housing data with data/target/feature_names."""
        raw = pd.read_csv(
            "http://lib.stat.cmu.edu/datasets/boston",
            sep=r"\s+",
            skiprows=22,  # descriptive text precedes the numeric records
            header=None,
        ).values
        # Each record spans two physical lines: 11 features on the first,
        # then 2 more features and the target (MEDV) on the second.
        return Bunch(
            data=np.hstack([raw[::2, :], raw[1::2, :2]]),
            target=raw[1::2, 2],
            feature_names=np.array([
                "CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
                "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT",
            ]),
        )

boston = load_boston()
X = boston.data
y = boston.target

# alpha is the regularisation strength passed to Lasso.
lr3 = Lasso(alpha=2.0)
lr3.fit(X, y)

print("Lasso")
for f, w in zip(boston.feature_names, lr3.coef_):
    print("{0:7s}: {1:6.2f}".format(f, w))
# Sum of squared weights, for comparing coefficient magnitude between models.
print("coef = {0:4.2f}".format(sum(lr3.coef_**2)))


4

# Logistic regression on the breast cancer data set: fit a classifier with
# default settings and list the learned weight of every feature.
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression

breast_cancer = load_breast_cancer()
X, y = breast_cancer.data, breast_cancer.target

clf1 = LogisticRegression()
clf1.fit(X, y)

# Only the first row of coef_ is reported.
weights = clf1.coef_[0]
row_template = "{0:<23}: {1:6.2f}"
for name, weight in zip(breast_cancer.feature_names, weights):
    print(row_template.format(name, weight))


5

# Decision tree on the breast cancer data set: compare an unrestricted tree
# with a depth-limited one, then render the limited tree to a PNG via Graphviz.
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import graphviz
import os

# Make the Graphviz executables discoverable by appending their folder to PATH.
os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin'

# Sample data (breast cancer data set).
cancer = load_breast_cancer()

# Stratified, reproducible split into training and test portions.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    stratify=cancer.target,
    random_state=42,
)

# Unrestricted tree: grown with no depth limit.
dTreeAll = DecisionTreeClassifier(random_state=0)
dTreeAll.fit(X_train, y_train)

train_score_before = dTreeAll.score(X_train, y_train)
test_score_before = dTreeAll.score(X_test, y_test)
print("training set preciseness (before): {:.2f}".format(train_score_before))
print("test set preciseness (before): {:.2f}".format(test_score_before))

# Pruned tree: identical settings except that depth is capped at 3.
dTreeLimit = DecisionTreeClassifier(max_depth=3, random_state=0)
dTreeLimit.fit(X_train, y_train)

train_score_after = dTreeLimit.score(X_train, y_train)
test_score_after = dTreeLimit.score(X_test, y_test)
print("training set preciseness (after): {:.2f}".format(train_score_after))
print("test set preciseness (after): {:.2f}".format(test_score_after))

# Write the pruned tree out as a dot file.
export_graphviz(
    dTreeLimit,
    out_file="decisionTree1.dot",
    feature_names=cancer.feature_names,
    class_names=["Malignant", "Benign"],
    filled=True,
    impurity=False,
)

# Read the dot file back and render it as a PNG.
with open("decisionTree1.dot", encoding='UTF-8') as dot_file:
    dot_graph = dot_file.read()
dot = graphviz.Source(dot_graph)
dot.format = 'png'
dot.render(filename='decisionTree1')


6

# Small convolutional network for MNIST digit classification with Keras:
# prepare the images, build and train the model, then report test metrics.
import keras
from keras.datasets import mnist
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.models import Sequential
from keras.utils import to_categorical

(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Images are reshaped to rows x cols with one explicit channel axis.
img_rows, img_cols = 28, 28
input_shape = (img_rows, img_cols, 1)

X_train = X_train.reshape((X_train.shape[0],) + input_shape)
X_test = X_test.reshape((X_test.shape[0],) + input_shape)

# Convert to float32 and divide by 255.
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

# One-hot encode the integer labels.
Y_train = to_categorical(y_train)
Y_test = to_categorical(y_test)

# Width of the one-hot vectors, i.e. the size of the output layer.
n_out = Y_train.shape[1]

# Two convolution + pooling stages followed by a dense classifier head.
model = Sequential([
    Conv2D(16, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(n_out, activation='softmax'),
])
model.summary()

model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)
model.fit(X_train, Y_train, epochs=5, batch_size=200)

# evaluate() returns the loss first, then the accuracy metric set in compile().
score = model.evaluate(X_test, Y_test, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)


7

# Ensemble classifiers on the Pima Indians diabetes data: load the CSV, fit a
# random forest on a hold-out split, then cross-validate four ensemble models.

# The FutureWarning filter is installed first so it is already active while
# the remaining libraries are imported.
from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)

import pandas as pd
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split

# Column headers; they are passed to read_csv explicitly via names=.
ch = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = pd.read_csv(
    "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv",
    names=ch,
)
print("DataFrame Shape: ", dataframe.shape)
print("DataFrame Head")
print(dataframe.head())

# Features are every column except 'class'; 'class' is the target.
y = dataframe['class']
X = dataframe.drop('class', axis=1)

# Hold out a third of the rows with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=66
)

# Random forest trained on the training split; predictions for the test split.
rfc = RandomForestClassifier()
rfc.fit(X_train, y_train)
rfc_predict = rfc.predict(X_test)

# Four ensemble models, each scored with 10-fold cross-validation on all rows.
clf1 = BaggingClassifier()
clf2 = RandomForestClassifier()
clf3 = AdaBoostClassifier()
clf4 = GradientBoostingClassifier()

summary_template = "{0:4.2f} +/- {1:4.2f} %"
for estimator in (clf1, clf2, clf3, clf4):
    scores = cross_val_score(estimator, X, y, cv=10)
    # Mean and standard deviation of the fold scores, as percentages.
    print(summary_template.format(scores.mean() * 100, scores.std() * 100))

See also