visit
The task is to classify a given image into one of the 10 digits.
I’m doing it all in Python.
Let's get started.
import itertools
from keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
images = x_train[0:1000]
labels = y_train[0:1000]
def flatten_image(image):
return list(itertools.chain.from_iterable(image))
def weighted_sum(a, b):
assert(len(a) == len(b))
output = 0
for i in range(len(a)):
output += (a[i] * b[i])
return output
def vector_matrix_multiplication(a, b):
output = [0 for i in range(10)]
for i in range(len(output)):
assert(len(a) == len(b[i]))
output[i] = weighted_sum(a, b[i])
return output
def zeros_matrix(rows, cols):
output = []
for r in range(rows):
output.append([0 for col in range(cols)])
return output
def outer_product(a, b):
output = zeros_matrix(len(a), len(b))
for i in range(len(a)):
for j in range(len(b)):
output[i][j] = a[i] * b[j]
return output
class NeuralNet:
def __init__(self):
self.weights = [
[0.0000 for i in range(784)],
[0.0001 for i in range(784)],
[0.0002 for i in range(784)],
[0.0003 for i in range(784)],
[0.0004 for i in range(784)],
[0.0005 for i in range(784)],
[0.0006 for i in range(784)],
[0.0007 for i in range(784)],
[0.0008 for i in range(784)],
[0.0009 for i in range(784)]
]
self.alpha = 0.0000001
def predict(self, input):
return vector_matrix_multiplication(input, self.weights)
def train(self, input, labels, epochs):
for i in range(epochs):
for j in range(len(input)):
pred = self.predict(input[j])
label = labels[j]
goal = [0 for k in range(10)]
goal[label] = 1
error = [0 for k in range(10)]
delta = [0 for k in range(10)]
for a in range(len(goal)):
delta[a] = pred[a] - goal[a]
error[a] = delta[a] ** 2
weight_deltas = outer_product(delta, input[j])
for x in range(len(self.weights)):
for y in range(len(self.weights[0])):
self.weights[x][y] -= (self.alpha * weight_deltas[x][y])
# Train on first image
first_image = images[0]
first_label = labels[0]
input = [flatten_image(first_image)]
label = [first_label]
nn = NeuralNet()
nn.train(input, label, 5)
prediction = nn.predict(input[0])
print(prediction)
print("The label is: " + str(label[0]) + ". The prediction is: " + str(prediction.index(max(prediction))))
# Train on full dataset
prepared_images = [flatten_image(image) for image in images]
mm = NeuralNet()
mm.train(prepared_images, labels, 45)
# Test 1 prediction
prediction = mm.predict(prepared_images[3])
print("That image is the number " + str(prediction.index(max(prediction))))
# Calculate accuracy
test_set = x_test[0:100]
test_labels = y_test[0:100]
num_correct = 0
for i in range(len(test_set)):
prediction = mm.predict(flatten_image(test_set[i]))
correct = test_labels[i]
if prediction.index(max(prediction)) == int(correct):
num_correct += 1
print(str(num_correct/len(test_set) * 100) + "%")
The
keras
library helpfully includes the dataset so I can import it from the library.from keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
images = x_train[0:1000]
labels = y_train[0:1000]
When I call
load_data()
, I get back two tuples: a training set and a test set. To successfully finishing training on my personal laptop, I had to limit the data to the first 1000 elements. When I tried training on the full data set, it was hadn't finished after a full 24 hours and I had to kill the process to use my laptop :D.With only 1000 images, the best accuracy I achieved was about 75%. Maybe you can tweak the numbers and get something better!Getting back to the data, if I take a look at one of the images in the training set, I see that it is an array of arrays - a matrix. The numbers range from 0 to 255 - each representing the greyscale value of the pixel at a particular position in the image.
images[0]
# Output
array([[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,
18, 18, 18, 126, 136, 175, 26, 166, 255, 247, 127, 0, 0,
0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 30, 36, 94, 154, 170,
253, 253, 253, 253, 253, 225, 172, 253, 242, 195, 64, 0, 0,
0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 49, 238, 253, 253, 253, 253,
253, 253, 253, 253, 251, 93, 82, 82, 56, 39, 0, 0, 0,
0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 18, 219, 253, 253, 253, 253,
253, 198, 182, 247, 241, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 80, 156, 107, 253, 253,
205, 11, 0, 43, 154, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 1, 154, 253,
90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 139, 253,
190, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 190,
253, 70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35,
241, 225, 160, 108, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
81, 240, 253, 253, 119, 25, 0, 0, 0, 0, 0, 0, 0,
0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 45, 186, 253, 253, 150, 27, 0, 0, 0, 0, 0, 0,
0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 16, 93, 252, 253, 187, 0, 0, 0, 0, 0, 0,
0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 249, 253, 249, 64, 0, 0, 0, 0, 0,
0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 46, 130, 183, 253, 253, 207, 2, 0, 0, 0, 0, 0,
0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39,
148, 229, 253, 253, 253, 250, 182, 0, 0, 0, 0, 0, 0,
0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 114, 221,
253, 253, 253, 253, 201, 78, 0, 0, 0, 0, 0, 0, 0,
0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 23, 66, 213, 253, 253,
253, 253, 198, 81, 2, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0],
[ 0, 0, 0, 0, 0, 0, 18, 171, 219, 253, 253, 253, 253,
195, 80, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0],
[ 0, 0, 0, 0, 55, 172, 226, 253, 253, 253, 253, 244, 133,
11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0],
[ 0, 0, 0, 0, 136, 253, 253, 253, 212, 135, 132, 16, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0]], dtype=uint8)
labels[0]
# Output
5
import itertools
def flatten_image(image):
return list(itertools.chain.from_iterable(image))
What I'm doing in this function is using the
itertools
library to flatten the array. Specifically, I'm using the .chain.from_iterable()
method to give me one element at a time. Then I use the `list()` function to create a flat list to return. When I print the first image, I see that all the numbers are in one flat array.print(flatten_image(images[0]))
# Output
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 18, 18, 18, 126, 136, 175, 26, 166, 255, 247, 127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 36, 94, 154, 170, 253, 253, 253, 253, 253, 225, 172, 253, 242, 195, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 49, 238, 253, 253, 253, 253, 253, 253, 253, 253, 251, 93, 82, 82, 56, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 219, 253, 253, 253, 253, 253, 198, 182, 247, 241, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 80, 156, 107, 253, 253, 205, 11, 0, 43, 154, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 1, 154, 253, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 139, 253, 190, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 190, 253, 70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35, 241, 225, 160, 108, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 81, 240, 253, 253, 119, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45, 186, 253, 253, 150, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 93, 252, 253, 187, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 249, 253, 249, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 46, 130, 183, 253, 253, 207, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 148, 229, 253, 253, 253, 250, 182, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 114, 221, 253, 253, 253, 253, 201, 78, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 66, 213, 253, 253, 253, 253, 198, 81, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 171, 219, 253, 253, 253, 253, 195, 80, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 172, 226, 253, 253, 253, 253, 244, 133, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 136, 253, 253, 253, 212, 135, 132, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
def weighted_sum(a, b):
assert(len(a) == len(b))
output = 0
for i in range(len(a)):
output += (a[i] * b[i])
return output
The best way to think about what this single number represents is as a score of similarity between two arrays. The higher the weighted sum, the more similar arrays
a
and b
are to each other. Roughly speaking, the neural network will give higher scores to inputs that are more similar to its weights.def vector_matrix_multiplication(a, b):
output = [0 for i in range(10)]
for i in range(len(output)):
assert(len(a) == len(b[i]))
output[i] = weighted_sum(a, b[i])
return output
def zeros_matrix(rows, cols):
output = []
for r in range(rows):
output.append([0 for col in range(cols)])
return output
def outer_product(a, b):
output = zeros_matrix(len(a), len(b))
for i in range(len(a)):
for j in range(len(b)):
output[i][j] = a[i] * b[j]
return output
class NeuralNet:
def __init__(self):
self.weights = [
[0.0000 for i in range(784)],
[0.0001 for i in range(784)],
[0.0002 for i in range(784)],
[0.0003 for i in range(784)],
[0.0004 for i in range(784)],
[0.0005 for i in range(784)],
[0.0006 for i in range(784)],
[0.0007 for i in range(784)],
[0.0008 for i in range(784)],
[0.0009 for i in range(784)]
]
self.alpha = 0.0000001
def predict(self, input):
return vector_matrix_multiplication(input, self.weights)
def train(self, input, labels, epochs):
for i in range(epochs):
for j in range(len(input)):
pred = self.predict(input[j])
label = labels[j]
goal = [0 for k in range(10)]
goal[label] = 1
error = [0 for k in range(10)]
delta = [0 for k in range(10)]
for a in range(len(goal)):
delta[a] = pred[a] - goal[a]
error[a] = delta[a] ** 2
weight_deltas = outer_product(delta, input[j])
for x in range(len(self.weights)):
for y in range(len(self.weights[0])):
self.weights[x][y] -= (self.alpha * weight_deltas[x][y])
In the initializer, I have the weights and the alpha. I've initialized each weight array to have
784
elements of an initial number. 784
is the number of pixels in the image.def __init__(self):
self.weights = [
[0.0000 for i in range(784)],
[0.0001 for i in range(784)],
[0.0002 for i in range(784)],
[0.0003 for i in range(784)],
[0.0004 for i in range(784)],
[0.0005 for i in range(784)],
[0.0006 for i in range(784)],
[0.0007 for i in range(784)],
[0.0008 for i in range(784)],
[0.0009 for i in range(784)]
]
self.alpha = 0.0000001
def predict(self, input):
return vector_matrix_multiplication(input, self.weights)
The training function iterates through the dataset an
epoch
number of times.for i in range(epochs):
for j in range(len(input)):
pred = self.predict(input[j])
label = labels[j]
goal = [0 for k in range(10)]
goal[label] = 1
error = [0 for k in range(10)]
delta = [0 for k in range(10)]
for a in range(len(goal)):
delta[a] = pred[a] - goal[a]
error[a] = delta[a] ** 2
weight_deltas = outer_product(delta, input[j])
for x in range(len(self.weights)):
for y in range(len(self.weights[0])):
self.weights[x][y] -= (self.alpha * weight_deltas[x][y])
first_image = images[0]
first_label = labels[0]
input = [flatten_image(first_image)]
label = [first_label]
nn = NeuralNet()
nn.train(input, label, 5)
prediction = nn.predict(input[0])
print(prediction)
print("The label is: " + str(label[0]) + ". The prediction is: " + str(prediction.index(max(prediction))))
# Output
[0.0, 0.03036370905054081, 0.06072741810108162, 0.092263, 0.216324, 1.00253, 0.324525, 0.278556, 0.24290967240432648, 0.2732733814548679]
The label is: 5. The prediction is: 5
The number in index five is the greatest, so the network correctly identified the handwritten number of the number
5
.It works on one data point but what about the entire data set?Let's do that next.
I run it for 5 epochs. Through trial and error I found that 5 epochs gives me the highest accuracy of just under 75%.When it's finished, I test the network by making a prediction on a random image. It correctly identified the image.
prepared_images = [flatten_image(image) for image in images]
mm = NeuralNet()
mm.train(prepared_images, labels, 5)
prediction = mm.predict(prepared_images[3])
print("That image is the number " + str(prediction.index(max(prediction))))
# Output
That image is the number 1
labels[3]
# Output
1
test_set = x_test
test_labels = y_test
num_correct = 0
for i in range(len(test_set)):
prediction = mm.predict(flatten_image(test_set[i]))
correct = test_labels[i]
if prediction.index(max(prediction)) == int(correct):
num_correct += 1
print(str(num_correct/len(test_set) * 100) + "%")
# Output
74.47%
Previously published .