visit
Pre-requisites:
TensorFlow >= 1.15.0
Install the latest version by executing: pip install tensorflow
We are now good to go!
Step 1. Download or clone the TensorFlow Object Detection Code into your local machine from Github
Execute the following command in the terminal:
git clone https://github.com/tensorflow/models.git
Step 2. Installing the dependencies
The next step is to make sure that we have all the libraries and modules that we need to run the object detector on our machine. Here is a list of libraries that the project depends on. (Most of the dependencies come with TensorFlow by default.)
Step 3. Installing Protobuf compiler
Protobuf, or Protocol Buffers, is Google’s language-neutral, platform-neutral, extensible mechanism for serializing structured data. It helps us define how we want our data to be structured and, once structured, it lets us easily write and read the structured data to and from a variety of data streams using a variety of languages. This is also a dependency for this project. You can learn more about Protobufs in the official Protocol Buffers documentation. For now, we will install Protobuf on our machine. Head to the Protobuf releases page (https://github.com/protocolbuffers/protobuf/releases), choose the appropriate version for your OS and copy the download link. Open your terminal or command prompt, change directory to the cloned repository and execute the following commands in your terminal (the example below uses the macOS build):
cd models/research
wget -O protobuf.zip https://github.com/protocolbuffers/protobuf/releases/download/v3.9.1/protoc-3.9.1-osx-x86_64.zip
unzip protobuf.zip
Note: Make sure that you decompress the protobuf.zip file inside models/research directory
Step 4. Compiling the Protobuf compiler
Execute the following command from the research/ directory to compile the Protocol Buffers:
./bin/protoc object_detection/protos/*.proto --python_out=.
Implement Object Detection in Python
Now that we have all the dependencies installed, let’s use Python to implement Object Detection. In the downloaded repository, change directory to models/research/object_detection. In this directory, you will find an IPython notebook named object_detection_tutorial.ipynb. This file is a demo for object detection which, on execution, will use the specified ‘ssd_mobilenet_v1_coco_2017_11_17’ model to classify two test images provided in the repository. Given below is one of the test outputs:
In [1]:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
from distutils.version import StrictVersion
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
from utils import ops as utils_ops
# Fail fast when the installed TensorFlow is older than the minimum version
# this notebook was written against.
if StrictVersion(tf.__version__) < StrictVersion('1.12.0'):
    raise ImportError('Please upgrade your TensorFlow installation to v1.12.*.')
In [2]:
# This is needed to display the images.
# Runs the `matplotlib` line magic with the `inline` argument.
# NOTE(review): `get_ipython()` is presumably only available inside an
# IPython/Jupyter session -- confirm before running this file as a plain script.
get_ipython().run_line_magic('matplotlib', 'inline')
In [3]:
# Object detection imports
# Here are the imports from the object detection module.
from utils import label_map_util
from utils import visualization_utils as vis_util
In [4]:
# Model preparation
# Any model exported using the `export_inference_graph.py` tool can be loaded
# here simply by changing `PATH_TO_FROZEN_GRAPH` to point to a new .pb file.
# By default we use an "SSD with Mobilenet" model here.
# See https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md
# for a list of other models that can be run out-of-the-box with varying
# speeds and accuracies.

# What model to download.
MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
MODEL_FILE = MODEL_NAME + '.tar.gz'
# The URL must carry an explicit scheme: a scheme-less '//host/path' string is
# not a valid URL for urllib and the download would fail.
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

# Path to frozen detection graph. This is the actual model that is used for
# the object detection.
PATH_TO_FROZEN_GRAPH = MODEL_NAME + '/frozen_inference_graph.pb'

# Path to the label map file used to attach the correct label to each box.
PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')
In [5]:
# Download Model
# Fetch the model archive into the current directory, then unpack only the
# frozen inference graph from it.
# `urlretrieve` replaces the deprecated `URLopener` class, which newer Python
# releases no longer provide.
urllib.request.urlretrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)

# The `with` block guarantees the archive handle is closed even if extraction
# fails part-way.
with tarfile.open(MODEL_FILE) as tar_file:
    for member in tar_file.getmembers():
        file_name = os.path.basename(member.name)
        if 'frozen_inference_graph.pb' in file_name:
            # The member keeps its in-archive path, so it is extracted
            # relative to the current working directory.
            tar_file.extract(member, os.getcwd())
In [6]:
# Load a (frozen) Tensorflow model into memory.
# A dedicated graph is created and made the default while the serialized
# GraphDef is read from PATH_TO_FROZEN_GRAPH and imported into it.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        # name='' imports the nodes without a name prefix, so tensors can be
        # looked up later by names such as 'image_tensor:0'.
        tf.import_graph_def(od_graph_def, name='')
In [7]:
# Loading label map
# Label maps map indices to category names, so that when our convolution
# network predicts `5`, we know that this corresponds to `airplane`.
# Here we use internal utility functions, but anything that returns a
# dictionary mapping integers to appropriate string labels would be fine.
# The result is built from the file at PATH_TO_LABELS and is later handed to
# the visualization helper as `category_index`.
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)
In [8]:
def run_inference_for_single_image(image, graph):
    """Run the detection graph once on ``image`` and return unbatched outputs.

    A new ``tf.Session`` is opened on ``graph`` for every call and closed
    before returning.

    Args:
        image: Array fed to the graph's ``image_tensor:0`` tensor. Assumes a
            leading batch axis, i.e. ``(1, height, width, channels)``:
            ``image.shape[1]`` and ``image.shape[2]`` are passed to the mask
            reframing helper as the image size (TODO confirm against callers).
        graph: Graph that already contains the imported detection model.

    Returns:
        dict holding the first batch element of every output tensor that
        exists in the graph: ``num_detections`` (converted to ``int``),
        ``detection_classes`` (cast to ``np.int64``), ``detection_boxes``,
        ``detection_scores`` and, only when the graph provides it,
        ``detection_masks``.
    """
    with graph.as_default():
        with tf.Session() as sess:
            default_graph = tf.get_default_graph()

            # Get handles to input and output tensors
            graph_ops = default_graph.get_operations()
            all_tensor_names = {
                output.name for op in graph_ops for output in op.outputs
            }
            tensor_dict = {}
            for key in [
                    'num_detections', 'detection_boxes', 'detection_scores',
                    'detection_classes', 'detection_masks']:
                tensor_name = key + ':0'
                # Only request outputs this particular model actually has.
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = default_graph.get_tensor_by_name(
                        tensor_name)

            if 'detection_masks' in tensor_dict:
                # The following processing is only for single image
                detection_boxes = tf.squeeze(
                    tensor_dict['detection_boxes'], [0])
                detection_masks = tf.squeeze(
                    tensor_dict['detection_masks'], [0])
                # Reframe is required to translate mask from box coordinates
                # to image coordinates and fit the image size.
                real_num_detection = tf.cast(
                    tensor_dict['num_detections'][0], tf.int32)
                detection_boxes = tf.slice(
                    detection_boxes, [0, 0], [real_num_detection, -1])
                detection_masks = tf.slice(
                    detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
                detection_masks_reframed = (
                    utils_ops.reframe_box_masks_to_image_masks(
                        detection_masks, detection_boxes,
                        image.shape[1], image.shape[2]))
                # Threshold the reframed masks at 0.5 and store them as uint8.
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                # Follow the convention by adding back the batch dimension
                tensor_dict['detection_masks'] = tf.expand_dims(
                    detection_masks_reframed, 0)

            image_tensor = default_graph.get_tensor_by_name('image_tensor:0')

            # Run inference
            output_dict = sess.run(
                tensor_dict, feed_dict={image_tensor: image})

            # all outputs are float32 numpy arrays, so convert types as
            # appropriate, and drop the batch axis by taking element 0.
            output_dict['num_detections'] = int(
                output_dict['num_detections'][0])
            output_dict['detection_classes'] = output_dict[
                'detection_classes'][0].astype(np.int64)
            output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
            output_dict['detection_scores'] = output_dict['detection_scores'][0]
            if 'detection_masks' in output_dict:
                output_dict['detection_masks'] = output_dict[
                    'detection_masks'][0]
    return output_dict
In [9]:
import cv2

# Open the default camera (device index 0).
# `cv2.VideoCapture` is the public entry point; the nested `cv2.cv2` module is
# not available in newer opencv-python releases.
cam = cv2.VideoCapture(0)
rolling = True
try:
    while rolling:
        ret, image_np = cam.read()
        if not ret:
            # No frame was delivered (camera missing, busy or disconnected);
            # stop instead of passing an empty frame to the detector.
            break
        # The inference helper is given the frame with a leading batch axis.
        image_np_expanded = np.expand_dims(image_np, axis=0)
        # Actual detection.
        output_dict = run_inference_for_single_image(
            image_np_expanded, detection_graph)
        # Visualization of the results of a detection.
        vis_util.visualize_boxes_and_labels_on_image_array(
            image_np,
            output_dict['detection_boxes'],
            output_dict['detection_classes'],
            output_dict['detection_scores'],
            category_index,
            instance_masks=output_dict.get('detection_masks'),
            use_normalized_coordinates=True,
            line_thickness=8)
        cv2.imshow('image', cv2.resize(image_np, (1000, 800)))
        # Wait up to 25 ms for a key press; 'q' ends the loop.
        if cv2.waitKey(25) & 0xFF == ord('q'):
            break
finally:
    # Always close the preview window and free the camera, even when
    # inference or drawing raised an exception.
    cv2.destroyAllWindows()
    cam.release()
For more project ideas, refer to the related articles. Happy Learning!!