In this article, I will use Keras and TensorFlow to build a CNN architecture for image classification.

The dataset is available for download from the UCI Machine Learning Repository, and our purpose is to classify the eye state as wearing SUNGLASSES or just OPEN. Classification by head position or facial expression can be done with the same structure as well, but those tasks need more samples for good accuracy, since the number of labels is higher (see the sketch just below for how the labels could be adapted).
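As a quick aside, here is a minimal sketch of how the label extraction could be adapted for head-pose classification. It assumes the dataset's usual filename convention of userid_pose_expression_eyes_scale.pgm (e.g. an2i_left_angry_open_2.pgm) and the flat folder layout used throughout this article; adjust the parsing if your copy of the archive is organized differently:

import pathlib

data_dir = pathlib.Path("faces")
# All half-scale images end in "_2.pgm"; the pose is the 4th token from the end
pose_images = list(data_dir.glob('*_2.pgm'))
pose_labels = [p.stem.split('_')[-4] for p in pose_images]  # e.g. 'straight', 'left', 'right', 'up'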

I will use the Python library “netpbmfile” to read the PGM images and convert them into NumPy arrays. Also, since these are grayscale images, we need to add a trailing channel dimension (channels_last format), as we intend to process them with a convolutional network.

!pip install netpbmfile
!tar xvf faces.tar >> /dev/null

import numpy as np
import netpbmfile
import pathlib
from sklearn import preprocessing

data_dir = pathlib.Path("faces")
images_open = list(data_dir.glob('*_open_2.pgm'))
images_sunglasses = list(data_dir.glob('*_sunglasses_2.pgm'))

images_str = np.concatenate((images_open, images_sunglasses), axis=0)
y_open = np.full((len(images_open), 1), 'open')
y_sunglasses = np.full((len(images_sunglasses), 1), 'sunglasses')

y = np.concatenate((y_open, y_sunglasses), axis=0)

CLASS_NAMES = ['open', 'sunglasses']
encoder = preprocessing.LabelEncoder()
encoder.fit(CLASS_NAMES)
labels = encoder.transform(y.ravel())

X = []
for i in range(len(images_str)):
    image_arr = netpbmfile.imread(str(images_str[i]))
    # Add channels_last
    image_arr = np.expand_dims(image_arr, axis=2)
    image_arr = image_arr.astype(np.float32)
    X.append(image_arr)
    
features = np.asarray(X)
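Before going any further, a quick sanity check on the arrays we just built can save some debugging later (this check is my own addition, not strictly required):

# features should be 4-D (num_images, height, width, 1) in float32,
# labels should be 1-D with one integer class id per image
print(features.shape, features.dtype)
print(labels.shape, labels.dtype)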

Now, we define the input functions for training and evaluation:

import tensorflow as tf
from sklearn.model_selection import train_test_split

xTrain, xTest, yTrain, yTest = train_test_split(features, labels, test_size=0.1, random_state=0)

BATCH_SIZE = 32
REPEAT_SIZE = 100

def train_input_fn():
    return tf.data.Dataset.from_tensor_slices((xTrain, yTrain)).repeat(REPEAT_SIZE).batch(BATCH_SIZE)

def test_input_fn():
    # A single pass over the test set is enough for evaluation
    return tf.data.Dataset.from_tensor_slices((xTest, yTest)).batch(BATCH_SIZE)
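As a small back-of-the-envelope check of my own (not part of the original pipeline), it is worth noting how many training steps the repeated dataset can actually supply, since the Estimator stops as soon as the input pipeline is exhausted; we will see this again when we pass a steps value to classifier.train:

# Upper bound on the number of training steps the repeated dataset can feed
max_steps = (len(xTrain) * REPEAT_SIZE) // BATCH_SIZE
print("The training input pipeline can supply at most ~%d steps" % max_steps)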

Next comes the most exciting part: defining the structure of the network. A very good paper on how to stack this kind of convolutional layers, well worth a look, is the VGG paper (https://arxiv.org/pdf/1409.1556/):

from tensorflow.keras import layers

# The model function below relies on TF1-style optimizers, metrics and global step,
# so we work through the compat.v1 API with v2 behavior disabled
import tensorflow.compat.v1 as tf1
tf1.disable_v2_behavior()

def cnn_model_fn(features, labels, mode):    
    """Model function for CNN"""    
    
    layer = layers.Conv2D(32, 3, padding='same', activation='relu')(features)
    layer = layers.BatchNormalization()(layer)
    layer = layers.Conv2D(32, 3, padding='same', activation='relu')(layer)
    layer = layers.BatchNormalization()(layer)
    layer = layers.MaxPooling2D(pool_size=(2,2))(layer)
    layer = layers.Dropout(0.25)(layer)

    layer = layers.Conv2D(64, 3, padding='same', activation='relu')(layer)
    layer = layers.BatchNormalization()(layer)
    layer = layers.Conv2D(64, 3, padding='same', activation='relu')(layer)
    layer = layers.BatchNormalization()(layer)
    layer = layers.MaxPooling2D(pool_size=(2,2))(layer)
    layer = layers.Dropout(0.25)(layer)

    layer = layers.Conv2D(128, 3, padding='same', activation='relu')(layer)
    layer = layers.BatchNormalization()(layer)
    layer = layers.Conv2D(128, 3, padding='same', activation='relu')(layer)
    layer = layers.BatchNormalization()(layer)
    layer = layers.Conv2D(128, 3, padding='same', activation='relu')(layer)
    layer = layers.BatchNormalization()(layer)
    layer = layers.MaxPooling2D(pool_size=(2,2))(layer)
    layer = layers.Dropout(0.25)(layer)
    
    #Fully connected layer    
    flatten = layers.Flatten()(layer)                    
    dense1 = layers.Dense(flatten.get_shape()[1], activation='relu')(flatten)
    batch2 = layers.BatchNormalization()(dense1)
    drop2 = layers.Dropout(0.25)(batch2)    
    
    
    # Logits Layer
    logits = layers.Dense(units=len(CLASS_NAMES))(drop2)
    
    predictions = {
      # Generate predictions (for PREDICT and EVAL mode)
      "classes": tf.argmax(input=logits, axis=1),
      "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
    }
    
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
    
    # Calculate Loss   
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)    
    loss = tf.reduce_mean(loss)
    
    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf1.train.GradientDescentOptimizer(learning_rate=0.0005)
        train_op = optimizer.minimize(
            loss=loss,
            global_step=tf1.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
    
    # Add evaluation metrics Evaluation mode
    eval_metric_ops = {
        "accuracy": tf1.metrics.accuracy(
            labels=labels, predictions=predictions["classes"])}
    
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
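For readers who just want to see the architecture at a glance, here is a rough Keras-only restatement of the same convolutional stack as a Sequential model. This is purely illustrative, my own sketch rather than the code the Estimator runs, but calling summary() on it is a convenient way to inspect the layer output shapes and parameter counts:

def build_architecture_sketch(input_shape):
    """Illustrative Sequential version of the conv / batch-norm / pool / dropout stack above."""
    model = tf.keras.Sequential()
    model.add(layers.InputLayer(input_shape=input_shape))
    # Three blocks: 2 conv layers of 32 filters, 2 of 64, then 3 of 128
    for filters, conv_count in [(32, 2), (64, 2), (128, 3)]:
        for _ in range(conv_count):
            model.add(layers.Conv2D(filters, 3, padding='same', activation='relu'))
            model.add(layers.BatchNormalization())
        model.add(layers.MaxPooling2D(pool_size=(2, 2)))
        model.add(layers.Dropout(0.25))
    model.add(layers.Flatten())
    # The fully connected layer is sized to the flattened output, as in the model function
    model.add(layers.Dense(model.output_shape[1], activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.Dropout(0.25))
    model.add(layers.Dense(len(CLASS_NAMES)))
    return model

build_architecture_sketch(features.shape[1:]).summary()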

Now we can create the Estimator for the classifier:

# Create the Estimator
import tempfile
model_dir = tempfile.mkdtemp()
classifier = tf.estimator.Estimator(
    model_fn=cnn_model_fn, model_dir=model_dir)

To check that the learning is heading in the right direction, let's monitor the execution by hooking in a logging tensor:

# Set up logging for predictions
tensors_to_log = {"probabilities": "softmax_tensor"}
logging_hook = tf.estimator.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=100)

Now, we can proceed to the training:

classifier.train(
    input_fn=train_input_fn,
    steps=5000,
    hooks=[logging_hook])

Time to evaluate:

eval_results = classifier.evaluate(input_fn=test_input_fn)
print(eval_results)

An impressive accuracy for image classification on such a small dataset (note that global_step stops at 1754 instead of the requested 5000 steps: with REPEAT_SIZE = 100 the repeated training dataset is exhausted first, and the Estimator stops training once the input pipeline runs out of data):

{'accuracy': 0.95238096, 'loss': 0.07515864, 'global_step': 1754}
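Finally, here is a short sketch (my own addition) of how the trained Estimator could be used to classify individual images, reusing the "classes" and "probabilities" entries defined in the model function. For simplicity I take a few images from the test set as stand-ins for new, unseen data:

def predict_input_fn():
    # A handful of test images as stand-ins for new, unlabeled data
    return tf.data.Dataset.from_tensor_slices(xTest[:5]).batch(1)

for pred in classifier.predict(input_fn=predict_input_fn):
    label = encoder.inverse_transform([pred["classes"]])[0]
    print("predicted: %-10s probabilities: %s" % (label, pred["probabilities"]))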

The Jupyter notebook is available on GitHub.

Your comments are most welcome!