AI pipeline from disk

This notebook contains a Keras pipeline that takes chips stored in individual folders per class and trains a model on them.

import os
import glob
import shutil
from pathlib import Path
import random
import numpy
import tensorflow as tf

from model_builder import model_builder, relabel, class_merger, balancer
import tools_keras
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications import resnet_v2, vgg19, efficientnet

# Seed the Python, NumPy and TensorFlow random number generators with one
# shared value.
SEED = 42
random.seed(SEED)
numpy.random.seed(SEED)
tf.random.set_seed(SEED)
# Locations used throughout the notebook. All chip paths end with "/" because
# the code below builds sub-paths by plain string concatenation.
specs = {
    # Source chips, read as <chips>/<subset>/<original class>/*.tif.
    'chips': "../../chips_gb/32_shuffled/",
    # Destination of the merged classes: <chips_combined>/<subset>/<combined class>/.
    'chips_combined': "../../chips_gb/32_shuffled_combined_12_named/",
    # Destination of the per-class capped sample used for training and evaluation.
    'chips_balanced': "../../chips_gb/32_shuffled_balanced_12_named/",
    # Root for the logs/pred/model/json folders passed to tools_keras.fit_phase.
    'folder': "../../ai/gb_32_shuffled/",
}

Combine groups

# Each inner list names the original class folders (under
# specs['chips']/<subset>/) that are merged into one combined class.
# The position in this list pairs with the same position in `group_naming`.
group_mapping = [
    ['9_0', '9_1', '9_2', '9_4', '9_5'],
    ['2_0'], 
    ['2_1'], 
    ['2_2'],
    ['1_0'], 
    ['3_0'], 
    ['5_0'], 
    ['6_0'], 
    ['8_0'],
    ['0_0'],
    ['4_0'],
    ['7_0']
]

# Human-readable name of each combined class, in the same order as
# `group_mapping`. The combine step lower-cases the name and replaces spaces
# with underscores to obtain the class folder name.
group_naming = [
    "Urbanity", 
    "Dense residential neighbourhoods",
    "Connected residential neighbourhoods",
    "Dense urban neighbourhoods",
    "Accessible suburbia",
    "Open sprawl",
    "Warehouse_Park land",
    "Gridded residential quarters",
    "Disconnected suburbia",
    "Countryside agriculture", 
    "Wild countryside", 
    "Urban buffer"
]
# Copy every source chip into the folder of the combined class it belongs to,
# separately for each data subset.
for subset in ["train", "validation", "secret"]:
    combined_subset = specs['chips_combined'] + subset
    os.makedirs(combined_subset, exist_ok=True)

    for name, group in zip(group_naming, group_mapping):
        # Folder name of the combined class: lower-case, spaces -> underscores.
        class_dir = combined_subset + "/" + name.replace(" ","_").lower()
        os.makedirs(class_dir, exist_ok=True)

        for g in group:
            # All .tif chips of one original class within this subset.
            for f in glob.glob(f"{specs['chips']}{subset}/{g}/*.tif"):
                f = Path(f)
                # The file keeps its name; chips from different original
                # classes end up side by side in the combined folder.
                shutil.copy(f, class_dir + "/" + f.name)

Balance groups

# Build the "balanced" chip set: from every combined class folder copy a random
# sample of at most `total` files into the matching folder under
# specs['chips_balanced']. Classes with fewer files than the cap are copied in
# full, so the result is capped rather than strictly equal-sized.
#
# NOTE(review): folders are created with exist_ok=True and nothing is removed
# first, so re-running this into a non-empty target can only add files.
for subset in ["train", "validation", "secret"]:
    # Per-class cap: 35000 chips for training, 5000 for the other subsets.
    total = 35000 if subset == "train" else 5000
    balanced_subset = specs['chips_balanced'] + subset
    os.makedirs(balanced_subset, exist_ok=True)

    # glob returns paths in arbitrary (filesystem-dependent) order. Sorting the
    # class folders and the files inside them gives the seeded shuffle a fixed
    # starting point, so the same seed selects the same chips on every run.
    for folder in sorted(glob.glob(specs["chips_combined"] + f"{subset}/*")):
        target_dir = balanced_subset + "/" + Path(folder).name
        os.makedirs(target_dir, exist_ok=True)
        files = sorted(glob.glob(folder + "/*"))
        random.shuffle(files)
        for f in files[:total]:
            f = Path(f)
            shutil.copy(f, target_dir + "/" + f.name)
# Metadata describing this experiment; handed to tools_keras.fit_phase through
# its `specs` argument.
model_specs = {
    'meta_class_map': group_mapping,
    'meta_class_names': group_naming,
    # NOTE(review): presumably the chip size matching the "32_shuffled" data
    # folders - confirm.
    'meta_chip_size': 32,
}
# Build the classifier with the project helper `model_builder`.
model = model_builder(
    model_name="efficientnet", 
    bridge="pooling", 
    top_layer_neurons=256,
    # One output per combined class: `group_mapping` has 12 entries.
    n_labels=12,
    # Same spatial size as the `target_size` used by the image generators.
    input_shape=(224, 224, 3),
)
2022-05-09 10:43:11.087950: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 6391 MB memory:  -> device: 0, name: Quadro RTX 4000, pci bus id: 0000:21:00.0, compute capability: 7.5

Define class weights inversely proportional to the class counts (largest class count divided by each class count).

# Index the balanced training chips. The generator is created with no
# augmentation or rescaling options; its `classes` attribute is read afterwards
# to count the images per class.
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator()
train_generator = train_datagen.flow_from_directory(
    specs['chips_balanced'] + 'train',
    # Same spatial size as the model's input_shape.
    target_size=(224, 224),
    # Integer class labels.
    class_mode='sparse')
Found 262937 images belonging to 12 classes.
# Count the training images per class and weight each class by
# (largest class count) / (its own count), so the largest class gets 1.0.
un, c = numpy.unique(train_generator.classes, return_counts=True)
class_weights = {label: c.max() / count for label, count in zip(un, c)}
# Train and evaluate the model with the project helper `tools_keras.fit_phase`.
# Judging by the output it logs, it trains with early stopping, saves the model
# and reports accuracy on the train, validation and secret subsets.
h = tools_keras.fit_phase(
        model,
        # Training, validation and held-out ("secret") chip folders.
        specs['chips_balanced'] + 'train',
        specs['chips_balanced'] + 'validation',
        specs['chips_balanced'] + 'secret',
        # Output locations, all under specs["folder"].
        log_folder=specs["folder"] + "logs",
        pred_folder=specs["folder"] + "pred",
        model_folder=specs["folder"] + "model",
        json_folder=specs["folder"] + "json",
        specs=model_specs,
        epochs=200,
        # NOTE(review): presumably the early-stopping patience - verify in
        # tools_keras.
        patience=5,
        batch_size=32,
        verbose=True,
        class_weight=class_weights,
    )
Model: "efficientnet_pooling_256_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 resizing (Resizing)         (None, 224, 224, 3)       0         
                                                                 
 efficientnetb4 (Functional)  (None, 7, 7, 1792)       17673823  
                                                                 
 global_average_pooling2d (G  (None, 1792)             0         
 lobalAveragePooling2D)                                          
                                                                 
 dense (Dense)               (None, 256)               459008    
                                                                 
 dense_1 (Dense)             (None, 12)                3084      
                                                                 
=================================================================
Total params: 18,135,915
Trainable params: 462,092
Non-trainable params: 17,673,823
_________________________________________________________________
None
creating ImageDataGenerators...
Found 262937 images belonging to 12 classes.
Found 42251 images belonging to 12 classes.
training...
/usr/local/lib/python3.8/dist-packages/keras/engine/functional.py:1410: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.
  layer_config = serialize_layer_fn(layer)
Epoch 1/200
2022-05-09 10:43:38.630974: I tensorflow/stream_executor/cuda/cuda_dnn.cc:377] Loaded cuDNN version 8302
8217/8217 [==============================] - ETA: 0s - loss: 2.5337 - accuracy: 0.3838
2022-05-09 11:01:28.444617: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.
INFO:tensorflow:Assets written to: ../../ai/gb_32_shuffled/model/efficientnet_pooling_256_12_best/assets
/usr/local/lib/python3.8/dist-packages/keras/engine/functional.py:1410: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.
  layer_config = serialize_layer_fn(layer)
/usr/local/lib/python3.8/dist-packages/keras/saving/saved_model/layer_serialization.py:112: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.
  return generic_utils.serialize_keras_object(obj)
8217/8217 [==============================] - 1110s 134ms/step - loss: 2.5337 - accuracy: 0.3838 - val_loss: 1.6744 - val_accuracy: 0.3807
Epoch 2/200
8217/8217 [==============================] - ETA: 0s - loss: 2.2021 - accuracy: 0.4353INFO:tensorflow:Assets written to: ../../ai/gb_32_shuffled/model/efficientnet_pooling_256_12_best/assets
/usr/local/lib/python3.8/dist-packages/keras/engine/functional.py:1410: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.
  layer_config = serialize_layer_fn(layer)
/usr/local/lib/python3.8/dist-packages/keras/saving/saved_model/layer_serialization.py:112: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.
  return generic_utils.serialize_keras_object(obj)
8217/8217 [==============================] - 1106s 135ms/step - loss: 2.2021 - accuracy: 0.4353 - val_loss: 1.6776 - val_accuracy: 0.3881
Epoch 3/200
8217/8217 [==============================] - ETA: 0s - loss: 2.0392 - accuracy: 0.4612INFO:tensorflow:Assets written to: ../../ai/gb_32_shuffled/model/efficientnet_pooling_256_12_best/assets
/usr/local/lib/python3.8/dist-packages/keras/engine/functional.py:1410: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.
  layer_config = serialize_layer_fn(layer)
/usr/local/lib/python3.8/dist-packages/keras/saving/saved_model/layer_serialization.py:112: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.
  return generic_utils.serialize_keras_object(obj)
8217/8217 [==============================] - 1107s 135ms/step - loss: 2.0392 - accuracy: 0.4612 - val_loss: 1.6744 - val_accuracy: 0.3967
Epoch 4/200
8217/8217 [==============================] - ETA: 0s - loss: 1.9295 - accuracy: 0.4762INFO:tensorflow:Assets written to: ../../ai/gb_32_shuffled/model/efficientnet_pooling_256_12_best/assets
/usr/local/lib/python3.8/dist-packages/keras/engine/functional.py:1410: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.
  layer_config = serialize_layer_fn(layer)
/usr/local/lib/python3.8/dist-packages/keras/saving/saved_model/layer_serialization.py:112: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.
  return generic_utils.serialize_keras_object(obj)
8217/8217 [==============================] - 1109s 135ms/step - loss: 1.9295 - accuracy: 0.4762 - val_loss: 1.6667 - val_accuracy: 0.4117
Epoch 5/200
8217/8217 [==============================] - ETA: 0s - loss: 1.8435 - accuracy: 0.4902INFO:tensorflow:Assets written to: ../../ai/gb_32_shuffled/model/efficientnet_pooling_256_12_best/assets
/usr/local/lib/python3.8/dist-packages/keras/engine/functional.py:1410: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.
  layer_config = serialize_layer_fn(layer)
/usr/local/lib/python3.8/dist-packages/keras/saving/saved_model/layer_serialization.py:112: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.
  return generic_utils.serialize_keras_object(obj)
8217/8217 [==============================] - 1108s 135ms/step - loss: 1.8435 - accuracy: 0.4902 - val_loss: 1.6850 - val_accuracy: 0.4187
Epoch 6/200
8217/8217 [==============================] - 1058s 129ms/step - loss: 1.7738 - accuracy: 0.5013 - val_loss: 1.7244 - val_accuracy: 0.4138
Epoch 00006: early stopping
time elapsed:    6599.4s
INFO:tensorflow:Assets written to: ../../ai/gb_32_shuffled/model/efficientnet_pooling_256_12/assets
/usr/local/lib/python3.8/dist-packages/keras/engine/functional.py:1410: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.
  layer_config = serialize_layer_fn(layer)
/usr/local/lib/python3.8/dist-packages/keras/saving/saved_model/layer_serialization.py:112: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.
  return generic_utils.serialize_keras_object(obj)
creating ImageDataGenerators
Found 262937 images belonging to 12 classes.
Found 42251 images belonging to 12 classes.
Found 38677 images belonging to 12 classes.
assessing performance of train dataset
prediction of train saved
perf_model_accuracy for train: 0.5069313049316406
assessing performance of val dataset
prediction of val saved
perf_model_accuracy for val: 0.4137890338897705
assessing performance of secret dataset
prediction of secret saved
perf_model_accuracy for secret: 0.4087183475494385

Verify results

# Reload a saved model and index an image folder for an independent check.
# NOTE(review): both paths are hard-coded and point to the "gb_8" model and the
# "8_shuffled_balanced_12" chips, not to the 32_shuffled locations in `specs`
# used for training above - confirm this is intentional.
model = keras.models.load_model("../../urbangrammar_samba/spatial_signatures/ai/gb_8/model/efficientnet_pooling_256_12_best")
datagen = keras.preprocessing.image.ImageDataGenerator()
generator = datagen.flow_from_directory(
    "../../chips_gb/8_shuffled_balanced_12/train/",
    target_size=(224, 224),
    batch_size=32,
    class_mode='sparse',
    # Keep the file order fixed so predictions line up with generator.labels.
    shuffle=False)
Found 395690 images belonging to 12 classes.
# True class index of every image, in generator order.
y = generator.labels
# Per-class scores for every image, then the index of the highest score as the
# predicted class.
y_pred_probs = model.predict(generator)
y_pred = y_pred_probs.argmax(axis=1)
def accuracy(y, y_pred):
    """Return the share of entries where `y_pred` equals `y`.

    The value is computed with a fresh `tf.keras.metrics.Accuracy` metric and
    returned as a NumPy value.
    """
    metric = tf.keras.metrics.Accuracy()
    metric.update_state(y, y_pred)
    score = metric.result()
    return score.numpy()


def within_class_metrics(y, y_pred, y_probs):
    """Compute true-class probability and per-class metrics.

    Parameters
    ----------
    y : true class index per sample.
    y_pred : predicted class index per sample.
    y_probs : 2-D array of per-class scores, one row per sample and one
        column per class.

    Returns
    -------
    tuple of
        - mean, over all samples, of the score assigned to the true class,
        - list with the accuracy within each class,
        - list with the mean score each class's samples give to that class.
    """
    n_classes = y_probs.shape[1]
    true_class_prob = numpy.zeros(y_pred.shape)
    per_class_accuracy = []
    per_class_prob = []
    for label in range(n_classes):
        # Boolean mask of the samples whose true class is `label`.
        members = y == label
        # Score those samples assign to their own (true) class.
        own_scores = y_probs[members, label]
        true_class_prob[members] = own_scores
        per_class_accuracy.append(accuracy(y[members], y_pred[members]))
        per_class_prob.append(own_scores.mean())
    return true_class_prob.mean(), per_class_accuracy, per_class_prob
# Overall accuracy across all evaluated images.
accuracy(y, y_pred)
0.2887412
# Element [1] of the returned tuple: the accuracy within each class.
within_class_metrics(y, y_pred, y_pred_probs)[1]
[0.75011426,
 0.5083132,
 0.5638286,
 0.44788584,
 0.4550801,
 0.39879662,
 0.70287836,
 0.5022857,
 0.25628573,
 0.8010856,
 0.43285716,
 0.75637144]