# File: CNN/Models/model_sk_convnext_v1.py
# (originally listed as 256 lines, 5.9 KiB, Python)
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.applications import ConvNeXtTiny
from tensorflow.keras.applications.convnext import preprocess_input
from time import time
import matplotlib.pyplot as plt
# This is the new 2026 version
# This model was trained with 13,185 images
# See CNNImageProcessor solution for creating the test images used to train this model

# Figure out if we are training on CPU or GPU (an empty list means CPU only).
print("GPUs:", tf.config.list_physical_devices('GPU'))

# -----------------------
# Configuration
# -----------------------
modelname = 'convnext_20260228_90.h5.keras'

# ConvNeXt-Tiny was pretrained at 224x224. The dataset loader below resizes our
# source images (presumably 128x128 per the original note -- TODO confirm
# against CNNImageProcessor) up to this resolution.
actualImageDimension = 224
convneXtImageDimension = 224
image_size = (actualImageDimension, actualImageDimension)  # was assigned twice; deduplicated

# Shuffle buffer size for the training tf.data pipeline.
shuffle_count = 3000

dataset_path = 'C:\\boneyard\\DeepLearning\\data'
batch_size = 16  # try 16 was 32

# TensorBoard log directory, unique per launch via the timestamp.
log_dir = f'logs/convnext_{int(time())}'
tensorboard = TensorBoard(log_dir=log_dir)
# -----------------------
# Dataset Loading
# -----------------------
# Both splits read the same directory with the same seed and validation_split,
# so the 80/20 partition is consistent between the two loaders.
_loader_kwargs = dict(
    label_mode="binary",
    validation_split=0.2,
    image_size=image_size,
    color_mode='rgb',  # IMPORTANT for grayscale datasets
    batch_size=batch_size,
    seed=50,
)

train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    dataset_path,
    subset="training",
    **_loader_kwargs,
)
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    dataset_path,
    subset="validation",
    **_loader_kwargs,
)
# -----------------------
# Data Augmentation
# -----------------------
# data_augmentation = tf.keras.Sequential([
# layers.RandomFlip("horizontal"),
# layers.RandomRotation(0.1)
# ])
#data_augmentation = tf.keras.Sequential([
# layers.RandomFlip("horizontal"),
# layers.RandomRotation(0.1),
# layers.RandomRotation(0.1, fill_mode="nearest"),
# layers.RandomZoom(0.1)
#])
# def preprocess_train(x, y):
# x = data_augmentation(x, training=True)
# return x, y
def preprocess_val(x, y):
    """Identity pass-through for validation batches (no augmentation applied)."""
    return x, y
# -----------------------
# tf.data pipeline tuning
# -----------------------
# Validation only needs prefetching; training additionally shuffles with the
# configured buffer size (was a hard-coded 3000, now uses shuffle_count so the
# constant declared in the configuration section is actually honored).
val_ds = val_ds.prefetch(tf.data.AUTOTUNE)

train_ds = (
    train_ds
    .shuffle(shuffle_count)
    .prefetch(tf.data.AUTOTUNE)
)
# for images, labels in train_ds.take(1):
# plt.figure(figsize=(10,10))
# for i in range(12):
# ax = plt.subplot(3,4,i+1)
# plt.imshow(images[i].numpy().astype("uint8"))
# plt.title(int(labels[i].numpy()))
# plt.axis("off")
# plt.tight_layout()
# plt.show()
# -----------------------
# ConvNeXt-Tiny Base Model
# -----------------------
# ImageNet-pretrained backbone without its classifier head. It starts frozen;
# the fine-tuning phase later unfreezes the top of it.
base_model = ConvNeXtTiny(
    include_top=False,
    weights='imagenet',
    input_shape=(convneXtImageDimension, convneXtImageDimension, 3),
)
base_model.trainable = False  # Freeze for initial training
# -----------------------
# Build Full Model (Preprocessing Inside Model)
# -----------------------
inputs = tf.keras.Input(shape=(actualImageDimension, actualImageDimension, 3))

# ConvNeXt preprocessing followed by the (currently frozen) backbone.
features = base_model(preprocess_input(inputs))

# Dense classification head on top of the pooled backbone features.
head = layers.GlobalAveragePooling2D()(features)
head = layers.BatchNormalization()(head)
head = layers.Dense(256, activation="relu")(head)
head = layers.Dropout(0.4)(head)
outputs = layers.Dense(1, activation="sigmoid")(head)

model = tf.keras.Model(inputs, outputs)
model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()
# -----------------------
# Callbacks
# -----------------------
# Stop after 15 epochs without val_loss improvement, rolling back to the best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=15,
                               restore_best_weights=True, verbose=1)

# Persist the best-val_accuracy model to `modelname` as training progresses.
checkpointer = ModelCheckpoint(filepath=modelname, monitor='val_accuracy',
                               save_best_only=True, verbose=1)

# Halve the learning rate after 5 stagnant val_loss epochs, floored at 1e-6.
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5,
                                 min_lr=1e-6, verbose=1)
# -----------------------
# Initial Training
# -----------------------
# Phase 1: train only the dense head (the backbone was frozen above).
# EarlyStopping (patience=15) will typically end this before 50 epochs and
# restore the best weights; the checkpointer writes the best model to disk.
history = model.fit(
train_ds,
epochs=50,
validation_data=val_ds,
callbacks=[tensorboard, lr_scheduler, early_stopping, checkpointer]
)
# -----------------------
# Fine-Tuning
# -----------------------
# Phase 2: unfreeze the top of the backbone and keep training at a 10x lower
# learning rate. Earlier layers stay frozen (recommended).
base_model.trainable = True
for layer in base_model.layers[:-40]:
    layer.trainable = False

# Recompile so the updated trainable flags take effect.
model.compile(
    optimizer=Adam(1e-5),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

history_fine = model.fit(
    train_ds,
    epochs=50,
    validation_data=val_ds,
    callbacks=[tensorboard, lr_scheduler, early_stopping, checkpointer],
)
# -----------------------
# Plot Results
# -----------------------
def _plot_metric(hist, metric, label, title_prefix):
    """Plot train/val curves for one metric from a Keras History object.

    Args:
        hist: History object with `history[metric]` and `history['val_' + metric]`.
        metric: key into hist.history (e.g. 'accuracy' or 'loss').
        label: display name used in the legend, title, and y-axis.
        title_prefix: prepended to the figure title.
    """
    plt.figure()
    plt.plot(hist.history[metric], label=f'Train {label}')
    plt.plot(hist.history[f'val_{metric}'], label=f'Val {label}')
    plt.title(f'{title_prefix} {label}')
    plt.xlabel('Epochs')
    plt.ylabel(label)
    plt.legend()
    plt.show()

def plot_history(hist, title_prefix=""):
    """Show accuracy and loss curves (train vs. validation) for `hist`.

    The two near-identical plotting sections of the original are factored
    into _plot_metric; labels and titles are unchanged.
    """
    _plot_metric(hist, 'accuracy', 'Accuracy', title_prefix)
    _plot_metric(hist, 'loss', 'Loss', title_prefix)
# Visualize both training phases (head-only training, then fine-tuning).
plot_history(history, "Initial Training")
plot_history(history_fine, "Fine-Tuning")
# -----------------------
# Save Final Model
# -----------------------
# Final explicit save is disabled: the ModelCheckpoint callback above already
# writes the best (by val_accuracy) model to `modelname` during training.
#model.save(modelname)