Skip to article frontmatter | Skip to article content
Site not loading correctly?

This may be due to an incorrect BASE_URL configuration. See the MyST Documentation for reference.

Object Detection

Introduction

Learning Objectives

Understanding Object Detection

Classification vs. Detection

Key Concepts

Detection Architectures

Two-Stage Detectors

Single-Stage Detectors

Transformer-Based Detectors

Zero-Shot Detection

Choosing an Architecture

Preparing Detection Datasets

Annotation Formats

The NWPU-VHR-10 Dataset

Evaluating Detection Results

Mean Average Precision (mAP)

Precision-Recall Curves

IoU Thresholds

Import Libraries

import os
import json

import geoai

Download the NWPU-VHR-10 Dataset

# Fetch the NWPU-VHR-10 archive. The value returned by download_file is used
# as a directory path below (it is handed to os.listdir).
url = "https://data.source.coop/opengeos/geoai/NWPU-VHR-10.zip"
data_dir = geoai.download_file(url)

print(f"Dataset directory: {data_dir}")
dir_entries = os.listdir(data_dir)
print(f"Contents: {dir_entries}")

Explore the Dataset

# Print each entry of geoai.NWPU_VHR10_CLASSES next to its position in that
# sequence. The header is a plain string: it has no placeholders, so an
# f-string prefix would be redundant.
print("\nNWPU-VHR-10 Classes:")
for i, name in enumerate(geoai.NWPU_VHR10_CLASSES):
    print(f"  {i}: {name}")

Prepare the Dataset

# Build the dataset splits; `splits` is indexed by string keys throughout the
# rest of the script.
splits = geoai.prepare_nwpu_vhr10(data_dir, val_split=0.2, seed=42)

# Entries reported verbatim.
for label, key in (
    ("Images directory", "images_dir"),
    ("Number of classes", "num_classes"),
    ("Class names", "class_names"),
):
    print(f"{label}: {splits[key]}")

# Entries reported by their length only.
for label, key in (
    ("Training images", "train_image_ids"),
    ("Validation images", "val_image_ids"),
):
    print(f"{label}: {len(splits[key])}")

Visualize Sample Annotations

# Display settings for the annotation preview, kept together so they can be
# adjusted in one place.
preview_options = {
    "num_samples": 6,
    "random": True,
    "seed": 1,
    "cols": 3,
    "figsize": (12, 6),
}

geoai.visualize_coco_annotations(
    annotations_path=splits["annotations_path"],
    images_dir=splits["images_dir"],
    **preview_options,
)

Train a Multi-Class Detection Model

# Directory handed to the trainer; a later cell reads training_history.pth
# from this same location.
output_dir = "nwpu_output"

# Optimisation settings grouped so they are easy to change between runs.
training_options = {
    "batch_size": 4,
    "num_epochs": 10,
    "learning_rate": 0.005,
    "val_split": 0.1,
    "seed": 42,
}

# The return value is kept as `model_path` and passed to the evaluation,
# inference and publishing calls further down.
model_path = geoai.train_multiclass_detector(
    images_dir=splits["images_dir"],
    annotations_path=splits["train_annotations"],
    output_dir=output_dir,
    model_name="fasterrcnn_resnet50_fpn_v2",
    class_names=splits["class_names"],
    num_channels=3,
    pretrained=True,
    verbose=True,
    **training_options,
)

Plot Training Metrics

# Plot the history file located inside the training output directory.
history_file = os.path.join(output_dir, "training_history.pth")
geoai.plot_detection_training_history(history_path=history_file)

Evaluate with COCO Metrics

# Skip the first class name (the background entry) when passing labels to the
# evaluator; num_classes is still taken unchanged from `splits`.
foreground_class_names = splits["class_names"][1:]

metrics = geoai.evaluate_multiclass_detector(
    model_path=model_path,
    images_dir=splits["images_dir"],
    annotations_path=splits["val_annotations"],
    num_classes=splits["num_classes"],
    class_names=foreground_class_names,
    batch_size=4,
)

Run Inference on Sample Images

# Load validation data to pick a test image.
# The encoding is given explicitly so the JSON is decoded the same way on
# every platform instead of depending on the locale's default encoding.
with open(splits["val_annotations"], "r", encoding="utf-8") as f:
    val_data = json.load(f)

# Use the first image record listed in the validation annotations.
test_img_info = val_data["images"][0]
test_img_path = os.path.join(splits["images_dir"], test_img_info["file_name"])
print(f"Test image: {test_img_path}")
output_raster = "nwpu_detection_output.tif"

# The result is unpacked into three values: a path, the elapsed time (reported
# in seconds below) and a sized collection of detections.
result_path, inference_time, detections = geoai.multiclass_detection(
    input_path=test_img_path,
    output_path=output_raster,
    model_path=model_path,
    num_classes=splits["num_classes"],
    class_names=splits["class_names"],
    window_size=512,
    overlap=256,
    confidence_threshold=0.5,
    batch_size=4,
    num_channels=3,
)

print(f"\nInference time: {inference_time:.2f}s")
print(f"Total detections: {len(detections)}")

Visualize Detections

# Draw the detections from the single-image run on top of the same test image.
detection_labels = splits["class_names"]
detection_figsize = (12, 10)

geoai.visualize_multiclass_detections(
    image_path=test_img_path,
    detections=detections,
    class_names=detection_labels,
    confidence_threshold=0.5,
    figsize=detection_figsize,
)

Batch Inference on Multiple Images

# Take at most the first four validation image records and resolve each one
# to a path inside the images directory.
preview_records = val_data["images"][:4]
val_image_paths = [
    os.path.join(splits["images_dir"], record["file_name"])
    for record in preview_records
]

batch_output_dir = "nwpu_batch_output"
results = geoai.batch_multiclass_detection(
    image_paths=val_image_paths,
    output_dir=batch_output_dir,
    model_path=model_path,
    num_classes=splits["num_classes"],
    class_names=splits["class_names"],
    confidence_threshold=0.5,
    num_channels=3,
    figsize=(16, 12),
)

Publish and Reuse Models

Push to Hugging Face Hub

from huggingface_hub import notebook_login

# Log in first; the upload call below runs only after notebook_login() returns.
notebook_login()

# Placeholder repository id: substitute your own user name before running.
hub_repo_id = "your-username/nwpu-vhr10-fasterrcnn"

# NOTE(review): this rebinds `url`, which earlier held the dataset download
# address.
url = geoai.push_detector_to_hub(
    model_path=model_path,
    repo_id=hub_repo_id,
    model_name="fasterrcnn_resnet50_fpn_v2",
    num_classes=splits["num_classes"],
    class_names=splits["class_names"],
)

Run Inference from Hub

# Image to run through the hub-hosted model, and the file the prediction call
# is asked to write to.
sample_img_path = os.path.join(splits["images_dir"], "608.jpg")
hub_output_path = "hub_detection.tif"

result_path, inference_time, detections = geoai.predict_detector_from_hub(
    input_path=sample_img_path,
    output_path=hub_output_path,
    repo_id="giswqs/nwpu-vhr10-fasterrcnn",
    confidence_threshold=0.5,
)

print(f"Inference time: {inference_time:.2f}s")
print(f"Total detections: {len(detections)}")

# Clean up: remove the output file if it is present. The visualisation below
# is given the source image path and the `detections` object, not this file.
if os.path.exists(hub_output_path):
    os.remove(hub_output_path)

geoai.visualize_multiclass_detections(
    image_path=sample_img_path,
    detections=detections,
    class_names=geoai.NWPU_VHR10_CLASSES,
    confidence_threshold=0.5,
    figsize=(12, 10),
)

Key Takeaways

Exercises

Exercise 1: Training with a Different Architecture

Exercise 2: Confidence Threshold Analysis

Exercise 3: Hyperparameter Sensitivity