Object Detection - GeoAI with Python

Introduction¶

Learning Objectives¶

Understanding Object Detection¶

Classification vs. Detection¶

Key Concepts¶

Detection Architectures¶

Two-Stage Detectors¶

Single-Stage Detectors¶

Transformer-Based Detectors¶

Zero-Shot Detection¶

Choosing an Architecture¶

Preparing Detection Datasets¶

Annotation Formats¶

The NWPU-VHR-10 Dataset¶

Evaluating Detection Results¶

Mean Average Precision (mAP)¶

Precision-Recall Curves¶

IoU Thresholds¶

Import Libraries¶

import os
import json

import geoai

Download the NWPU-VHR-10 Dataset¶

# Download the NWPU-VHR-10 archive. The value returned by
# geoai.download_file is used below (and by later cells) as the dataset
# directory.
# NOTE(review): os.listdir(data_dir) needs a directory, so this presumes the
# helper unpacks the zip and returns the extracted folder — confirm.
url = "https://data.source.coop/opengeos/geoai/NWPU-VHR-10.zip"
data_dir = geoai.download_file(url)

# Report where the data landed and list its top-level entries.
print(f"Dataset directory: {data_dir}")
print(f"Contents: {os.listdir(data_dir)}")

Explore the Dataset¶

# Print every entry of geoai.NWPU_VHR10_CLASSES next to its positional index.
# The header line has no placeholders, so it is a plain string literal rather
# than an f-string; the printed text is unchanged.
print("\nNWPU-VHR-10 Classes:")
for i, name in enumerate(geoai.NWPU_VHR10_CLASSES):
    print(f"  {i}: {name}")

Prepare the Dataset¶

# Prepare the dataset with val_split=0.2 and a fixed seed=42 (presumably so the
# train/validation partition is repeatable across runs — confirm).
# The returned mapping is indexed by the keys printed below; later cells also
# read "annotations_path", "train_annotations" and "val_annotations" from it.
splits = geoai.prepare_nwpu_vhr10(data_dir, val_split=0.2, seed=42)

# Summarize what the preparation step produced.
print(f"Images directory: {splits['images_dir']}")
print(f"Number of classes: {splits['num_classes']}")
print(f"Class names: {splits['class_names']}")
print(f"Training images: {len(splits['train_image_ids'])}")
print(f"Validation images: {len(splits['val_image_ids'])}")

Visualize Sample Annotations¶

# Preview six annotated samples laid out in three columns.
# This reads splits["annotations_path"], not the train/val annotation files
# used by later cells — presumably the unsplit annotation file; confirm.
geoai.visualize_coco_annotations(
    annotations_path=splits["annotations_path"],
    images_dir=splits["images_dir"],
    num_samples=6,
    # random=True together with seed=1 presumably makes the random pick
    # repeatable between runs — confirm against the geoai documentation.
    random=True,
    seed=1,
    cols=3,
    figsize=(12, 6),
)

Train a Multi-Class Detection Model¶

# Directory handed to the trainer; the plotting cell later looks for
# "training_history.pth" inside it.
output_dir = "nwpu_output"

# Train on the training annotations only. The returned value is kept as
# model_path and passed to the evaluation, inference and hub-upload calls.
model_path = geoai.train_multiclass_detector(
    images_dir=splits["images_dir"],
    annotations_path=splits["train_annotations"],
    output_dir=output_dir,
    model_name="fasterrcnn_resnet50_fpn_v2",
    class_names=splits["class_names"],
    num_channels=3,
    batch_size=4,
    num_epochs=10,
    learning_rate=0.005,
    # NOTE(review): this val_split is applied to the train annotations, which
    # come from a dataset already split with val_split=0.2 above — presumably
    # an internal hold-out for monitoring during training; confirm.
    val_split=0.1,
    seed=42,
    pretrained=True,
    verbose=True,
)

Plot Training Metrics¶

# Plot the history stored at <output_dir>/training_history.pth (presumably
# written by the training call — confirm the file name against geoai's docs).
history_file = os.path.join(output_dir, "training_history.pth")
geoai.plot_detection_training_history(history_path=history_file)

Evaluate with COCO Metrics¶

# Evaluate the trained model against the validation annotations.
metrics = geoai.evaluate_multiclass_detector(
    model_path=model_path,
    images_dir=splits["images_dir"],
    annotations_path=splits["val_annotations"],
    num_classes=splits["num_classes"],
    # The first entry is dropped here as the background class.
    # NOTE(review): every other call in this file passes the full
    # splits["class_names"] list, while num_classes above is not reduced —
    # confirm this is what evaluate_multiclass_detector expects.
    class_names=splits["class_names"][1:],  # Exclude background
    batch_size=4,
)

Run Inference on Sample Images¶

# Load the validation annotations to pick a test image.
# JSON text is UTF-8 by specification, so the encoding is named explicitly
# instead of relying on the platform's locale default (which can differ on
# Windows and mis-decode non-ASCII file names or category labels).
with open(splits["val_annotations"], "r", encoding="utf-8") as f:
    val_data = json.load(f)

# Use the first image record listed in the validation annotations and resolve
# its file name against the images directory.
test_img_info = val_data["images"][0]
test_img_path = os.path.join(splits["images_dir"], test_img_info["file_name"])
print(f"Test image: {test_img_path}")

# File name handed to the detector as its output path.
output_raster = "nwpu_detection_output.tif"

# Run the trained model on the selected test image. The call returns three
# values: a result path, the elapsed inference time (printed below as seconds)
# and a sized collection of detections.
result_path, inference_time, detections = geoai.multiclass_detection(
    input_path=test_img_path,
    output_path=output_raster,
    model_path=model_path,
    num_classes=splits["num_classes"],
    class_names=splits["class_names"],
    # overlap is half of window_size; presumably both are in pixels and
    # describe tiled inference — confirm against the geoai documentation.
    window_size=512,
    overlap=256,
    confidence_threshold=0.5,
    batch_size=4,
    num_channels=3,
)

print(f"\nInference time: {inference_time:.2f}s")
print(f"Total detections: {len(detections)}")

Visualize Detections¶

# Draw the detections returned by the inference call over the test image.
# confidence_threshold=0.5 is the same value that was passed to inference.
geoai.visualize_multiclass_detections(
    image_path=test_img_path,
    detections=detections,
    class_names=splits["class_names"],
    confidence_threshold=0.5,
    figsize=(12, 10),
)

Batch Inference on Multiple Images¶

# Resolve on-disk paths for the first four image records of the validation
# annotations loaded earlier.
batch_records = val_data["images"][:4]
val_image_paths = [
    os.path.join(splits["images_dir"], record["file_name"])
    for record in batch_records
]

# Run detection over the whole batch with the same model, class list and
# confidence threshold as the single-image run.
results = geoai.batch_multiclass_detection(
    image_paths=val_image_paths,
    output_dir="nwpu_batch_output",
    model_path=model_path,
    num_classes=splits["num_classes"],
    class_names=splits["class_names"],
    confidence_threshold=0.5,
    num_channels=3,
    figsize=(16, 12),
)

Publish and Reuse Models¶

Push to Hugging Face Hub¶

# Imported here, next to its only use in this file.
from huggingface_hub import notebook_login

# Presumably prompts for a Hugging Face access token inside the notebook —
# confirm against the huggingface_hub documentation.
notebook_login()

# Upload the trained detector. "your-username" in repo_id is a placeholder to
# replace with a real account name before running.
# NOTE: this rebinds the module-level `url`, which earlier held the dataset
# download URL.
url = geoai.push_detector_to_hub(
    model_path=model_path,
    repo_id="your-username/nwpu-vhr10-fasterrcnn",
    # Same model_name string that was passed to train_multiclass_detector.
    model_name="fasterrcnn_resnet50_fpn_v2",
    num_classes=splits["num_classes"],
    class_names=splits["class_names"],
)

Run Inference from Hub¶

# Hard-coded sample image; presumes "608.jpg" exists in the images directory.
sample_img_path = os.path.join(splits["images_dir"], "608.jpg")

# Run inference with a model fetched by repo id. This rebinds result_path,
# inference_time and detections from the earlier local-model inference.
# NOTE(review): repo_id is "giswqs/nwpu-vhr10-fasterrcnn", not the
# "your-username/..." repository pushed above — presumably an already
# published copy so this cell runs without pushing first; confirm.
result_path, inference_time, detections = geoai.predict_detector_from_hub(
    input_path=sample_img_path,
    output_path="hub_detection.tif",
    repo_id="giswqs/nwpu-vhr10-fasterrcnn",
    confidence_threshold=0.5,
)

print(f"Inference time: {inference_time:.2f}s")
print(f"Total detections: {len(detections)}")

# Clean up the raster written by the hub inference call; the detections
# themselves stay in memory for the visualization that follows.
# Remove directly and tolerate "already gone" instead of check-then-remove,
# which leaves a window between the existence check and the removal.
try:
    os.remove("hub_detection.tif")
except FileNotFoundError:
    pass

# Draw the hub model's detections over the sample image.
geoai.visualize_multiclass_detections(
    image_path=sample_img_path,
    detections=detections,
    # Uses the package-level class list here, whereas the local-model cells
    # pass splits["class_names"] — presumably the same names; confirm.
    class_names=geoai.NWPU_VHR10_CLASSES,
    confidence_threshold=0.5,
    figsize=(12, 10),
)

Introduction¶

Learning Objectives¶

Understanding Object Detection¶

Classification vs. Detection¶

Key Concepts¶

Detection Architectures¶

Two-Stage Detectors¶

Single-Stage Detectors¶

Transformer-Based Detectors¶

Zero-Shot Detection¶

Choosing an Architecture¶

Preparing Detection Datasets¶

Annotation Formats¶

The NWPU-VHR-10 Dataset¶

Evaluating Detection Results¶

Mean Average Precision (mAP)¶

Precision-Recall Curves¶

IoU Thresholds¶

Import Libraries¶

Download the NWPU-VHR-10 Dataset¶

Explore the Dataset¶

Prepare the Dataset¶

Visualize Sample Annotations¶

Train a Multi-Class Detection Model¶

Plot Training Metrics¶

Evaluate with COCO Metrics¶

Run Inference on Sample Images¶

Visualize Detections¶

Batch Inference on Multiple Images¶

Publish and Reuse Models¶

Push to Hugging Face Hub¶

Run Inference from Hub¶

Key Takeaways¶

Exercises¶

Exercise 1: Training with a Different Architecture¶

Exercise 2: Confidence Threshold Analysis¶

Exercise 3: Hyperparameter Sensitivity¶