update dataloader

This commit is contained in:
2026-04-04 17:49:31 +03:00
parent 2561ba1cbf
commit 15cad7fa65
2 changed files with 358 additions and 569 deletions

View File

@@ -1,30 +1,30 @@
config = {
# Параметры оптимизатора
"learning_rate": 2e-4,
"beta1": 0.5,
"beta2": 0.999,
# Параметры обучения
"batch_size": 4,
"epochs": 100,
# Параметры GAN
"gan_mode": "vanilla", # "vanilla", "lsgan", или "wgangp"
"lambda_L1": 100.0, # Вес L1 потерь
# Регуляризация
"grad_clip": 1.0,
# Ранняя остановка
"early_stopping_patience": 20,
# Выходные данные
"output_dir": "runs/gan_training",
# Логирование
"log_interval": 10, # Логировать каждые N батчей
"save_interval": 5, # Сохранять чекпоинт каждые N эпох
"data_dir": r"C:\Users\admin\Projects\autopilot\datasets\ya_go_maps\images",
"batch_size": 32,
"epochs": 100,
"gan_mode": "vanilla",
"lambda_L1": 100.0,
"grad_clip": 1.0,
"early_stopping_patience": 20,
"output_dir": "runs/gan_training",
"log_interval": 10,
"save_interval": 5,
"data_dir": r"C:\Users\admin\Projects\autopilot\datasets\ya_go_maps\images",
"image_size": [256, 256],
"train_split": 0.8,
"num_workers": 0,
}
import os
from typing import Dict, List, Tuple
import torch
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
import os
import random
@@ -34,17 +34,11 @@ import cv2
import numpy as np
import torch
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torch.utils.data import DataLoader, Dataset, Subset
from torchvision import transforms
class YaGoDataset(Dataset):
"""
Dataset for homography estimation between Yandex and Google map image pairs.
This dataset loads pairs of images (Yandex and Google maps) and provides
homography matrices for data augmentation and training.
"""
def __init__(
self,
root_dir: str,
@@ -53,132 +47,76 @@ class YaGoDataset(Dataset):
max_samples: Optional[int] = None,
image_size: Tuple[int, int] = (700, 700),
cache_homographies: bool = True,
device: torch.device = None,
device=None,
):
"""
Initialize the YaGoDataset.
Args:
root_dir: Directory containing image pairs (format: {idx:04d}_google.png, {idx:04d}_yandex.png)
transform: Optional torchvision transforms to apply
augment: Whether to apply homography-based data augmentation
max_samples: Maximum number of samples to load (None for all)
image_size: Target size for images (height, width)
cache_homographies: Whether to cache generated homography matrices to disk
"""
self.root_dir = root_dir
self.transform = transform
self.augment = augment
self.image_size = image_size
self.cache_homographies = cache_homographies
self.device = device
# Find all image pairs
self.image_pairs = self._discover_image_pairs()
if max_samples is not None:
self.image_pairs = self.image_pairs[:max_samples]
print(f"Found {len(self.image_pairs)} image pairs in {root_dir}")
def _discover_image_pairs(self) -> List[Dict[str, Any]]:
"""Discover all Google-Yandex image pairs in the dataset directory."""
image_pairs = []
# Get all Google images
google_files = [
f for f in os.listdir(self.root_dir) if f.endswith("_google.png")
]
google_files = [f for f in os.listdir(self.root_dir) if f.endswith("_google.png")]
for google_file in sorted(google_files):
# Extract index from filename
idx_str = google_file.split("_")[0]
try:
idx = int(idx_str)
except ValueError:
continue
# Check if corresponding Yandex image exists
yandex_file = f"{idx:04d}_yandex.png"
yandex_path = os.path.join(self.root_dir, yandex_file)
if os.path.exists(yandex_path):
image_pairs.append(
{
"idx": idx,
"google_path": os.path.join(self.root_dir, google_file),
"yandex_path": yandex_path,
}
)
image_pairs.append({
"idx": idx,
"google_path": os.path.join(self.root_dir, google_file),
"yandex_path": yandex_path,
})
return image_pairs
def __len__(self) -> int:
"""Return the number of image pairs in the dataset."""
return len(self.image_pairs)
def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]:
"""
Get a sample from the dataset.
Returns a dictionary with:
- 'google_img': Google map image tensor
- 'yandex_img': Yandex map image tensor
- 'homography': Ground truth homography matrix (3x3)
- 'idx': Sample index
"""
pair_info = self.image_pairs[idx]
google_path = pair_info["google_path"]
yandex_path = pair_info["yandex_path"]
same_domain = True
if np.random.rand() > 0.5:
random_idx = np.random.randint(0, len(self))
google_path = self.image_pairs[random_idx]["google_path"]
same_domain = random_idx == idx
# Load images
yandex_img = Image.open(yandex_path).convert("RGB")
google_img = Image.open(google_path).convert("RGB")
# Resize images to target size
google_img = google_img.resize(
(self.image_size[1], self.image_size[0]), Image.BILINEAR
)
yandex_img = yandex_img.resize(
(self.image_size[1], self.image_size[0]), Image.BILINEAR
)
# Get or generate homography matrix
matrices: Tuple[np.ndarray, np.ndarray, np.ndarray] = (
self._get_homography_matrix(pair_info["idx"])
)
# Apply data augmentation if enabled
google_img = google_img.resize((self.image_size[1], self.image_size[0]), Image.BILINEAR)
yandex_img = yandex_img.resize((self.image_size[1], self.image_size[0]), Image.BILINEAR)
matrices = self._get_homography_matrix(pair_info["idx"])
if self.augment:
google_img, yandex_img, homography_matrix = self._apply_augmentation(
google_img, yandex_img, matrices
)
# Convert images to tensors
homography_tensor = torch.from_numpy(homography_matrix).float()
else:
homography_tensor = torch.from_numpy(np.eye(3))
if self.transform:
google_img = self.transform(google_img)
yandex_img = self.transform(yandex_img)
else:
# Default conversion to tensor
google_img = (
torch.from_numpy(np.array(google_img)).float().permute(2, 0, 1) / 255.0
)
yandex_img = (
torch.from_numpy(np.array(yandex_img)).float().permute(2, 0, 1) / 255.0
)
# Convert homography to tensor
if self.augment:
homography_tensor = torch.from_numpy(homography_matrix).float()
else:
homography_tensor = torch.from_numpy(np.eye(3))
google_img = torch.from_numpy(np.array(google_img)).float().permute(2, 0, 1) / 255.0
yandex_img = torch.from_numpy(np.array(yandex_img)).float().permute(2, 0, 1) / 255.0
return {
"google_img": google_img,
"yandex_img": yandex_img,
@@ -187,94 +125,36 @@ class YaGoDataset(Dataset):
"idx": torch.tensor(pair_info["idx"], dtype=torch.long),
}
def _get_homography_matrix(
self, idx: int
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
"""
Get homography matrices for a given index.
If cached homography exists, load it. Otherwise generate a new one.
"""
# Generate new homography matrix
def _get_homography_matrix(self, idx: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
homography_matrix_1 = self.generate_random_homography()
homography_matrix_2 = self.generate_random_homography()
homography_matrix_r = np.linalg.inv(homography_matrix_1) @ homography_matrix_2
result = (homography_matrix_1, homography_matrix_2, homography_matrix_r)
return result
return (homography_matrix_1, homography_matrix_2, homography_matrix_r)
def generate_random_homography(self) -> np.ndarray:
"""
Generate a random homography matrix for data augmentation.
Returns:
np.ndarray: 3x3 homography matrix.
"""
# Generate random affine transformation parameters
scale = np.random.uniform(0.8, 1.2) # scaling factor
tx = np.random.uniform(-0.50, 0.50) # translation in x
ty = np.random.uniform(-0.50, 0.50) # translation in y
# rotation
scale = np.random.uniform(0.8, 1.2)
tx = np.random.uniform(-0.50, 0.50)
ty = np.random.uniform(-0.50, 0.50)
angle_x = np.random.uniform(np.radians(-10), np.radians(10))
angle_y = np.random.uniform(np.radians(-10), np.radians(10))
angle_z = np.random.uniform(np.radians(-10), np.radians(10))
cy, sy = np.cos(angle_z), np.sin(angle_z)
cp, sp = np.cos(angle_y), np.sin(angle_y)
cr, sr = np.cos(angle_x), np.sin(angle_x)
Rz = np.array(
[
[cy, -sy, 0],
[sy, cy, 0],
[0, 0, 1],
]
)
Ry = np.array(
[
[cp, 0, sp],
[0, 1, 0],
[-sp, 0, cp],
]
)
Rx = np.array(
[
[1, 0, 0],
[0, cr, -sr],
[0, sr, cr],
]
)
# Create affine transformation matrix
T = np.array(
[
[1, 0, tx],
[0, 1, ty],
[0, 0, scale],
]
)
Rz = np.array([[cy, -sy, 0], [sy, cy, 0], [0, 0, 1]])
Ry = np.array([[cp, 0, sp], [0, 1, 0], [-sp, 0, cp]])
Rx = np.array([[1, 0, 0], [0, cr, -sr], [0, sr, cr]])
T = np.array([[1, 0, tx], [0, 1, ty], [0, 0, scale]])
K = self.get_camera_matrix()
return K @ Rx @ Ry @ Rz @ T @ np.linalg.inv(K)
def get_camera_matrix(self) -> np.ndarray:
w, h = config["image_size"]
K = np.array(
[
[w / 2, 0, w / 2],
[0, h / 2, h / 2],
[0, 0, 1],
]
)
return K
return np.array([[w / 2, 0, w / 2], [0, h / 2, h / 2], [0, 0, 1]])
def _apply_augmentation(
self,
@@ -282,90 +162,22 @@ class YaGoDataset(Dataset):
yandex_img: Image.Image,
matrices: Tuple[np.ndarray, np.ndarray, np.ndarray],
) -> Tuple[Image.Image, Image.Image, np.ndarray]:
"""
Apply homography-based data augmentation to image pair.
Args:
google_img: Google map image
yandex_img: Yandex map image
matrices: homography matrices
Returns:
Tuple of (augmented_google_img, augmented_yandex_img, augmented_homography)
"""
# Combine with base homography
combined_homography = matrices[2]
# Apply augmentation to both images
# google_aug = self._apply_homography_to_image(google_img, aug_homography)
yandex_aug = self._apply_homography_to_image(yandex_img, matrices[0])
google_aug = self._apply_homography_to_image(google_img, matrices[1])
print("F", combined_homography, np.linalg.inv(matrices[0]) @ matrices[1])
return google_aug, yandex_aug, combined_homography
def _apply_homography_to_image(
self, img: Image.Image, homography: np.ndarray
) -> Image.Image:
"""
Apply homography transformation to a single image.
Args:
img: PIL Image to transform
homography: 3x3 homography matrix
Returns:
Transformed PIL Image
"""
# Convert to numpy array
img_np = np.array(img)
# Get image dimensions
h, w = img_np.shape[:2]
# Apply homography transformation
transformed = cv2.warpPerspective(
img_np,
homography,
(w, h),
flags=cv2.INTER_LINEAR,
# borderMode=cv2.BORDER_REFLECT,
img_np, homography, (w, h), flags=cv2.INTER_LINEAR
)
# Convert back to PIL Image
return Image.fromarray(transformed)
def get_sample_without_augmentation(self, idx: int) -> Dict[str, Any]:
"""
Get a sample without data augmentation.
Useful for visualization and evaluation.
"""
pair_info = self.image_pairs[idx]
# Load images
google_img = Image.open(pair_info["google_path"]).convert("RGB")
yandex_img = Image.open(pair_info["yandex_path"]).convert("RGB")
# Resize
google_img = google_img.resize(
(self.image_size[1], self.image_size[0]), Image.BILINEAR
)
yandex_img = yandex_img.resize(
(self.image_size[1], self.image_size[0]), Image.BILINEAR
)
# Get homography matrix
homography_matrix = self._get_homography_matrix(pair_info["idx"])
return {
"google_img": google_img,
"yandex_img": yandex_img,
"homography": homography_matrix,
"idx": pair_info["idx"],
"google_path": pair_info["google_path"],
"yandex_path": pair_info["yandex_path"],
}
def create_data_loaders(
root_dir: str,
@@ -375,104 +187,46 @@ def create_data_loaders(
image_size: Tuple[int, int] = (256, 256),
augment_train: bool = True,
augment_val: bool = False,
device: torch.device = None,
device=None,
) -> Tuple[DataLoader, DataLoader]:
"""
Create train and validation data loaders for homography estimation.
Args:
root_dir: Directory containing image pairs
batch_size: Batch size for data loaders
train_split: Fraction of data to use for training
num_workers: Number of worker processes for data loading
image_size: Target image size (height, width)
augment_train: Whether to augment training data
augment_val: Whether to augment validation data
device: Target device for tensors (optional)
Returns:
Tuple of (train_loader, val_loader)
"""
from torchvision import transforms
# Define transforms
transform = transforms.Compose(
[
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
)
# Create full dataset
transform = transforms.Compose([
transforms.ToTensor(),
# transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
full_dataset = YaGoDataset(
root_dir=root_dir,
transform=transform,
augment=False, # We'll handle augmentation separately
augment=False,
image_size=image_size,
cache_homographies=True,
device=device,
)
# Split dataset
aug_dataset = YaGoDataset(
root_dir=root_dir,
transform=transform,
augment=True,
image_size=image_size,
cache_homographies=False,
device=device,
)
dataset_size = len(full_dataset)
train_size = int(train_split * dataset_size)
val_size = dataset_size - train_size
# Create indices for splitting
indices = list(range(dataset_size))
random.shuffle(indices)
train_indices = indices[:train_size]
val_indices = indices[train_size:]
# Create subset samplers
from torch.utils.data import Subset
train_dataset = Subset(full_dataset, train_indices)
val_dataset = Subset(full_dataset, val_indices)
# Apply augmentation by overriding __getitem__ for train dataset
if augment_train:
class AugmentedSubset(Subset):
def __init__(self, dataset, indices, device=None):
super().__init__(dataset, indices)
self.device = device
def __getitem__(self, idx):
sample = self.dataset[self.indices[idx]]
# Apply augmentation
google_img = sample["google_img"]
yandex_img = sample["yandex_img"]
homography = sample["homography"]
if self.device is not None:
google_img = google_img.to(self.device)
yandex_img = yandex_img.to(self.device)
homography = homography.to(self.device)
# Generate augmentation homography
aug_homography = torch.from_numpy(
full_dataset.generate_random_homography()
).float()
if self.device is not None:
aug_homography = aug_homography.to(self.device)
# Combine homographies
combined_homography = aug_homography @ homography
# Apply augmentation (simplified - in practice would warp images)
# For now, we just return the combined homography
return {
"google_img": google_img,
"yandex_img": yandex_img,
"homography": combined_homography,
"idx": sample["idx"],
}
train_dataset = AugmentedSubset(full_dataset, train_indices, device=device)
# Create data loaders
train_dataset = Subset(aug_dataset, train_indices)
train_loader = DataLoader(
train_dataset,
batch_size=batch_size,
@@ -480,7 +234,7 @@ def create_data_loaders(
num_workers=num_workers,
pin_memory=True,
)
val_loader = DataLoader(
val_dataset,
batch_size=batch_size,
@@ -488,14 +242,15 @@ def create_data_loaders(
num_workers=num_workers,
pin_memory=True,
)
return train_loader, val_loader
# Example usage
dataset = YaGoDataset(
root_dir=config["data_dir"],
augment=False,
augment=True,
image_size=(256, 256),
)

File diff suppressed because one or more lines are too long