add auto codegen
This commit is contained in:
52
models/SiaN/src/analyze.py
Normal file
52
models/SiaN/src/analyze.py
Normal file
@@ -0,0 +1,52 @@
|
||||
import torch
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
|
||||
def analyze_training(trainer):
    """Summarise a finished training run and plot one validation sample.

    Prints the TensorBoard log directory (when the trainer has a writer), the
    best validation loss, a per-parameter table of target vs. predicted values
    for the first sample of one validation batch, and the mean absolute error
    over that whole batch. A figure with both input images and a bar chart of
    the predicted parameters is saved as ``prediction_sample.png`` in the
    current working directory and then shown.

    Args:
        trainer: Object exposing ``writer``, ``best_val_loss``, ``model``,
            ``val_loader`` and ``device``. Batches of ``val_loader`` must be
            dicts with ``"google_img"``, ``"yandex_img"`` and
            ``"homography_params"`` entries.

    Returns:
        dict: ``{"best_val_loss": trainer.best_val_loss}``.
    """
    print("=== Training Analysis ===\n")

    if trainer.writer:
        print("TensorBoard logs available at:", trainer.writer.log_dir)

    print(f"\nBest val loss: {trainer.best_val_loss:.4f}")

    summary = {"best_val_loss": trainer.best_val_loss}

    trainer.model.eval()
    with torch.no_grad():
        # An empty loader would otherwise surface as a bare StopIteration.
        batch = next(iter(trainer.val_loader), None)
        if batch is None:
            print("\nValidation loader is empty; skipping sample analysis.")
            return summary

        google_img = batch["google_img"].to(trainer.device)
        yandex_img = batch["yandex_img"].to(trainer.device)
        target_params = batch["homography_params"].to(trainer.device)
        pred_params = trainer.model(google_img, yandex_img)

    # Only the first sample of the batch is tabulated, so say so.
    print("\nSample predictions (first sample of batch):")
    print(f"{'Param':<8} {'Target':>12} {'Predicted':>12} {'Error':>12}")
    print("-" * 46)
    names = ["rx", "ry", "rz", "tx", "ty", "scale"]
    first_target = target_params[0].tolist()
    first_pred = pred_params[0].tolist()
    for name, t, p in zip(names, first_target, first_pred):
        print(f"{name:<8} {t:>12.4f} {p:>12.4f} {abs(t-p):>12.4f}")

    print(f"\nBatch mean abs error: {torch.mean(torch.abs(pred_params - target_params)).item():.4f}")

    print("\n=== Visualization ===")
    fig, axes = plt.subplots(1, 3, figsize=(15, 5))

    # Images are channel-first tensors; imshow expects channel-last arrays.
    panels = (("Google", google_img[0]), ("Yandex", yandex_img[0]))
    for ax, (title, img) in zip(axes, panels):
        ax.imshow(img.cpu().permute(1, 2, 0).numpy())
        ax.set_title(title)
        ax.axis("off")

    axes[2].bar(names, pred_params[0].cpu().numpy())
    axes[2].set_title("Predicted params")
    axes[2].axhline(y=0, color="k", lw=0.5)

    plt.tight_layout()
    plt.savefig("prediction_sample.png")
    print("Saved prediction_sample.png")
    plt.show()
    # Release the figure so repeated analyses do not accumulate open figures.
    plt.close(fig)

    return summary
|
||||
93
models/SiaN/src/dataloader.py
Normal file
93
models/SiaN/src/dataloader.py
Normal file
@@ -0,0 +1,93 @@
|
||||
import os
|
||||
import random
|
||||
from typing import Tuple
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
import torch
|
||||
from PIL import Image
|
||||
from torch.utils.data import DataLoader, Dataset, Subset
|
||||
from torchvision import transforms
|
||||
|
||||
from .utils import config, get_camera_matrix, generate_random_homography_params, homography_params_to_matrix, matrix_to_homography_params
|
||||
|
||||
|
||||
class YaGoDataset(Dataset):
    """Paired Google/Yandex map images with homography regression targets.

    The directory is scanned for ``<n>_google.png`` files that have a matching
    ``<n>_yandex.png`` where ``<n>`` is an integer. Each item is a dict with
    the two images, a 3x3 homography matrix and a 6-vector
    ``[rx, ry, rz, tx, ty, scale]``.

    Args:
        root_dir: Directory containing the image pairs.
        transform: Optional callable applied to both PIL images.
        augment: When true, both images are warped with independent random
            homographies and the target describes their combination; when
            false the target is the identity.
        image_size: ``(height, width)`` every image is resized to.
    """

    _GOOGLE_SUFFIX = "_google.png"
    _YANDEX_SUFFIX = "_yandex.png"

    def __init__(self, root_dir: str, transform=None, augment: bool = True,
                 image_size: Tuple[int, int] = (256, 256)):
        self.root_dir = root_dir
        self.transform = transform
        self.augment = augment
        self.image_size = image_size
        # get_camera_matrix takes (width, height); image_size is (height, width).
        self.K = get_camera_matrix(image_size[1], image_size[0])
        self.image_pairs = self._discover_image_pairs()

    def _discover_image_pairs(self):
        """Return the available image pairs sorted by their integer index.

        Files whose stem (the name without ``_google.png``) is not an integer
        are skipped instead of aborting the scan, and the Yandex partner is
        looked up using the full stem so names containing extra underscores
        cannot be paired with the wrong file.
        """
        pairs = []
        for name in os.listdir(self.root_dir):
            if not name.endswith(self._GOOGLE_SUFFIX):
                continue
            stem = name[:-len(self._GOOGLE_SUFFIX)]
            try:
                index = int(stem)
            except ValueError:
                continue
            yandex_path = os.path.join(self.root_dir, f"{stem}{self._YANDEX_SUFFIX}")
            if os.path.exists(yandex_path):
                pairs.append({
                    "idx": index,
                    "google": os.path.join(self.root_dir, name),
                    "yandex": yandex_path,
                })
        return sorted(pairs, key=lambda pair: pair["idx"])

    def _load_image(self, path):
        """Load ``path`` as an RGB PIL image resized to ``image_size``."""
        width_height = (self.image_size[1], self.image_size[0])
        # The context manager closes the underlying file handle promptly.
        with Image.open(path) as img:
            return img.convert("RGB").resize(width_height, Image.BILINEAR)

    def __len__(self):
        return len(self.image_pairs)

    def __getitem__(self, idx):
        pair = self.image_pairs[idx]
        google_img = self._load_image(pair["google"])
        yandex_img = self._load_image(pair["yandex"])

        if self.augment:
            params1 = generate_random_homography_params()
            params2 = generate_random_homography_params()
            H1 = homography_params_to_matrix(params1, self.K)
            H2 = homography_params_to_matrix(params2, self.K)
            # NOTE(review): yandex is warped by H1 and google by H2, so the map
            # between the warped images would be H1 @ inv(H2) (google->yandex)
            # or H2 @ inv(H1) (yandex->google). Left unchanged pending
            # confirmation of the intended convention.
            H_combined = np.linalg.inv(H1) @ H2
            # OpenCV expects dsize as (width, height); image_size is (height, width).
            dsize = (self.image_size[1], self.image_size[0])
            yandex_img = Image.fromarray(cv2.warpPerspective(np.array(yandex_img), H1, dsize))
            google_img = Image.fromarray(cv2.warpPerspective(np.array(google_img), H2, dsize))
            target_params = matrix_to_homography_params(H_combined, self.K)
            target_matrix = H_combined
        else:
            # Identity transform: no rotation or translation, unit scale.
            # (A scale of 0 would describe a singular matrix, not the identity.)
            target_params = np.array([0, 0, 0, 0, 0, 1], dtype=np.float32)
            target_matrix = np.eye(3, dtype=np.float32)

        if self.transform:
            google_img = self.transform(google_img)
            yandex_img = self.transform(yandex_img)

        return {
            "google_img": google_img,
            "yandex_img": yandex_img,
            "homography_matrix": torch.from_numpy(target_matrix).float(),
            "homography_params": torch.from_numpy(target_params).float(),
        }
|
||||
|
||||
|
||||
def create_data_loaders(root_dir, batch_size=32, train_split=0.8, num_workers=0,
|
||||
image_size=(256, 256), augment_train=True):
|
||||
transform = transforms.Compose([transforms.ToTensor()])
|
||||
|
||||
full_ds = YaGoDataset(root_dir, transform=transform, augment=False, image_size=image_size)
|
||||
aug_ds = YaGoDataset(root_dir, transform=transform, augment=True, image_size=image_size)
|
||||
|
||||
indices = list(range(len(full_ds)))
|
||||
random.shuffle(indices)
|
||||
split = int(train_split * len(indices))
|
||||
|
||||
train_ds = Subset(aug_ds if augment_train else full_ds, indices[:split])
|
||||
val_ds = Subset(full_ds, indices[split:])
|
||||
|
||||
return (DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=True),
|
||||
DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True))
|
||||
|
||||
|
||||
def get_dataset_info():
    """Return basic facts about the dataset configured in ``config``.

    Returns:
        dict: ``"size"`` (number of pairs), ``"sample_keys"`` (keys of the
        first item) and ``"sample_params"`` (its homography parameters as a
        NumPy array). For an empty dataset the last two are ``[]`` and
        ``None``.
    """
    ds = YaGoDataset(config["data_dir"], augment=True, image_size=config["image_size"])
    if len(ds) == 0:
        return {"size": 0, "sample_keys": [], "sample_params": None}

    # Fetch the sample once: every access loads two images and draws a new
    # random warp, so a second ds[0] would describe a different sample.
    sample = ds[0]
    return {
        "size": len(ds),
        "sample_keys": list(sample.keys()),
        "sample_params": sample["homography_params"].numpy(),
    }
|
||||
80
models/SiaN/src/main.py
Normal file
80
models/SiaN/src/main.py
Normal file
@@ -0,0 +1,80 @@
|
||||
import logging
|
||||
|
||||
import torch
|
||||
|
||||
from .dataloader import create_data_loaders, get_dataset_info
|
||||
from .model import HomographyCNN6, count_parameters
|
||||
from .train import HomographyTrainer
|
||||
from .analyze import analyze_training
|
||||
from .utils import config
|
||||
|
||||
|
||||
# Configure root logging once at import time so every pipeline stage emits
# timestamped INFO-level messages.
logging.basicConfig(format="%(asctime)s - %(message)s", level=logging.INFO)

# Logger named after this module, shared by the functions below.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def create_dataset():
    """Create the train/validation loaders from the values in ``config``.

    Returns:
        tuple: ``(train_loader, val_loader)`` as produced by
        ``create_data_loaders``.
    """
    logger.info("Creating data loaders...")

    loader_options = {
        "root_dir": config["data_dir"],
        "batch_size": config["batch_size"],
        "train_split": config["train_split"],
        "num_workers": config["num_workers"],
        "image_size": config["image_size"],
    }
    train_loader, val_loader = create_data_loaders(**loader_options)

    n_train = len(train_loader.dataset)
    n_val = len(val_loader.dataset)
    logger.info(f"Data loaders created: train={n_train}, val={n_val}")
    return train_loader, val_loader
|
||||
|
||||
|
||||
def create_model():
    """Instantiate ``HomographyCNN6`` with the backbone and dropout from ``config``.

    The network is always built for 3-channel input with pretrained backbone
    weights.

    Returns:
        HomographyCNN6: The freshly constructed model.
    """
    logger.info("Creating model...")

    model_options = {
        "input_channels": 3,
        "backbone_name": config["backbone"],
        "pretrained": True,
        "dropout_rate": config["dropout_rate"],
    }
    net = HomographyCNN6(**model_options)

    n_params = count_parameters(net)
    logger.info(f"Model created with {n_params:,} parameters")
    return net
|
||||
|
||||
|
||||
def train_model(model, train_loader, val_loader):
    """Train ``model`` for ``config["epochs"]`` epochs and return the trainer.

    Uses CUDA when ``torch.cuda.is_available()`` reports it, otherwise the CPU.

    Args:
        model: Network handed to ``HomographyTrainer``.
        train_loader: Loader with the training batches.
        val_loader: Loader with the validation batches.

    Returns:
        HomographyTrainer: The trainer after ``train`` has finished.
    """
    device_name = "cuda" if torch.cuda.is_available() else "cpu"
    device = torch.device(device_name)
    logger.info(f"Using device: {device}")

    trainer = HomographyTrainer(model, train_loader, val_loader, device)

    logger.info("Starting training...")
    n_epochs = config["epochs"]
    trainer.train(n_epochs)
    logger.info("Training completed")

    return trainer
|
||||
|
||||
|
||||
def analyze_model(trainer):
    """Run ``analyze_training`` on ``trainer`` and log the best validation loss.

    Args:
        trainer: The trainer returned by ``train_model``.

    Returns:
        dict: The result of ``analyze_training`` (contains ``"best_val_loss"``).
    """
    logger.info("Analyzing model...")
    summary = analyze_training(trainer)
    best = summary["best_val_loss"]
    logger.info(f"Analysis complete: best_val_loss={best:.4f}")
    return summary
|
||||
|
||||
|
||||
def main():
    """Run the whole pipeline: dataset info, loaders, model, training, analysis.

    Returns:
        tuple: ``(trainer, results)`` where ``results`` is the dict returned
        by ``analyze_model``.
    """
    banner = "=" * 50

    logger.info(banner)
    logger.info("SiaN Training Pipeline")
    logger.info(banner)

    info = get_dataset_info()
    logger.info(f"Dataset: {info['size']} samples, keys={info['sample_keys']}")

    # Loaders are built before the model, as each stage logs its own progress.
    train_loader, val_loader = create_dataset()
    net = create_model()
    trainer = train_model(net, train_loader, val_loader)
    results = analyze_model(trainer)

    logger.info(banner)
    logger.info("Pipeline completed successfully")
    logger.info(banner)

    return trainer, results


if __name__ == "__main__":
    main()
|
||||
41
models/SiaN/src/model.py
Normal file
41
models/SiaN/src/model.py
Normal file
@@ -0,0 +1,41 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from torchvision import models
|
||||
|
||||
|
||||
class HomographyCNN6(nn.Module):
    """Siamese regressor predicting six homography parameters from two images.

    Both images go through the same torchvision backbone (its ``fc`` layer is
    replaced by identity). The two feature vectors, their absolute difference
    and their element-wise product are concatenated and fed to an MLP head
    with six outputs.

    Args:
        input_channels: Channel count of the input images. For values other
            than 3 the backbone's first convolution (``conv1``) is replaced by
            a freshly initialised one, so that layer loses any pretrained
            weights.
        backbone_name: Name of a builder in ``torchvision.models`` whose model
            exposes ``fc`` (and ``conv1`` when ``input_channels != 3``).
        pretrained: Load the backbone's ``IMAGENET1K_V1`` weights when true.
        dropout_rate: Dropout probability used twice in the head.
    """

    def __init__(self, input_channels=3, backbone_name="resnet18", pretrained=True, dropout_rate=0.3):
        super().__init__()
        builder = getattr(models, backbone_name)
        # The weight name is resolved against the chosen builder's own enum,
        # so this is not tied to ResNet-18 the way a ResNet18_Weights member is.
        backbone = builder(weights="IMAGENET1K_V1" if pretrained else None)

        if input_channels != 3:
            stem = backbone.conv1
            backbone.conv1 = nn.Conv2d(
                input_channels,
                stem.out_channels,
                kernel_size=stem.kernel_size,
                stride=stem.stride,
                padding=stem.padding,
                bias=stem.bias is not None,
            )

        self.feature_dim = backbone.fc.in_features
        backbone.fc = nn.Identity()
        self.backbone = backbone

        # Head input is four concatenated feature vectors (see forward).
        self.head = nn.Sequential(
            nn.Linear(self.feature_dim * 4, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate),
            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate),
            nn.Linear(256, 6),
        )

    def forward(self, img1, img2):
        """Return the six predicted parameters for each image pair in the batch."""
        f1 = self.backbone(img1)
        f2 = self.backbone(img2)
        combined = torch.cat([f1, f2, torch.abs(f1 - f2), f1 * f2], dim=1)
        return self.head(combined)
|
||||
|
||||
|
||||
class HomographyLoss6(nn.Module):
    """Mean-squared-error loss between predicted and target parameters."""

    def __init__(self):
        super().__init__()
        # Default nn.MSELoss: mean over every element.
        self.criterion = nn.MSELoss()

    def forward(self, pred, target):
        """Return the MSE of ``pred`` against ``target``."""
        loss = self.criterion(pred, target)
        return loss
|
||||
|
||||
|
||||
def count_parameters(model):
    """Return the total number of elements across all of ``model``'s parameters."""
    total = 0
    for tensor in model.parameters():
        total += tensor.numel()
    return total
|
||||
83
models/SiaN/src/train.py
Normal file
83
models/SiaN/src/train.py
Normal file
@@ -0,0 +1,83 @@
|
||||
import os
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.optim as optim
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
from tqdm import tqdm
|
||||
|
||||
from .dataloader import create_data_loaders
|
||||
from .model import HomographyCNN6, HomographyLoss6, count_parameters
|
||||
from .utils import config
|
||||
|
||||
|
||||
class HomographyTrainer:
    """Train and validate a two-image homography regressor.

    Batches are dicts with ``"google_img"``, ``"yandex_img"`` and
    ``"homography_params"`` tensors. The loss is ``HomographyLoss6`` and the
    optimizer is Adam with the learning rate from ``config``.

    Args:
        model: Network called as ``model(google_img, yandex_img)``.
        train_loader: Iterable of training batches.
        val_loader: Iterable of validation batches.
        device: Device the model and every batch are moved to.
    """

    def __init__(self, model, train_loader, val_loader, device):
        self.model = model.to(device)
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.device = device
        self.criterion = HomographyLoss6()
        self.optimizer = optim.Adam(model.parameters(), lr=config["learning_rate"])
        # Created in train(); stays None until training has started.
        self.writer = None
        self.best_val_loss = float("inf")

    @staticmethod
    def _mean_loss(total_loss, total_samples):
        """Return the per-sample mean loss, or NaN when no samples were seen."""
        if not total_samples:
            return float("nan")
        return total_loss / total_samples

    def _batch_loss(self, batch):
        """Move one batch to the device and return ``(loss, batch_size)``."""
        google_img = batch["google_img"].to(self.device)
        yandex_img = batch["yandex_img"].to(self.device)
        target = batch["homography_params"].to(self.device)
        output = self.model(google_img, yandex_img)
        return self.criterion(output, target), google_img.size(0)

    def train_epoch(self, epoch):
        """Run one optimisation pass over ``train_loader``.

        Args:
            epoch: Epoch number, used only for the progress-bar label.

        Returns:
            dict: ``{"loss": mean per-sample training loss}`` (NaN if the
            loader yielded nothing).
        """
        self.model.train()
        total_loss, total_samples = 0.0, 0
        pbar = tqdm(self.train_loader, desc=f"Epoch {epoch}")
        for batch in pbar:
            self.optimizer.zero_grad()
            loss, batch_size = self._batch_loss(batch)
            loss.backward()
            self.optimizer.step()

            loss_value = loss.item()
            # Weight by batch size so a short final batch is averaged correctly.
            total_loss += loss_value * batch_size
            total_samples += batch_size
            pbar.set_postfix({"loss": loss_value})

        return {"loss": self._mean_loss(total_loss, total_samples)}

    def validate(self):
        """Evaluate on ``val_loader`` without gradients.

        Returns:
            dict: ``{"loss": mean per-sample validation loss}`` (NaN if the
            loader yielded nothing).
        """
        self.model.eval()
        total_loss, total_samples = 0.0, 0
        with torch.no_grad():
            for batch in tqdm(self.val_loader, desc="Validation"):
                loss, batch_size = self._batch_loss(batch)
                total_loss += loss.item() * batch_size
                total_samples += batch_size
        return {"loss": self._mean_loss(total_loss, total_samples)}

    def train(self, num_epochs):
        """Train for ``num_epochs`` epochs, checkpointing on every new best.

        Train and validation losses are written to TensorBoard under
        ``config["output_dir"]`` after each epoch. A NaN validation loss never
        compares as an improvement, so it never triggers a checkpoint.

        Args:
            num_epochs: Number of epochs; epochs are numbered from 1.
        """
        log_dir = config["output_dir"]
        os.makedirs(log_dir, exist_ok=True)
        self.writer = SummaryWriter(log_dir)

        try:
            for epoch in range(1, num_epochs + 1):
                train_metrics = self.train_epoch(epoch)
                val_metrics = self.validate()
                self.writer.add_scalar("Loss/train", train_metrics["loss"], epoch)
                self.writer.add_scalar("Loss/val", val_metrics["loss"], epoch)
                print(f"Train Loss: {train_metrics['loss']:.4f}, Val Loss: {val_metrics['loss']:.4f}")

                if val_metrics["loss"] < self.best_val_loss:
                    self.best_val_loss = val_metrics["loss"]
                    self.save_checkpoint(epoch, is_best=True)
                    print(f"Best model saved (val loss: {val_metrics['loss']:.4f})")
        finally:
            # Flush and close the event file even if an epoch raises.
            self.writer.close()

    def save_checkpoint(self, epoch, is_best=False):
        """Write the model weights to ``<output_dir>/checkpoints``.

        Args:
            epoch: Epoch number stored in the checkpoint and its file name.
            is_best: Also write the same checkpoint as ``best_model.pt``.
        """
        ckpt_dir = os.path.join(config["output_dir"], "checkpoints")
        os.makedirs(ckpt_dir, exist_ok=True)
        ckpt = {"epoch": epoch, "model_state_dict": self.model.state_dict(), "val_loss": self.best_val_loss}
        torch.save(ckpt, os.path.join(ckpt_dir, f"checkpoint_epoch_{epoch}.pt"))
        if is_best:
            torch.save(ckpt, os.path.join(ckpt_dir, "best_model.pt"))
|
||||
54
models/SiaN/src/utils.py
Normal file
54
models/SiaN/src/utils.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import numpy as np
|
||||
|
||||
|
||||
# Central settings shared by the data, model and training modules.
# NOTE(review): data_dir and output_dir are absolute paths on one specific
# Windows machine; other environments must edit them before running.
config = {
    # --- data ---
    "data_dir": r"C:\Users\admin\Projects\autopilot\datasets\ya_go_maps\images",
    "image_size": (256, 256),
    "batch_size": 32,
    "train_split": 0.8,
    "num_workers": 0,
    # --- optimisation ---
    "epochs": 100,
    "learning_rate": 2e-4,
    # --- model ---
    "dropout_rate": 0.3,
    "backbone": "resnet18",
    # --- outputs (TensorBoard logs and checkpoints) ---
    "output_dir": r"C:\Users\admin\Projects\autopilot\models\SiaN\runs",
}
|
||||
|
||||
|
||||
def get_camera_matrix(w, h):
    """Return a 3x3 float32 intrinsics-style matrix for a ``w`` x ``h`` image.

    Both the diagonal scale and the offset are half the image size along each
    axis: ``[[w/2, 0, w/2], [0, h/2, h/2], [0, 0, 1]]``.
    """
    half_w = w / 2
    half_h = h / 2
    K = np.eye(3, dtype=np.float32)
    K[0, 0] = K[0, 2] = half_w
    K[1, 1] = K[1, 2] = half_h
    return K
|
||||
|
||||
|
||||
def generate_random_homography_params(angle_range=10, translation_range=0.1, scale_range=(0.9, 1.1)):
    """Draw one random parameter vector ``[rx, ry, rz, tx, ty, scale]``.

    Values come from NumPy's global random state, drawn in the order scale,
    tx, ty, rx, ry, rz.

    Args:
        angle_range: Each angle is uniform in ``[-angle_range, angle_range]``
            degrees and returned in radians.
        translation_range: ``tx`` and ``ty`` are uniform in
            ``[-translation_range, translation_range]``.
        scale_range: Arguments forwarded to ``np.random.uniform`` for the
            scale draw.

    Returns:
        numpy.ndarray: The six parameters.
    """
    zoom = np.random.uniform(*scale_range)
    shift_x, shift_y = (
        np.random.uniform(-translation_range, translation_range) for _ in range(2)
    )
    # Three successive draws, converted from degrees to radians: rx, ry, rz.
    angles = [np.radians(np.random.uniform(-angle_range, angle_range)) for _ in range(3)]
    return np.array([*angles, shift_x, shift_y, zoom])
|
||||
|
||||
|
||||
def homography_params_to_matrix(params, K):
    """Build the 3x3 matrix ``K @ Rx @ Ry @ Rz @ T @ inv(K)`` from six parameters.

    Args:
        params: ``(rx, ry, rz, tx, ty, scale)``; the angles are passed to
            ``np.cos`` / ``np.sin`` (i.e. radians). ``T`` is the identity with
            ``tx``, ``ty`` and ``scale`` placed in its last column.
        K: 3x3 matrix conjugating the transform.

    Returns:
        numpy.ndarray: The resulting 3x3 matrix.
    """
    rx, ry, rz, tx, ty, scale = params

    sin_x, cos_x = np.sin(rx), np.cos(rx)
    sin_y, cos_y = np.sin(ry), np.cos(ry)
    sin_z, cos_z = np.sin(rz), np.cos(rz)

    rot_x = np.array([[1, 0, 0], [0, cos_x, -sin_x], [0, sin_x, cos_x]], dtype=np.float32)
    rot_y = np.array([[cos_y, 0, sin_y], [0, 1, 0], [-sin_y, 0, cos_y]], dtype=np.float32)
    rot_z = np.array([[cos_z, -sin_z, 0], [sin_z, cos_z, 0], [0, 0, 1]], dtype=np.float32)
    shift_scale = np.array([[1, 0, tx], [0, 1, ty], [0, 0, scale]], dtype=np.float32)

    # Left-to-right product, same association order as a chained ``@``.
    H = K
    for factor in (rot_x, rot_y, rot_z, shift_scale, np.linalg.inv(K)):
        H = H @ factor
    return H
|
||||
|
||||
|
||||
def matrix_to_homography_params(H, K):
    """Recover ``[rx, ry, rz, tx, ty, scale]`` from a homography matrix.

    This inverts ``H = K @ Rx @ Ry @ Rz @ T @ inv(K)`` where ``T`` is the
    identity with ``(tx, ty, scale)`` in its last column. With
    ``E = inv(K) @ H @ K = R @ T`` the first two columns of ``E`` are the
    first two columns of the rotation ``R`` and the last column is
    ``R @ (tx, ty, scale)``.

    ``H`` may carry an arbitrary non-zero overall factor. When ``E`` is not
    exactly of the form ``R @ T`` (for example a product of several such
    matrices) its first two columns are replaced by the nearest orthonormal
    pair, so the result is a best-effort approximation. Angles are unique
    only for ``|ry| < pi/2`` and ``scale`` is assumed positive.

    Args:
        H: 3x3 homography matrix.
        K: 3x3 matrix used to conjugate the transform.

    Returns:
        numpy.ndarray: float32 array ``[rx, ry, rz, tx, ty, scale]``.

    Raises:
        ValueError: If the first two columns of ``E`` are (numerically) zero.
    """
    K = np.asarray(K, dtype=np.float64)
    H = np.asarray(H, dtype=np.float64)
    E = np.linalg.inv(K) @ H @ K

    # det(R @ T) = scale > 0, so a negative determinant means H was
    # multiplied by a negative factor; undo the sign first.
    if np.linalg.det(E) < 0:
        E = -E

    # Nearest orthonormal pair to the first two columns; the mean singular
    # value is the overall factor H was multiplied by.
    U, singular_values, Vt = np.linalg.svd(E[:, :2], full_matrices=False)
    factor = singular_values.mean()
    if not factor > np.finfo(np.float64).eps:
        raise ValueError("Degenerate homography: cannot recover a rotation")
    first_two = U @ Vt
    r1, r2 = first_two[:, 0], first_two[:, 1]
    R = np.column_stack([r1, r2, np.cross(r1, r2)])

    # Last column of E is R @ (tx, ty, scale); R is orthonormal so R.T inverts it.
    tx, ty, scale = R.T @ (E[:, 2] / factor)

    # For R = Rx @ Ry @ Rz:
    #   R[0] = [cos(ry)cos(rz), -cos(ry)sin(rz), sin(ry)]
    #   R[1, 2] = -sin(rx)cos(ry),  R[2, 2] = cos(rx)cos(ry)
    ry = np.arctan2(R[0, 2], np.hypot(R[0, 0], R[0, 1]))
    rz = np.arctan2(-R[0, 1], R[0, 0])
    rx = np.arctan2(-R[1, 2], R[2, 2])

    return np.array([rx, ry, rz, tx, ty, scale], dtype=np.float32)
|
||||
Reference in New Issue
Block a user