add auto codegen

This commit is contained in:
2026-04-04 21:32:50 +03:00
parent 703ea8dbaf
commit b2cc714d79
12 changed files with 901 additions and 1320 deletions

View File

@@ -0,0 +1,52 @@
import torch
import numpy as np
import matplotlib.pyplot as plt
def analyze_training(trainer):
    """Print a short report for a trained model and save a sample figure.

    The model is switched to eval mode and run once, without gradients, on
    the first batch of ``trainer.val_loader``. Target and predicted
    parameters of the first sample in that batch are printed side by side,
    followed by the mean absolute error over the whole batch. A figure with
    both input images and a bar chart of the predicted parameters is written
    to ``prediction_sample.png`` in the current working directory.

    Args:
        trainer: Object exposing ``writer``, ``best_val_loss``, ``model``,
            ``val_loader`` and ``device`` (these are the attributes read
            here).

    Returns:
        dict: ``{"best_val_loss": trainer.best_val_loss}``.
    """
    print("=== Training Analysis ===\n")
    if trainer.writer:
        print("TensorBoard logs available at:", trainer.writer.log_dir)
    print(f"\nBest val loss: {trainer.best_val_loss:.4f}")

    trainer.model.eval()
    with torch.no_grad():
        batch = next(iter(trainer.val_loader))
        google_img = batch["google_img"].to(trainer.device)
        yandex_img = batch["yandex_img"].to(trainer.device)
        target_params = batch["homography_params"].to(trainer.device)
        pred_params = trainer.model(google_img, yandex_img)

    # Only sample 0 is tabulated, so the header says so (it used to claim
    # "first 3 of batch").
    print("\nSample predictions (first sample of batch):")
    print(f"{'Param':<8} {'Target':>12} {'Predicted':>12} {'Error':>12}")
    print("-" * 46)
    names = ["rx", "ry", "rz", "tx", "ty", "scale"]
    first_target = target_params[0].tolist()
    first_pred = pred_params[0].tolist()
    for name, t, p in zip(names, first_target, first_pred):
        print(f"{name:<8} {t:>12.4f} {p:>12.4f} {abs(t-p):>12.4f}")
    print(f"\nBatch mean abs error: {torch.mean(torch.abs(pred_params - target_params)).item():.4f}")

    print("\n=== Visualization ===")
    fig, axes = plt.subplots(1, 3, figsize=(15, 5))
    try:
        img1 = google_img[0].cpu()
        img2 = yandex_img[0].cpu()
        # permute(1, 2, 0) moves the leading axis last, the layout imshow expects.
        axes[0].imshow(img1.permute(1, 2, 0))
        axes[0].set_title("Google")
        axes[0].axis("off")
        axes[1].imshow(img2.permute(1, 2, 0))
        axes[1].set_title("Yandex")
        axes[1].axis("off")
        axes[2].bar(names, pred_params[0].cpu().numpy())
        axes[2].set_title("Predicted params")
        axes[2].axhline(y=0, color="k", lw=0.5)
        plt.tight_layout()
        plt.savefig("prediction_sample.png")
        print("Saved prediction_sample.png")
        plt.show()
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
    return {"best_val_loss": trainer.best_val_loss}

View File

@@ -0,0 +1,93 @@
import os
import random
from typing import Tuple
import cv2
import numpy as np
import torch
from PIL import Image
from torch.utils.data import DataLoader, Dataset, Subset
from torchvision import transforms
from .utils import config, get_camera_matrix, generate_random_homography_params, homography_params_to_matrix, matrix_to_homography_params
class YaGoDataset(Dataset):
    """Dataset of paired ``<idx>_google.png`` / ``<idx>_yandex.png`` images.

    Each item is a dict with the two images, a 3x3 homography matrix and the
    corresponding six parameters ``[rx, ry, rz, tx, ty, scale]``. With
    ``augment=True`` both images are warped by independent random
    homographies; otherwise the images are returned unwarped with an
    identity target.

    Args:
        root_dir: Directory that is scanned for image pairs.
        transform: Optional callable applied to each PIL image before it is
            returned.
        augment: Whether to apply random homography warps.
        image_size: ``(height, width)`` of the returned images. Index 1 is
            used as the width for the camera matrix and for resizing.
    """

    def __init__(self, root_dir: str, transform=None, augment: bool = True,
                 image_size: Tuple[int, int] = (256, 256)):
        self.root_dir = root_dir
        self.transform = transform
        self.augment = augment
        self.image_size = image_size
        self.K = get_camera_matrix(image_size[1], image_size[0])
        self.image_pairs = self._discover_image_pairs()

    def _discover_image_pairs(self):
        """Return all complete pairs in ``root_dir`` sorted by integer index.

        A pair is kept only when both ``<idx>_google.png`` and
        ``<idx>_yandex.png`` exist; ``<idx>`` is the text before the first
        underscore and must parse as an int (``int()`` raises otherwise).
        """
        pairs = []
        for f in os.listdir(self.root_dir):
            if not f.endswith("_google.png"):
                continue
            idx = f.split("_")[0]
            yandex_path = os.path.join(self.root_dir, f"{idx}_yandex.png")
            if os.path.exists(yandex_path):
                pairs.append({"idx": int(idx), "google": os.path.join(self.root_dir, f), "yandex": yandex_path})
        return sorted(pairs, key=lambda x: x["idx"])

    def __len__(self):
        return len(self.image_pairs)

    def __getitem__(self, idx):
        """Load pair ``idx`` and return images plus homography targets."""
        pair = self.image_pairs[idx]
        # PIL's resize and OpenCV's dsize both expect (width, height), while
        # image_size is stored as (height, width).
        size_wh = (self.image_size[1], self.image_size[0])
        google_img = Image.open(pair["google"]).convert("RGB").resize(size_wh, Image.BILINEAR)
        yandex_img = Image.open(pair["yandex"]).convert("RGB").resize(size_wh, Image.BILINEAR)

        if self.augment:
            params1 = generate_random_homography_params()
            params2 = generate_random_homography_params()
            H1 = homography_params_to_matrix(params1, self.K)
            H2 = homography_params_to_matrix(params2, self.K)
            # NOTE(review): yandex is warped by H1 and google by H2, so the
            # map between the two warped images would be H2 @ inv(H1) (or its
            # inverse). inv(H1) @ H2 is kept as in the original; confirm the
            # intended convention.
            H_combined = np.linalg.inv(H1) @ H2
            # dsize was previously self.image_size, i.e. (height, width),
            # which is only correct for square images.
            yandex_img = Image.fromarray(cv2.warpPerspective(np.array(yandex_img), H1, size_wh))
            google_img = Image.fromarray(cv2.warpPerspective(np.array(google_img), H2, size_wh))
            target_params = matrix_to_homography_params(H_combined, self.K)
            target_matrix = H_combined
        else:
            # Identity transform: zero rotation and translation, unit scale.
            # All-zero parameters (scale 0) do not describe the identity
            # matrix returned below under homography_params_to_matrix.
            target_params = np.array([0, 0, 0, 0, 0, 1], dtype=np.float32)
            target_matrix = np.eye(3, dtype=np.float32)

        if self.transform:
            google_img = self.transform(google_img)
            yandex_img = self.transform(yandex_img)

        return {
            "google_img": google_img,
            "yandex_img": yandex_img,
            "homography_matrix": torch.from_numpy(target_matrix).float(),
            "homography_params": torch.from_numpy(target_params).float(),
        }
def create_data_loaders(root_dir, batch_size=32, train_split=0.8, num_workers=0,
                        image_size=(256, 256), augment_train=True, seed=None):
    """Build the training and validation ``DataLoader`` objects.

    The pair indices are shuffled and split once. The first
    ``int(train_split * N)`` indices form the training subset, the rest the
    validation subset. Validation always reads from the non-augmented
    dataset; training reads from the augmented one when ``augment_train`` is
    true.

    Args:
        root_dir: Directory passed to ``YaGoDataset``.
        batch_size: Batch size for both loaders.
        train_split: Fraction of samples assigned to training.
        num_workers: Worker count for both loaders.
        image_size: Image size passed to ``YaGoDataset``.
        augment_train: Whether the training subset uses augmentation.
        seed: Optional seed for a reproducible split. ``None`` (default)
            keeps the previous behaviour of shuffling with the global
            ``random`` state.

    Returns:
        tuple: ``(train_loader, val_loader)``.
    """
    transform = transforms.Compose([transforms.ToTensor()])
    full_ds = YaGoDataset(root_dir, transform=transform, augment=False, image_size=image_size)
    # Only scan the directory a second time when the augmented view is used.
    train_source = (
        YaGoDataset(root_dir, transform=transform, augment=True, image_size=image_size)
        if augment_train else full_ds
    )

    indices = list(range(len(full_ds)))
    if seed is None:
        random.shuffle(indices)
    else:
        # A private generator leaves the global random state untouched.
        random.Random(seed).shuffle(indices)
    split = int(train_split * len(indices))

    train_ds = Subset(train_source, indices[:split])
    val_ds = Subset(full_ds, indices[split:])

    # Pinned host memory only helps host-to-GPU copies, so request it only
    # when CUDA is available.
    pin_memory = torch.cuda.is_available()
    return (DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=pin_memory),
            DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=pin_memory))
def get_dataset_info():
    """Summarise the dataset configured in ``config``.

    Returns:
        dict: ``"size"`` (number of pairs), ``"sample_keys"`` (keys of the
        first item) and ``"sample_params"`` (its ``homography_params`` as a
        NumPy array). For an empty dataset ``sample_keys`` is ``[]`` and
        ``sample_params`` is ``None``.
    """
    ds = YaGoDataset(config["data_dir"], augment=True, image_size=config["image_size"])
    info = {"size": len(ds), "sample_keys": [], "sample_params": None}
    if len(ds) > 0:
        # Fetch the item once: every ds[0] call reloads both images and draws
        # a fresh random augmentation, so two calls describe different samples.
        sample = ds[0]
        info["sample_keys"] = list(sample.keys())
        info["sample_params"] = sample["homography_params"].numpy()
    return info

80
models/SiaN/src/main.py Normal file
View File

@@ -0,0 +1,80 @@
import logging
import torch
from .dataloader import create_data_loaders, get_dataset_info
from .model import HomographyCNN6, count_parameters
from .train import HomographyTrainer
from .analyze import analyze_training
from .utils import config
# Root logging is configured at import time: INFO level, timestamped messages.
_LOG_FORMAT = "%(asctime)s - %(message)s"
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module-level logger used by every pipeline step below.
logger = logging.getLogger(__name__)
def create_dataset():
    """Create the train/val data loaders from the values in ``config``.

    Returns:
        tuple: ``(train_loader, val_loader)`` as produced by
        ``create_data_loaders``.
    """
    logger.info("Creating data loaders...")
    loader_settings = {
        "root_dir": config["data_dir"],
        "batch_size": config["batch_size"],
        "train_split": config["train_split"],
        "num_workers": config["num_workers"],
        "image_size": config["image_size"],
    }
    loaders = create_data_loaders(**loader_settings)
    train_loader, val_loader = loaders
    logger.info(f"Data loaders created: train={len(train_loader.dataset)}, val={len(val_loader.dataset)}")
    return train_loader, val_loader
def create_model():
    """Instantiate ``HomographyCNN6`` using the backbone and dropout from ``config``.

    Returns:
        The newly constructed model (pretrained weights requested).
    """
    logger.info("Creating model...")
    model_settings = dict(
        input_channels=3,
        backbone_name=config["backbone"],
        pretrained=True,
        dropout_rate=config["dropout_rate"],
    )
    model = HomographyCNN6(**model_settings)
    logger.info(f"Model created with {count_parameters(model):,} parameters")
    return model
def train_model(model, train_loader, val_loader):
    """Train ``model`` for ``config["epochs"]`` epochs and return the trainer.

    CUDA is used when ``torch.cuda.is_available()`` reports it, otherwise
    the CPU.
    """
    if torch.cuda.is_available():
        device_name = "cuda"
    else:
        device_name = "cpu"
    device = torch.device(device_name)
    logger.info(f"Using device: {device}")

    trainer = HomographyTrainer(model, train_loader, val_loader, device)
    logger.info("Starting training...")
    num_epochs = config["epochs"]
    trainer.train(num_epochs)
    logger.info("Training completed")
    return trainer
def analyze_model(trainer):
    """Run ``analyze_training`` on ``trainer``, log its best validation loss, return its result."""
    logger.info("Analyzing model...")

    results = analyze_training(trainer)

    logger.info(f"Analysis complete: best_val_loss={results['best_val_loss']:.4f}")
    return results
def main():
    """Run the whole pipeline: dataset summary, loaders, model, training, analysis.

    Returns:
        tuple: ``(trainer, results)`` where ``results`` is the dict returned
        by ``analyze_model``.
    """
    banner = "=" * 50

    for line in (banner, "SiaN Training Pipeline", banner):
        logger.info(line)

    dataset_info = get_dataset_info()
    logger.info(f"Dataset: {dataset_info['size']} samples, keys={dataset_info['sample_keys']}")

    # Loaders are built before the model, matching the original call order.
    train_loader, val_loader = create_dataset()
    trainer = train_model(create_model(), train_loader, val_loader)
    results = analyze_model(trainer)

    for line in (banner, "Pipeline completed successfully", banner):
        logger.info(line)
    return trainer, results


# Script entry point: run the pipeline only when executed directly.
if __name__ == "__main__":
    main()

41
models/SiaN/src/model.py Normal file
View File

@@ -0,0 +1,41 @@
import torch
import torch.nn as nn
from torchvision import models
class HomographyCNN6(nn.Module):
    """Siamese regressor that predicts six homography parameters.

    Both images go through the same torchvision backbone, whose ``fc`` layer
    is replaced by an identity. The two feature vectors, their absolute
    difference and their element-wise product are concatenated and fed to a
    three-layer MLP head with six outputs.

    Args:
        input_channels: Number of channels of the input images. For values
            other than 3 the backbone's ``conv1`` is replaced by a freshly
            initialised convolution with that many input channels.
        backbone_name: Name of a constructor in ``torchvision.models``. The
            resulting model must expose an ``fc`` layer with ``in_features``.
        pretrained: Load the backbone's ``IMAGENET1K_V1`` weights when true.
        dropout_rate: Dropout probability used in the head.

    Raises:
        ValueError: If ``input_channels != 3`` and the backbone has no
            ``nn.Conv2d`` attribute named ``conv1`` to replace.
    """

    def __init__(self, input_channels=3, backbone_name="resnet18", pretrained=True, dropout_rate=0.3):
        super().__init__()
        # Pass the weights by name so they resolve against the selected
        # backbone's own enum; the ResNet18 enum used before only fits
        # resnet18.
        weights = "IMAGENET1K_V1" if pretrained else None
        backbone = getattr(models, backbone_name)(weights=weights)

        if input_channels != 3:
            stem = getattr(backbone, "conv1", None)
            if not isinstance(stem, nn.Conv2d):
                raise ValueError(
                    f"backbone {backbone_name!r} has no Conv2d 'conv1' to adapt "
                    f"to {input_channels} input channels"
                )
            # Same geometry as the original stem; its weights are new, so any
            # pretrained stem weights are discarded.
            backbone.conv1 = nn.Conv2d(
                input_channels,
                stem.out_channels,
                kernel_size=stem.kernel_size,
                stride=stem.stride,
                padding=stem.padding,
                bias=stem.bias is not None,
            )

        self.feature_dim = backbone.fc.in_features
        backbone.fc = nn.Identity()
        self.backbone = backbone
        # The head sees four feature_dim-sized vectors concatenated (see forward).
        self.head = nn.Sequential(
            nn.Linear(self.feature_dim * 4, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate),
            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate),
            nn.Linear(256, 6),
        )

    def forward(self, img1, img2):
        """Return the head output for the image pair ``(img1, img2)``."""
        f1 = self.backbone(img1)
        f2 = self.backbone(img2)
        combined = torch.cat([f1, f2, torch.abs(f1 - f2), f1 * f2], dim=1)
        return self.head(combined)
class HomographyLoss6(nn.Module):
    """Training loss for the parameter vector: ``nn.MSELoss`` with default settings."""

    def __init__(self):
        super().__init__()
        # Kept as a sub-module under the name ``criterion``.
        self.criterion = nn.MSELoss()

    def forward(self, pred, target):
        """Return the mean-squared error between ``pred`` and ``target``."""
        loss = self.criterion(pred, target)
        return loss
def count_parameters(model):
    """Return the total number of elements over all of ``model.parameters()``."""
    total = 0
    for tensor in model.parameters():
        total += tensor.numel()
    return total

83
models/SiaN/src/train.py Normal file
View File

@@ -0,0 +1,83 @@
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
from .dataloader import create_data_loaders
from .model import HomographyCNN6, HomographyLoss6, count_parameters
from .utils import config
class HomographyTrainer:
    """Training loop for a two-image homography regressor.

    Holds the model, both data loaders, an Adam optimizer (learning rate
    from ``config["learning_rate"]``) and a ``HomographyLoss6`` criterion.
    ``train`` writes TensorBoard scalars and checkpoints below
    ``config["output_dir"]``.

    Attributes:
        writer: ``None`` until ``train`` is called, afterwards the (closed)
            ``SummaryWriter`` of the last run.
        best_val_loss: Lowest validation loss seen so far (``inf`` initially).
    """

    def __init__(self, model, train_loader, val_loader, device):
        self.model = model.to(device)
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.device = device
        self.criterion = HomographyLoss6()
        self.optimizer = optim.Adam(model.parameters(), lr=config["learning_rate"])
        self.writer = None
        self.best_val_loss = float("inf")

    def _batch_loss(self, batch):
        """Move one batch to the device and return ``(loss, batch_size)``."""
        google_img = batch["google_img"].to(self.device)
        yandex_img = batch["yandex_img"].to(self.device)
        target = batch["homography_params"].to(self.device)
        output = self.model(google_img, yandex_img)
        return self.criterion(output, target), google_img.size(0)

    def train_epoch(self, epoch):
        """Run one optimisation pass over ``train_loader``.

        Returns:
            dict: ``{"loss": sample-weighted mean loss}``. Raises
            ``ZeroDivisionError`` if the loader yields no samples.
        """
        self.model.train()
        total_loss, total_samples = 0, 0
        pbar = tqdm(self.train_loader, desc=f"Epoch {epoch}")
        for batch in pbar:
            self.optimizer.zero_grad()
            loss, batch_size = self._batch_loss(batch)
            loss.backward()
            self.optimizer.step()
            # Weight by batch size so a smaller final batch does not skew the mean.
            total_loss += loss.item() * batch_size
            total_samples += batch_size
            pbar.set_postfix({"loss": loss.item()})
        return {"loss": total_loss / total_samples}

    def validate(self):
        """Evaluate on ``val_loader`` without gradients.

        Returns:
            dict: ``{"loss": sample-weighted mean loss}``. Raises
            ``ZeroDivisionError`` if the loader yields no samples.
        """
        self.model.eval()
        total_loss, total_samples = 0, 0
        with torch.no_grad():
            for batch in tqdm(self.val_loader, desc="Validation"):
                loss, batch_size = self._batch_loss(batch)
                total_loss += loss.item() * batch_size
                total_samples += batch_size
        return {"loss": total_loss / total_samples}

    def train(self, num_epochs):
        """Train for ``num_epochs`` epochs, validating after each one.

        Per-epoch losses are printed and written to TensorBoard under
        ``Loss/train`` and ``Loss/val``. A checkpoint is saved whenever the
        validation loss improves on ``best_val_loss``.
        """
        log_dir = config["output_dir"]
        os.makedirs(log_dir, exist_ok=True)
        self.writer = SummaryWriter(log_dir)
        try:
            for epoch in range(1, num_epochs + 1):
                train_metrics = self.train_epoch(epoch)
                val_metrics = self.validate()
                # The writer used to be opened without ever receiving data.
                self.writer.add_scalar("Loss/train", train_metrics["loss"], epoch)
                self.writer.add_scalar("Loss/val", val_metrics["loss"], epoch)
                print(f"Train Loss: {train_metrics['loss']:.4f}, Val Loss: {val_metrics['loss']:.4f}")
                if val_metrics["loss"] < self.best_val_loss:
                    self.best_val_loss = val_metrics["loss"]
                    self.save_checkpoint(epoch, is_best=True)
                    print(f"Best model saved (val loss: {val_metrics['loss']:.4f})")
        finally:
            # Flush and close the event file even if an epoch raises.
            self.writer.close()

    def save_checkpoint(self, epoch, is_best=False):
        """Write ``checkpoint_epoch_<epoch>.pt`` (and ``best_model.pt`` if ``is_best``).

        Files go to ``<config["output_dir"]>/checkpoints``. The checkpoint
        holds the epoch, model weights, optimizer state and the current
        ``best_val_loss``.
        """
        ckpt_dir = os.path.join(config["output_dir"], "checkpoints")
        os.makedirs(ckpt_dir, exist_ok=True)
        ckpt = {
            "epoch": epoch,
            "model_state_dict": self.model.state_dict(),
            # Extra key so training can be resumed; existing keys are unchanged.
            "optimizer_state_dict": self.optimizer.state_dict(),
            "val_loss": self.best_val_loss,
        }
        torch.save(ckpt, os.path.join(ckpt_dir, f"checkpoint_epoch_{epoch}.pt"))
        if is_best:
            torch.save(ckpt, os.path.join(ckpt_dir, "best_model.pt"))

54
models/SiaN/src/utils.py Normal file
View File

@@ -0,0 +1,54 @@
import os

import numpy as np
# Settings shared by the data loader, model and trainer.
# The two directory entries default to the original developer-machine paths
# but can be overridden with the SIAN_DATA_DIR / SIAN_OUTPUT_DIR environment
# variables, so the package runs on other machines without editing this file.
config = {
    "data_dir": os.environ.get(
        "SIAN_DATA_DIR",
        r"C:\Users\admin\Projects\autopilot\datasets\ya_go_maps\images",
    ),
    "image_size": (256, 256),
    "batch_size": 32,
    "train_split": 0.8,
    "num_workers": 0,
    "epochs": 100,
    "learning_rate": 2e-4,
    "dropout_rate": 0.3,
    "backbone": "resnet18",
    "output_dir": os.environ.get(
        "SIAN_OUTPUT_DIR",
        r"C:\Users\admin\Projects\autopilot\models\SiaN\runs",
    ),
}
def get_camera_matrix(w, h):
    """Return a 3x3 float32 matrix built from an image width ``w`` and height ``h``.

    The diagonal holds ``w / 2`` and ``h / 2``; the last column holds the
    same two values followed by 1.
    """
    half_w = w / 2
    half_h = h / 2
    rows = (
        (half_w, 0, half_w),
        (0, half_h, half_h),
        (0, 0, 1),
    )
    return np.array(rows, dtype=np.float32)
def generate_random_homography_params(angle_range=10, translation_range=0.1, scale_range=(0.9, 1.1)):
    """Draw a random parameter vector ``[rx, ry, rz, tx, ty, scale]``.

    Angles are sampled uniformly in ``[-angle_range, angle_range]`` degrees
    and converted to radians, translations uniformly in
    ``[-translation_range, translation_range]``, and the scale uniformly
    from ``scale_range``. Sampling uses the global ``np.random`` state.
    """
    draw = np.random.uniform

    # Draw order (scale, tx, ty, rx, ry, rz) is kept so that seeded runs
    # produce the same values as before.
    scale = draw(*scale_range)
    shifts = [draw(-translation_range, translation_range) for _ in range(2)]
    angles = [np.radians(draw(-angle_range, angle_range)) for _ in range(3)]

    return np.array([*angles, *shifts, scale])
def homography_params_to_matrix(params, K):
    """Build the homography ``K @ Rx @ Ry @ Rz @ T @ inv(K)`` from six parameters.

    ``params`` unpacks to ``(rx, ry, rz, tx, ty, scale)``. ``Rx``, ``Ry``
    and ``Rz`` are the axis rotations by those angles, and ``T`` is the
    identity with ``tx``, ``ty`` and ``scale`` in its last column.
    """
    rx, ry, rz, tx, ty, scale = params

    cos_x, sin_x = np.cos(rx), np.sin(rx)
    cos_y, sin_y = np.cos(ry), np.sin(ry)
    cos_z, sin_z = np.cos(rz), np.sin(rz)

    rot_x = np.array([[1, 0, 0], [0, cos_x, -sin_x], [0, sin_x, cos_x]], dtype=np.float32)
    rot_y = np.array([[cos_y, 0, sin_y], [0, 1, 0], [-sin_y, 0, cos_y]], dtype=np.float32)
    rot_z = np.array([[cos_z, -sin_z, 0], [sin_z, cos_z, 0], [0, 0, 1]], dtype=np.float32)
    shift = np.array([[1, 0, tx], [0, 1, ty], [0, 0, scale]], dtype=np.float32)

    # Multiply strictly left to right, the same association as the chained
    # ``@`` expression this replaces.
    H = K
    for factor in (rot_x, rot_y, rot_z, shift, np.linalg.inv(K)):
        H = H @ factor
    return H
def matrix_to_homography_params(H, K):
    """Recover ``[rx, ry, rz, tx, ty, scale]`` from a homography.

    This inverts ``homography_params_to_matrix``: with
    ``E = inv(K) @ H @ K = R @ T``, where ``R = Rx @ Ry @ Rz`` and ``T`` is
    the identity with ``(tx, ty, scale)`` as its last column, the first two
    columns of ``E`` are the first two columns of ``R`` and the last column
    is ``R @ (tx, ty, scale)``.

    The previous implementation read ``scale`` from the determinant of the
    upper-left 2x2 block and ``rx`` from the translation entry, so it did
    not reproduce the parameters even for a pure translation or pure scale.

    If ``E`` is not exactly of the form ``R @ T`` (for example a product of
    several such matrices), its first two columns are replaced by the
    nearest orthonormal pair, and the result is an approximation.

    Args:
        H: 3x3 homography.
        K: 3x3 matrix used to build ``H`` (see ``get_camera_matrix``).

    Returns:
        np.ndarray: float32 array ``[rx, ry, rz, tx, ty, scale]`` with
        angles in radians.

    Raises:
        ValueError: If the first two columns of ``E`` are (numerically) zero
            or not finite, so no rotation can be recovered.
    """
    # Work in float64 for the decomposition; only the result is float32.
    K64 = np.asarray(K, dtype=np.float64)
    E = np.linalg.inv(K64) @ np.asarray(H, dtype=np.float64) @ K64

    # Polar factor of the 3x2 block: closest matrix with orthonormal columns.
    # The mean singular value absorbs any overall scale factor of H.
    U, S, Vt = np.linalg.svd(E[:, :2], full_matrices=False)
    gain = S.mean()
    if not np.isfinite(gain) or gain <= np.finfo(np.float64).eps:
        raise ValueError("degenerate homography: cannot recover a rotation")
    Q = U @ Vt

    r0, r1 = Q[:, 0], Q[:, 1]
    r2 = np.cross(r0, r1)  # completes a right-handed rotation matrix
    R = np.stack([r0, r1, r2], axis=1)

    # E[:, 2] = R @ (tx, ty, scale), and R is orthogonal.
    tx, ty, scale = R.T @ (E[:, 2] / gain)

    # For R = Rx @ Ry @ Rz: R[0] = (cy*cz, -cy*sz, sy), R[1, 2] = -sx*cy,
    # R[2, 2] = cx*cy (c/s = cos/sin of the angle about that axis).
    ry = np.arctan2(R[0, 2], np.hypot(R[0, 0], R[0, 1]))
    rz = np.arctan2(-R[0, 1], R[0, 0])
    rx = np.arctan2(-R[1, 2], R[2, 2])
    return np.array([rx, ry, rz, tx, ty, scale], dtype=np.float32)