feat: add similarity model

2026-03-03 21:42:23 +03:00
parent 1de150b386
commit 43cd4222bc
7 changed files with 1801 additions and 0 deletions
--- a/models/SiaN-similarity/README.md
+++ b/models/SiaN-similarity/README.md
@@ -0,0 +1,131 @@
+# SiaN-Similarity: Модель для оценки схожести изображений
+
+Модель оценивает схожесть двух изображений размером 256x256. Возвращает значение от 0 до 1, где 1 означает полную схожесть, а 0 — полное различие.
+
+## Архитектура модели
+
+Модель основана на CNN с residual блоками:
+- Общий энкодер для обоих изображений
+- Residual blocks с batch normalization
+- Слой слияния признаков
+- Регрессионная голова с сигмоидой на выходе
+
+## Использование
+
+### Установка зависимостей
+```bash
+pip install torch torchvision pillow
+```
+
+### Быстрый старт
+
+```python
+import torch
+from model import SimilarityCNN
+
+# Создание модели
+model = SimilarityCNN(
+    input_channels=3,
+    hidden_channels=64,
+    num_blocks=4,
+    dropout_rate=0.3,
+    use_batch_norm=True,
+)
+
+# Предсказание схожести
+img1 = torch.randn(1, 3, 256, 256)  # Изображение 1
+img2 = torch.randn(1, 3, 256, 256)  # Изображение 2
+
+similarity = model.predict_similarity(img1, img2)
+print(f"Схожесть: {similarity.item():.4f}")
+```
+
+### Обучение модели
+
+```bash
+python train_similarity.py \
+    --data_dir "путь/к/данным" \
+    --batch_size 32 \
+    --epochs 100 \
+    --learning_rate 2e-4 \
+    --output_dir "runs/similarity"
+```
+
+### Предсказание на новых изображениях
+
+```bash
+python predict.py \
+    --image1 "путь/к/изображению1.png" \
+    --image2 "путь/к/изображению2.png" \
+    --checkpoint "runs/similarity/checkpoints/best_model.pt"
+```
+
+## Структура проекта
+
+```
+SiaN-similarity/
+├── model.py              # Основная модель
+├── dataloader.py         # Даталоадер для обучения
+├── train_similarity.py   # Скрипт для обучения
+├── predict.py            # Скрипт для предсказания
+├── train.py              # Оригинальный тренировочный скрипт
+└── README.md             # Этот файл
+```
+
+## Конфигурация модели
+
+Параметры по умолчанию:
+- `input_channels`: 3 (RGB)
+- `hidden_channels`: 64
+- `num_blocks`: 4
+- `dropout_rate`: 0.3
+- `use_batch_norm`: True
+- `image_size`: (256, 256)
+
+## Формат данных
+
+Модель ожидает изображения размером 256x256 пикселей в формате RGB.
+Для обучения используется датасет с парами изображений и метками схожести.
+
+## Примеры использования
+
+### 1. Создание и тестирование модели
+```python
+from model import create_similarity_model
+
+model = create_similarity_model(
+    model_type="cnn",
+    input_size=(256, 256),
+    hidden_channels=32,
+    num_blocks=3,
+)
+```
+
+### 2. Использование функции потерь
+```python
+from model import SimilarityLoss
+
+loss_fn = SimilarityLoss()
+pred = torch.tensor([[0.8], [0.2]])
+target = torch.tensor([[1.0], [0.0]])
+loss = loss_fn(pred, target)
+```
+
+### 3. Расчет метрик
+```python
+metrics = loss_fn.compute_metrics(pred, target)
+print(f"Accuracy: {metrics['accuracy']:.4f}")
+print(f"F1-score: {metrics['f1']:.4f}")
+```
+
+## Требования
+
+- Python 3.8+
+- PyTorch 1.9+
+- torchvision
+- Pillow
+- numpy
+
+## Лицензия
+
+MIT
--- a/models/SiaN-similarity/dataloader.py
+++ b/models/SiaN-similarity/dataloader.py
@@ -0,0 +1,519 @@
+# Module-level default settings.
+config = {
+    # Optimizer parameters
+    "learning_rate": 2e-4,
+    "beta1": 0.5,
+    "beta2": 0.999,
+    # Training parameters
+    # NOTE: "batch_size" used to appear twice in this literal (4 here and 32
+    # further down). Python keeps the last duplicate, so 32 was always the
+    # effective value; the dead entry has been removed.
+    "batch_size": 32,
+    "epochs": 100,
+    # GAN parameters
+    "gan_mode": "vanilla",  # "vanilla", "lsgan", or "wgangp"
+    "lambda_L1": 100.0,  # weight of the L1 loss
+    # Regularization
+    "grad_clip": 1.0,
+    # Early stopping
+    "early_stopping_patience": 20,
+    # Output
+    "output_dir": "runs/gan_training",
+    # Logging
+    "log_interval": 10,  # log every N batches
+    "save_interval": 5,  # save a checkpoint every N epochs
+    # Data
+    "data_dir": r"C:\Users\admin\Projects\autopilot\datasets\ya_go_maps\images",
+    "image_size": [256, 256],
+    "train_split": 0.8,
+    "num_workers": 0,
+}
+
+
+import os
+import random
+from typing import Any, Dict, List, Optional, Tuple
+
+import cv2
+import numpy as np
+import torch
+from PIL import Image
+from torch.utils.data import DataLoader, Dataset
+
+
+class YaGoDataset(Dataset):
+    """
+    Dataset for homography estimation between Yandex and Google map image pairs.
+
+    This dataset loads pairs of images (Yandex and Google maps) and provides
+    homography matrices for data augmentation and training.
+    """
+
+    def __init__(
+        self,
+        root_dir: str,
+        transform=None,
+        augment: bool = True,
+        max_samples: Optional[int] = None,
+        image_size: Tuple[int, int] = (700, 700),
+        cache_homographies: bool = True,
+        device: torch.device = None,
+    ):
+        """
+        Set up the dataset and index the image pairs found on disk.
+
+        Args:
+            root_dir: Directory with the image pairs, named
+                ``{idx:04d}_google.png`` / ``{idx:04d}_yandex.png``.
+            transform: Optional callable applied to each PIL image; when it is
+                falsy the images are converted to float tensors in [0, 1].
+            augment: Whether samples are warped with random homographies.
+            max_samples: Keep only the first ``max_samples`` pairs (None keeps all).
+            image_size: Target size for images as (height, width).
+            cache_homographies: Stored on the instance; no method of this class
+                reads it.
+            device: Stored on the instance; no method of this class reads it.
+        """
+        # Plain configuration first; the directory scan below needs root_dir.
+        self.root_dir = root_dir
+        self.image_size = image_size
+        self.transform = transform
+        self.augment = augment
+        self.device = device
+        self.cache_homographies = cache_homographies
+
+        discovered = self._discover_image_pairs()
+        self.image_pairs = (
+            discovered if max_samples is None else discovered[:max_samples]
+        )
+
+        print(f"Found {len(self.image_pairs)} image pairs in {root_dir}")
+
+    def _discover_image_pairs(self) -> List[Dict[str, Any]]:
+        """
+        Scan ``root_dir`` for Google images that have a Yandex counterpart.
+
+        A file qualifies when its name ends with ``_google.png`` and the part
+        before the first underscore parses as an integer.  The partner is
+        looked up as ``{idx:04d}_yandex.png``; pairs without it are dropped.
+        Results follow the sorted order of the Google file names.
+        """
+        google_names = sorted(
+            name for name in os.listdir(self.root_dir) if name.endswith("_google.png")
+        )
+
+        pairs: List[Dict[str, Any]] = []
+        for google_name in google_names:
+            prefix = google_name.partition("_")[0]
+            try:
+                number = int(prefix)
+            except ValueError:
+                # Non-numeric prefix: not part of the numbered pair scheme.
+                continue
+
+            yandex_path = os.path.join(self.root_dir, f"{number:04d}_yandex.png")
+            if not os.path.exists(yandex_path):
+                continue
+
+            pairs.append(
+                {
+                    "idx": number,
+                    "google_path": os.path.join(self.root_dir, google_name),
+                    "yandex_path": yandex_path,
+                }
+            )
+
+        return pairs
+
+    def __len__(self) -> int:
+        """Return the number of discovered image pairs (after any ``max_samples`` cap)."""
+        return len(self.image_pairs)
+
+    def __getitem__(self, idx: int) -> Dict[str, Any]:
+        """
+        Build one sample.
+
+        The Yandex image always comes from pair ``idx``.  When a uniform draw
+        exceeds 0.5 the Google image is replaced by the Google image of a
+        randomly chosen pair, which produces mismatched examples.
+
+        Returns a dictionary with:
+            - 'google_img': Google map image tensor
+            - 'yandex_img': Yandex map image tensor
+            - 'homography': float32 3x3 tensor; the relative homography of the
+              two random warps when ``augment`` is on, the identity otherwise
+            - 'same_domain': bool, True when both images come from the same pair
+            - 'idx': file index of the pair as a long tensor
+        """
+        pair_info = self.image_pairs[idx]
+        google_path = pair_info["google_path"]
+        yandex_path = pair_info["yandex_path"]
+        same_domain = True
+
+        if np.random.rand() > 0.5:
+            random_idx = np.random.randint(0, len(self))
+            google_path = self.image_pairs[random_idx]["google_path"]
+            # The draw may land on the requested pair itself.
+            same_domain = random_idx == idx
+
+        # PIL expects (width, height); image_size is stored as (height, width).
+        target_size = (self.image_size[1], self.image_size[0])
+
+        # Context managers release the file handles as soon as the pixel data
+        # has been copied by convert().
+        with Image.open(yandex_path) as raw_yandex:
+            yandex_img = raw_yandex.convert("RGB")
+        with Image.open(google_path) as raw_google:
+            google_img = raw_google.convert("RGB")
+
+        google_img = google_img.resize(target_size, Image.BILINEAR)
+        yandex_img = yandex_img.resize(target_size, Image.BILINEAR)
+
+        if self.augment:
+            # Random matrices are only drawn when they are actually applied.
+            matrices = self._get_homography_matrix(pair_info["idx"])
+            google_img, yandex_img, homography_matrix = self._apply_augmentation(
+                google_img, yandex_img, matrices
+            )
+            homography_tensor = torch.from_numpy(homography_matrix).float()
+        else:
+            # Same dtype as the augmented branch, so downstream matmul and
+            # batch collation see float32 either way (np.eye is float64).
+            homography_tensor = torch.eye(3, dtype=torch.float32)
+
+        if self.transform:
+            google_img = self.transform(google_img)
+            yandex_img = self.transform(yandex_img)
+        else:
+            # Default conversion: HWC uint8 -> CHW float scaled to [0, 1].
+            google_img = (
+                torch.from_numpy(np.array(google_img)).float().permute(2, 0, 1) / 255.0
+            )
+            yandex_img = (
+                torch.from_numpy(np.array(yandex_img)).float().permute(2, 0, 1) / 255.0
+            )
+
+        return {
+            "google_img": google_img,
+            "yandex_img": yandex_img,
+            "homography": homography_tensor,
+            "same_domain": same_domain,
+            "idx": torch.tensor(pair_info["idx"], dtype=torch.long),
+        }
+
+    def _get_homography_matrix(
+        self, idx: int
+    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+        """
+        Draw a fresh triple of homographies for one sample.
+
+        Two random homographies ``H1`` and ``H2`` are generated and returned
+        together with ``inv(H1) @ H2``.  ``idx`` is accepted but not used, and
+        nothing is cached: every call produces new matrices.
+        """
+        first = self.generate_random_homography()
+        second = self.generate_random_homography()
+        relative = np.linalg.inv(first) @ second
+        return (first, second, relative)
+
+    def generate_random_homography(self) -> np.ndarray:
+        """
+        Sample a random 3x3 homography.
+
+        The matrix is ``K @ Rx @ Ry @ Rz @ T @ inv(K)`` where ``K`` comes from
+        ``get_camera_matrix``, the rotations use angles drawn from +/-10
+        degrees, and ``T`` carries a translation in [-0.5, 0.5] in its last
+        column and a factor in [0.8, 1.2] in its bottom-right entry.
+
+        Returns:
+            np.ndarray: 3x3 homography matrix.
+        """
+        # The order of the draws is kept fixed so seeded runs stay reproducible.
+        scale = np.random.uniform(0.8, 1.2)
+        shift_x = np.random.uniform(-0.50, 0.50)
+        shift_y = np.random.uniform(-0.50, 0.50)
+
+        max_angle = np.radians(10)
+        about_x = np.random.uniform(-max_angle, max_angle)
+        about_y = np.random.uniform(-max_angle, max_angle)
+        about_z = np.random.uniform(-max_angle, max_angle)
+
+        cos_z, sin_z = np.cos(about_z), np.sin(about_z)
+        cos_y, sin_y = np.cos(about_y), np.sin(about_y)
+        cos_x, sin_x = np.cos(about_x), np.sin(about_x)
+
+        rot_x = np.array([[1, 0, 0], [0, cos_x, -sin_x], [0, sin_x, cos_x]])
+        rot_y = np.array([[cos_y, 0, sin_y], [0, 1, 0], [-sin_y, 0, cos_y]])
+        rot_z = np.array([[cos_z, -sin_z, 0], [sin_z, cos_z, 0], [0, 0, 1]])
+
+        # Translation in the last column, scale in the homogeneous slot.
+        shift_scale = np.array([[1, 0, shift_x], [0, 1, shift_y], [0, 0, scale]])
+
+        camera = self.get_camera_matrix()
+
+        # Left-to-right product, same association as a chained ``@``.
+        product = camera
+        for factor in (rot_x, rot_y, rot_z, shift_scale, np.linalg.inv(camera)):
+            product = product @ factor
+        return product
+
+    def get_camera_matrix(self) -> np.ndarray:
+        """
+        Build the 3x3 matrix ``K`` that frames the random transform.
+
+        ``K`` has ``w / 2`` and ``h / 2`` on its diagonal and ``(w / 2, h / 2)``
+        in its last column.  The size is taken from this dataset's own
+        ``image_size`` (stored as (height, width)), i.e. the size the images
+        are resized to before warping, instead of the module-level ``config``,
+        which ignored the value passed to the constructor.
+
+        Returns:
+            np.ndarray: 3x3 matrix.
+        """
+        h, w = self.image_size
+
+        return np.array(
+            [
+                [w / 2, 0, w / 2],
+                [0, h / 2, h / 2],
+                [0, 0, 1],
+            ]
+        )
+
+    def _apply_augmentation(
+        self,
+        google_img: Image.Image,
+        yandex_img: Image.Image,
+        matrices: Tuple[np.ndarray, np.ndarray, np.ndarray],
+    ) -> Tuple[Image.Image, Image.Image, np.ndarray]:
+        """
+        Warp both images of a pair with their own homography.
+
+        Args:
+            google_img: Google map image
+            yandex_img: Yandex map image
+            matrices: Triple of homographies; element 0 warps the Yandex image,
+                element 1 warps the Google image, element 2 is returned as-is.
+
+        Returns:
+            Tuple of (warped_google_img, warped_yandex_img, matrices[2])
+        """
+        yandex_warp = matrices[0]
+        google_warp = matrices[1]
+        relative = matrices[2]
+
+        warped_yandex = self._apply_homography_to_image(yandex_img, yandex_warp)
+        warped_google = self._apply_homography_to_image(google_img, google_warp)
+
+        return warped_google, warped_yandex, relative
+
+    def _apply_homography_to_image(
+        self, img: Image.Image, homography: np.ndarray
+    ) -> Image.Image:
+        """
+        Warp a PIL image with a 3x3 homography.
+
+        The output keeps the input's width and height; interpolation is
+        bilinear and the border mode is left at OpenCV's default.
+
+        Args:
+            img: PIL Image to transform
+            homography: 3x3 homography matrix
+
+        Returns:
+            Transformed PIL Image
+        """
+        pixels = np.array(img)
+        height, width = pixels.shape[:2]
+
+        # cv2 takes the destination size as (width, height).
+        warped = cv2.warpPerspective(
+            pixels, homography, (width, height), flags=cv2.INTER_LINEAR
+        )
+
+        return Image.fromarray(warped)
+
+    def get_sample_without_augmentation(self, idx: int) -> Dict[str, Any]:
+        """
+        Return pair ``idx`` as resized PIL images, without any warping.
+
+        Unlike ``__getitem__`` the Google image is never swapped and the images
+        stay PIL objects.  ``'homography'`` holds the full triple produced by
+        ``_get_homography_matrix`` (freshly drawn, not applied to the images).
+        """
+        info = self.image_pairs[idx]
+        # PIL wants (width, height); image_size is (height, width).
+        target_size = (self.image_size[1], self.image_size[0])
+
+        resized = {}
+        for source in ("google", "yandex"):
+            picture = Image.open(info[f"{source}_path"]).convert("RGB")
+            resized[source] = picture.resize(target_size, Image.BILINEAR)
+
+        matrices = self._get_homography_matrix(info["idx"])
+
+        return {
+            "google_img": resized["google"],
+            "yandex_img": resized["yandex"],
+            "homography": matrices,
+            "idx": info["idx"],
+            "google_path": info["google_path"],
+            "yandex_path": info["yandex_path"],
+        }
+
+
+class _AugmentedSubset(Dataset):
+    """
+    View over selected samples that composes each sample's homography with a
+    freshly drawn random one.
+
+    Defined at module level so DataLoader worker processes can pickle it, and
+    derived from ``Dataset`` rather than ``Subset`` so that batched fetching
+    through ``Subset.__getitems__`` (recent PyTorch releases) cannot bypass
+    ``__getitem__``.
+    """
+
+    def __init__(self, dataset, indices, device=None):
+        self.dataset = dataset
+        self.indices = indices
+        self.device = device
+
+    def __len__(self):
+        return len(self.indices)
+
+    def __getitem__(self, idx):
+        sample = self.dataset[self.indices[idx]]
+        google_img = sample["google_img"]
+        yandex_img = sample["yandex_img"]
+        # Force float32: the base dataset may return a float64 identity and
+        # matmul requires both operands to share a dtype.
+        homography = sample["homography"].float()
+        aug_homography = torch.from_numpy(
+            self.dataset.generate_random_homography()
+        ).float()
+
+        if self.device is not None:
+            google_img = google_img.to(self.device)
+            yandex_img = yandex_img.to(self.device)
+            homography = homography.to(self.device)
+            aug_homography = aug_homography.to(self.device)
+
+        # Only the matrix is composed; the images themselves are not warped.
+        return {
+            "google_img": google_img,
+            "yandex_img": yandex_img,
+            "homography": aug_homography @ homography,
+            # Kept so train batches expose the same label as val batches.
+            "same_domain": sample["same_domain"],
+            "idx": sample["idx"],
+        }
+
+
+def create_data_loaders(
+    root_dir: str,
+    batch_size: int = 32,
+    train_split: float = 0.8,
+    num_workers: int = 4,
+    image_size: Tuple[int, int] = (256, 256),
+    augment_train: bool = True,
+    augment_val: bool = False,
+    device: torch.device = None,
+) -> Tuple[DataLoader, DataLoader]:
+    """
+    Create train and validation data loaders over one ``YaGoDataset``.
+
+    The dataset is built without image warping, its indices are shuffled with
+    ``random.shuffle`` and split by ``train_split``.
+
+    Args:
+        root_dir: Directory containing image pairs
+        batch_size: Batch size for data loaders
+        train_split: Fraction of data to use for training
+        num_workers: Number of worker processes for data loading
+        image_size: Target image size (height, width)
+        augment_train: When True, train samples get their homography composed
+            with a random one (images are not warped) and are moved to
+            ``device`` if one is given
+        augment_val: Currently ignored; validation samples are never augmented
+        device: Optional device for augmented train samples.  With a non-CPU
+            device prefer ``num_workers=0``, since tensors are moved inside
+            ``__getitem__``.
+
+    Returns:
+        Tuple of (train_loader, val_loader)
+    """
+    from torch.utils.data import Subset
+    from torchvision import transforms
+
+    transform = transforms.Compose(
+        [
+            transforms.ToTensor(),
+            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+        ]
+    )
+
+    full_dataset = YaGoDataset(
+        root_dir=root_dir,
+        transform=transform,
+        augment=False,  # homography augmentation is layered on top below
+        image_size=image_size,
+        cache_homographies=True,
+        device=device,
+    )
+
+    dataset_size = len(full_dataset)
+    train_size = int(train_split * dataset_size)
+
+    indices = list(range(dataset_size))
+    random.shuffle(indices)
+    train_indices = indices[:train_size]
+    val_indices = indices[train_size:]
+
+    val_dataset = Subset(full_dataset, val_indices)
+    if augment_train:
+        train_dataset = _AugmentedSubset(full_dataset, train_indices, device=device)
+    else:
+        train_dataset = Subset(full_dataset, train_indices)
+
+    # Pinning only works for CPU tensors; augmented train samples may already
+    # live on another device, in which case pinning must be switched off.
+    train_on_device = (
+        augment_train
+        and device is not None
+        and torch.device(device).type != "cpu"
+    )
+
+    train_loader = DataLoader(
+        train_dataset,
+        batch_size=batch_size,
+        shuffle=True,
+        num_workers=num_workers,
+        pin_memory=not train_on_device,
+    )
+
+    val_loader = DataLoader(
+        val_dataset,
+        batch_size=batch_size,
+        shuffle=False,
+        num_workers=num_workers,
+        pin_memory=True,
+    )
+
+    return train_loader, val_loader
+
+
+# Example usage.  Guarded so that importing this module (as example.py does,
+# and as DataLoader worker processes do on spawn-based platforms) does not
+# touch the disk or build data loaders as a side effect.
+if __name__ == "__main__":
+    dataset = YaGoDataset(
+        root_dir=config["data_dir"],
+        augment=False,
+        image_size=(256, 256),
+    )
+
+    print(f"Dataset size: {len(dataset)}")
+
+    # Get a sample
+    sample = dataset[0]
+    print(f"Sample keys: {list(sample.keys())}")
+    print(f"Google image shape: {sample['google_img'].shape}")
+    print(f"Yandex image shape: {sample['yandex_img'].shape}")
+    print(f"Homography shape: {sample['homography'].shape}")
+
+    # Create data loaders
+    train_loader, val_loader = create_data_loaders(
+        root_dir=config["data_dir"],
+        batch_size=16,
+        train_split=0.8,
+    )
+
+    print(f"Train batches: {len(train_loader)}")
+    print(f"Val batches: {len(val_loader)}")
--- a/models/SiaN-similarity/demo.py
+++ b/models/SiaN-similarity/demo.py
@@ -0,0 +1,192 @@
+"""
+Демонстрационный скрипт для модели оценки схожести изображений.
+"""
+
+import matplotlib.pyplot as plt
+import numpy as np
+import torch
+from model import SimilarityCNN, SimilarityLoss
+from PIL import Image, ImageDraw, ImageFont
+from torchvision import transforms
+
+
+def create_test_images():
+    """
+    Draw four 256x256 RGB test pictures on a white background.
+
+    Returns a list of ``(title, image)`` tuples: a red square, the same square
+    shifted by 5 pixels, a blue circle and a green triangle.
+    """
+
+    def blank_canvas():
+        # Fresh white image plus a drawing handle bound to it.
+        canvas = Image.new("RGB", (256, 256), color="white")
+        return canvas, ImageDraw.Draw(canvas)
+
+    red_square, pen = blank_canvas()
+    pen.rectangle([50, 50, 200, 200], fill="red", outline="black", width=2)
+
+    shifted_square, pen = blank_canvas()
+    pen.rectangle([55, 55, 205, 205], fill="red", outline="black", width=2)
+
+    blue_circle, pen = blank_canvas()
+    pen.ellipse([50, 50, 200, 200], fill="blue", outline="black", width=2)
+
+    green_triangle, pen = blank_canvas()
+    pen.polygon(
+        [(128, 50), (50, 200), (200, 200)], fill="green", outline="black", width=2
+    )
+
+    return [
+        ("Красный квадрат", red_square),
+        ("Похожий красный квадрат", shifted_square),
+        ("Синий круг", blue_circle),
+        ("Зеленый треугольник", green_triangle),
+    ]
+
+
+def preprocess_image(image):
+    """
+    Turn a PIL image into a normalized tensor with a leading batch axis.
+
+    The image is converted with ``ToTensor`` and normalized with the mean/std
+    constants below before ``unsqueeze(0)`` adds the batch dimension.
+    """
+    to_tensor = transforms.ToTensor()
+    normalize = transforms.Normalize(
+        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
+    )
+    normalized = normalize(to_tensor(image))
+    return normalized.unsqueeze(0)
+
+
+def display_results(images, similarities):
+    """
+    Show the test images in a 2x2 grid and print a few pairwise comparisons.
+
+    Args:
+        images: List of ``(title, image)`` tuples.
+        similarities: Matrix indexed as ``similarities[i, j]`` with the score
+            for images ``i`` and ``j``.
+    """
+    _, grid = plt.subplots(2, 2, figsize=(12, 10))
+    panels = grid.flatten()
+
+    for position, (caption, picture) in enumerate(images):
+        panel = panels[position]
+        panel.imshow(picture)
+        panel.set_title(caption, fontsize=12, fontweight="bold")
+        panel.axis("off")
+
+    plt.suptitle("Тестовые изображения", fontsize=16, fontweight="bold")
+    plt.tight_layout()
+    plt.show()
+
+    separator = "=" * 60
+    print("\n" + separator)
+    print("РЕЗУЛЬТАТЫ СРАВНЕНИЯ ИЗОБРАЖЕНИЙ")
+    print(separator)
+
+    def first_index(label):
+        # Position of the first image carrying this title.
+        return [pos for pos, (name, _) in enumerate(images) if name == label][0]
+
+    comparisons = [
+        ("Красный квадрат", "Похожий красный квадрат"),
+        ("Красный квадрат", "Синий круг"),
+        ("Красный квадрат", "Зеленый треугольник"),
+        ("Похожий красный квадрат", "Синий круг"),
+    ]
+
+    for name1, name2 in comparisons:
+        idx1 = first_index(name1)
+        idx2 = first_index(name2)
+        sim = similarities[idx1, idx2]
+
+        if sim > 0.5:
+            interpretation = "ПОХОЖИ"
+        else:
+            interpretation = "РАЗНЫЕ"
+
+        # Scores far from the 0.5 threshold count as high confidence.
+        if sim > 0.7 or sim < 0.3:
+            confidence = "Высокая"
+        else:
+            confidence = "Средняя"
+
+        print(f"\n{name1} vs {name2}:")
+        print(f"  Схожесть: {sim:.4f}")
+        print(f"  Интерпретация: {interpretation}")
+        print(f"  Уверенность: {confidence}")
+
+
+def test_loss_function():
+    """Run ``SimilarityLoss`` on four hand-written predictions and print loss and metrics."""
+    banner = "=" * 60
+    print("\n" + banner)
+    print("ТЕСТИРОВАНИЕ ФУНКЦИИ ПОТЕРЬ")
+    print(banner)
+
+    criterion = SimilarityLoss()
+
+    # Two confident and two less confident predictions, all on the right side.
+    predictions = torch.tensor([[0.9], [0.1], [0.7], [0.3]])
+    targets = torch.tensor([[1.0], [0.0], [1.0], [0.0]])
+
+    loss_value = criterion(predictions, targets).item()
+    print(f"\nПотери: {loss_value:.4f}")
+
+    print("\nМетрики:")
+    for metric_name, metric_value in criterion.compute_metrics(
+        predictions, targets
+    ).items():
+        print(f"  {metric_name}: {metric_value:.4f}")
+
+
+def main():
+    """
+    Run the demo end to end.
+
+    Builds an untrained ``SimilarityCNN``, scores every pair of the synthetic
+    test images, shows the results, exercises the loss function and prints a
+    short description of the model.
+    """
+    rule = "=" * 60
+
+    print("ДЕМОНСТРАЦИЯ МОДЕЛИ ОЦЕНКИ СХОЖЕСТИ ИЗОБРАЖЕНИЙ")
+    print(rule)
+
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print(f"\nУстройство: {device}")
+
+    model = SimilarityCNN(
+        input_channels=3,
+        hidden_channels=64,
+        num_blocks=4,
+        dropout_rate=0.3,
+        use_batch_norm=True,
+    ).to(device)
+
+    parameter_count = sum(p.numel() for p in model.parameters())
+    print(f"Параметры модели: {parameter_count:,}")
+
+    print("\nСоздание тестовых изображений...")
+    test_images = create_test_images()
+
+    # One batched tensor per test image, already on the target device.
+    tensors = [preprocess_image(picture).to(device) for _, picture in test_images]
+
+    print("\nРасчет схожести между изображениями...")
+    n_images = len(test_images)
+    similarity_matrix = np.zeros((n_images, n_images))
+
+    model.eval()
+    with torch.no_grad():
+        for row in range(n_images):
+            # Only the upper triangle is evaluated; the value is mirrored.
+            for col in range(row, n_images):
+                score = model.predict_similarity(tensors[row], tensors[col]).item()
+                similarity_matrix[row, col] = score
+                similarity_matrix[col, row] = score
+
+    display_results(test_images, similarity_matrix)
+
+    test_loss_function()
+
+    print("\n" + rule)
+    print("ИНФОРМАЦИЯ О МОДЕЛИ")
+    print(rule)
+    print("\nАрхитектура модели:")
+    print("-" * 40)
+    for line in (
+        "Вход: два изображения 256x256x3",
+        "Энкодер: CNN с residual блоками",
+        "Слой слияния: объединение признаков",
+        "Выход: значение схожести [0, 1]",
+        "\nИнтерпретация результатов:",
+        "- 0.8-1.0: Очень похожи",
+        "- 0.6-0.8: Похожи",
+        "- 0.4-0.6: Нейтрально",
+        "- 0.2-0.4: Разные",
+        "- 0.0-0.2: Совершенно разные",
+    ):
+        print(line)
+
+    print("\n" + rule)
+    print("ДЕМОНСТРАЦИЯ ЗАВЕРШЕНА")
+    print(rule)
+
+
+if __name__ == "__main__":
+    main()
--- a/models/SiaN-similarity/example.py
+++ b/models/SiaN-similarity/example.py
@@ -0,0 +1,216 @@
+"""
+Пример использования модели оценки схожести с даталоадером.
+"""
+
+import torch
+from dataloader import YaGoDataset, create_data_loaders
+from model import SimilarityCNN, SimilarityLoss
+
+
+def main():
+    """
+    Walk through dataset creation, loaders, one test batch, a short training
+    and validation pass, and a few single-pair predictions.
+
+    The loss function is created once up front and the running averages are
+    guarded, so an empty loader no longer ends in a NameError or a division
+    by zero.
+    """
+    print("ПРИМЕР ИСПОЛЬЗОВАНИЯ МОДЕЛИ СХОЖЕСТИ С ДАТАЛОАДЕРОМ")
+    print("=" * 60)
+
+    # Configuration
+    config = {
+        "data_dir": r"C:\Users\admin\Projects\autopilot\datasets\ya_go_maps\images",
+        "batch_size": 4,
+        "image_size": (256, 256),
+        "train_split": 0.8,
+        "num_workers": 0,
+    }
+
+    # Device
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print(f"Устройство: {device}")
+
+    # 1. Dataset
+    print("\n1. СОЗДАНИЕ ДАТАСЕТА")
+    print("-" * 40)
+
+    dataset = YaGoDataset(
+        root_dir=config["data_dir"],
+        augment=False,
+        image_size=config["image_size"],
+    )
+
+    print(f"Размер датасета: {len(dataset)} пар изображений")
+
+    # Fetch one sample from the dataset
+    sample = dataset[0]
+    print("\nПример из датасета:")
+    print(f"  Google image shape: {sample['google_img'].shape}")
+    print(f"  Yandex image shape: {sample['yandex_img'].shape}")
+    print(f"  Same domain: {sample['same_domain']}")
+    print(f"  Index: {sample['idx'].item()}")
+
+    # 2. Data loaders
+    print("\n2. СОЗДАНИЕ ДАТАЛОАДЕРОВ")
+    print("-" * 40)
+
+    train_loader, val_loader = create_data_loaders(
+        root_dir=config["data_dir"],
+        batch_size=config["batch_size"],
+        train_split=config["train_split"],
+        num_workers=config["num_workers"],
+        image_size=config["image_size"],
+        augment_train=True,
+        augment_val=False,
+        device=device,
+    )
+
+    print(f"Train batches: {len(train_loader)}")
+    print(f"Val batches: {len(val_loader)}")
+
+    # 3. Model
+    print("\n3. СОЗДАНИЕ МОДЕЛИ")
+    print("-" * 40)
+
+    model = SimilarityCNN(
+        input_channels=3,
+        hidden_channels=64,
+        num_blocks=4,
+        dropout_rate=0.3,
+        use_batch_norm=True,
+    ).to(device)
+
+    print(f"Параметры модели: {sum(p.numel() for p in model.parameters()):,}")
+
+    # Created once here: sections 5 and 6 need it even if section 4 sees no batch.
+    loss_fn = SimilarityLoss().to(device)
+
+    # 4. Single-batch check
+    print("\n4. ТЕСТИРОВАНИЕ НА ОДНОМ БАТЧЕ")
+    print("-" * 40)
+
+    for batch in train_loader:
+        google_img = batch["google_img"].to(device)
+        yandex_img = batch["yandex_img"].to(device)
+        same_domain = batch["same_domain"].float().to(device).unsqueeze(1)
+
+        print(f"Batch size: {google_img.shape[0]}")
+        print(f"Image shape: {google_img.shape[1:]}")
+        print(f"Same domain labels: {same_domain.squeeze().tolist()}")
+
+        # Similarity prediction
+        with torch.no_grad():
+            predictions = model.predict_similarity(google_img, yandex_img)
+            print("\nПредсказания схожести:")
+            for i in range(len(predictions)):
+                print(
+                    f"  Sample {i}: {predictions[i].item():.4f} (target: {same_domain[i].item():.1f})"
+                )
+
+        # Loss
+        loss = loss_fn(predictions, same_domain)
+        print(f"\nПотери на батче: {loss.item():.4f}")
+
+        # Metrics
+        metrics = loss_fn.compute_metrics(predictions, same_domain)
+        print("\nМетрики на батче:")
+        for key, value in metrics.items():
+            print(f"  {key}: {value:.4f}")
+
+        break  # first batch only
+
+    # 5. Short training demonstration
+    print("\n5. ДЕМОНСТРАЦИЯ ОБУЧЕНИЯ НА ОДНОЙ ЭПОХЕ")
+    print("-" * 40)
+
+    optimizer = torch.optim.Adam(model.parameters(), lr=2e-4)
+    model.train()
+
+    total_loss = 0
+    total_samples = 0
+
+    for batch_idx, batch in enumerate(train_loader):
+        if batch_idx >= 3:  # limit the demo to 3 batches
+            break
+
+        google_img = batch["google_img"].to(device)
+        yandex_img = batch["yandex_img"].to(device)
+        same_domain = batch["same_domain"].float().to(device).unsqueeze(1)
+
+        optimizer.zero_grad()
+
+        predictions = model(google_img, yandex_img)
+        loss = loss_fn(predictions, same_domain)
+
+        loss.backward()
+        optimizer.step()
+
+        # Weight by batch size so the average is per sample.
+        total_loss += loss.item() * google_img.size(0)
+        total_samples += google_img.size(0)
+
+        print(f"Batch {batch_idx + 1}: loss = {loss.item():.4f}")
+
+    # NaN instead of ZeroDivisionError when the loader yielded nothing.
+    avg_loss = total_loss / total_samples if total_samples else float("nan")
+    print(f"\nСредние потери за 3 батча: {avg_loss:.4f}")
+
+    # 6. Validation
+    print("\n6. ВАЛИДАЦИЯ")
+    print("-" * 40)
+
+    model.eval()
+    val_loss = 0
+    val_samples = 0
+
+    with torch.no_grad():
+        for batch_idx, batch in enumerate(val_loader):
+            if batch_idx >= 2:  # limit the demo to 2 batches
+                break
+
+            google_img = batch["google_img"].to(device)
+            yandex_img = batch["yandex_img"].to(device)
+            same_domain = batch["same_domain"].float().to(device).unsqueeze(1)
+
+            predictions = model.predict_similarity(google_img, yandex_img)
+            loss = loss_fn(predictions, same_domain)
+
+            val_loss += loss.item() * google_img.size(0)
+            val_samples += google_img.size(0)
+
+            print(f"Val batch {batch_idx + 1}: loss = {loss.item():.4f}")
+
+    avg_val_loss = val_loss / val_samples if val_samples else float("nan")
+    print(f"\nСредние потери на валидации: {avg_val_loss:.4f}")
+
+    # 7. Single-pair predictions
+    print("\n7. ПРИМЕР ДЛЯ ОТДЕЛЬНЫХ ИЗОБРАЖЕНИЙ")
+    print("-" * 40)
+
+    # NOTE(review): YaGoDataset.__getitem__ may swap in the Google image of a
+    # random pair, so the labels printed below hold only when
+    # sample["same_domain"] is True for both samples — confirm before relying
+    # on them.
+    sample1 = dataset[0]
+    sample2 = dataset[1]
+
+    # Add the batch dimension and move to the device
+    img1_1 = sample1["google_img"].unsqueeze(0).to(device)
+    img1_2 = sample1["yandex_img"].unsqueeze(0).to(device)
+
+    img2_1 = sample2["google_img"].unsqueeze(0).to(device)
+    img2_2 = sample2["yandex_img"].unsqueeze(0).to(device)
+
+    with torch.no_grad():
+        # Images taken from the same sample
+        sim_same1 = model.predict_similarity(img1_1, img1_2)
+        sim_same2 = model.predict_similarity(img2_1, img2_2)
+
+        # Images taken from different samples
+        sim_diff1 = model.predict_similarity(img1_1, img2_2)
+        sim_diff2 = model.predict_similarity(img2_1, img1_2)
+
+    print("Сравнение пар изображений:")
+    print(f"  Пара 1 (один домен): {sim_same1.item():.4f}")
+    print(f"  Пара 2 (один домен): {sim_same2.item():.4f}")
+    print(f"  Разные домены 1: {sim_diff1.item():.4f}")
+    print(f"  Разные домены 2: {sim_diff2.item():.4f}")
+
+    print("\n" + "=" * 60)
+    print("ПРИМЕР ЗАВЕРШЕН")
+    print("=" * 60)
+
+
+if __name__ == "__main__":
+    main()
--- a/models/SiaN-similarity/model.py
+++ b/models/SiaN-similarity/model.py
@@ -0,0 +1,322 @@
+from typing import Optional, Tuple
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class SimilarityCNN(nn.Module):
+    """
+    CNN model for similarity estimation between two images.
+
+    Takes two images as input and outputs a similarity score between 0 and 1.
+    """
+
+    def __init__(
+        self,
+        input_channels: int = 3,
+        hidden_channels: int = 64,
+        num_blocks: int = 4,
+        dropout_rate: float = 0.3,
+        use_batch_norm: bool = True,
+    ):
+        super().__init__()
+
+        self.input_channels = input_channels
+        self.hidden_channels = hidden_channels
+        self.num_blocks = num_blocks
+        self.dropout_rate = dropout_rate
+        self.use_batch_norm = use_batch_norm
+
+        self.encoder = self._build_encoder()
+
+        self.fusion_layers = self._build_fusion_layers()
+
+        self.regression_head = self._build_regression_head()
+
+        self._initialize_weights()
+
+    def _build_encoder(self) -> nn.Module:
+        layers = []
+        in_channels = self.input_channels
+        out_channels = self.hidden_channels
+
+        layers.append(
+            nn.Conv2d(in_channels, out_channels, kernel_size=7, stride=2, padding=3)
+        )
+        if self.use_batch_norm:
+            layers.append(nn.BatchNorm2d(out_channels))
+        layers.append(nn.ReLU(inplace=True))
+        layers.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
+
+        for i in range(self.num_blocks):
+            block_in_channels = out_channels
+            block_out_channels = out_channels * 2 if i < 2 else out_channels
+
+            layers.append(
+                ResidualBlock(
+                    in_channels=block_in_channels,
+                    out_channels=block_out_channels,
+                    stride=1 if i == 0 else 2,
+                    dropout_rate=self.dropout_rate,
+                    use_batch_norm=self.use_batch_norm,
+                )
+            )
+
+            if i < 2:
+                out_channels = block_out_channels
+
+        return nn.Sequential(*layers)
+
+    def _build_fusion_layers(self) -> nn.Module:
+        fused_channels = self.hidden_channels * 8
+
+        layers = [
+            nn.Conv2d(
+                fused_channels, self.hidden_channels * 4, kernel_size=3, padding=1
+            ),
+            nn.BatchNorm2d(self.hidden_channels * 4)
+            if self.use_batch_norm
+            else nn.Identity(),
+            nn.ReLU(inplace=True),
+            nn.Dropout2d(self.dropout_rate),
+            nn.Conv2d(
+                self.hidden_channels * 4,
+                self.hidden_channels * 2,
+                kernel_size=3,
+                padding=1,
+            ),
+            nn.BatchNorm2d(self.hidden_channels * 2)
+            if self.use_batch_norm
+            else nn.Identity(),
+            nn.ReLU(inplace=True),
+            nn.Dropout2d(self.dropout_rate),
+            nn.AdaptiveAvgPool2d((1, 1)),
+        ]
+
+        return nn.Sequential(*layers)
+
+    def _build_regression_head(self) -> nn.Module:
+        input_features = self.hidden_channels * 2
+
+        layers = [
+            nn.Flatten(),
+            nn.Linear(input_features, 512),
+            nn.BatchNorm1d(512) if self.use_batch_norm else nn.Identity(),
+            nn.ReLU(inplace=True),
+            nn.Dropout(self.dropout_rate),
+            nn.Linear(512, 256),
+            nn.BatchNorm1d(256) if self.use_batch_norm else nn.Identity(),
+            nn.ReLU(inplace=True),
+            nn.Dropout(self.dropout_rate),
+            nn.Linear(256, 128),
+            nn.BatchNorm1d(128) if self.use_batch_norm else nn.Identity(),
+            nn.ReLU(inplace=True),
+            nn.Dropout(self.dropout_rate),
+            nn.Linear(128, 1),
+            nn.Sigmoid(),
+        ]
+
+        return nn.Sequential(*layers)
+
+    def _initialize_weights(self):
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.Linear):
+                nn.init.normal_(m.weight, 0, 0.01)
+                nn.init.constant_(m.bias, 0)
+
+    def forward(
+        self,
+        img1: torch.Tensor,
+        img2: torch.Tensor,
+    ) -> torch.Tensor:
+        features1 = self.encoder(img1)
+        features2 = self.encoder(img2)
+
+        combined_features = torch.cat([features1, features2], dim=1)
+
+        fused_features = self.fusion_layers(combined_features)
+
+        similarity = self.regression_head(fused_features)
+
+        return similarity
+
+    def predict_similarity(
+        self,
+        img1: torch.Tensor,
+        img2: torch.Tensor,
+    ) -> torch.Tensor:
+        original_training = self.training
+        self.eval()
+        with torch.no_grad():
+            similarity = self.forward(img1, img2)
+            if original_training:
+                self.train()
+            return similarity
+
+
+class ResidualBlock(nn.Module):
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: int,
+        stride: int = 1,
+        dropout_rate: float = 0.3,
+        use_batch_norm: bool = True,
+    ):
+        super().__init__()
+
+        self.conv1 = nn.Conv2d(
+            in_channels,
+            out_channels,
+            kernel_size=3,
+            stride=stride,
+            padding=1,
+            bias=False,
+        )
+        self.bn1 = nn.BatchNorm2d(out_channels) if use_batch_norm else nn.Identity()
+        self.relu1 = nn.ReLU(inplace=True)
+        self.dropout1 = (
+            nn.Dropout2d(dropout_rate) if dropout_rate > 0 else nn.Identity()
+        )
+
+        self.conv2 = nn.Conv2d(
+            out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False
+        )
+        self.bn2 = nn.BatchNorm2d(out_channels) if use_batch_norm else nn.Identity()
+        self.relu2 = nn.ReLU(inplace=True)
+        self.dropout2 = (
+            nn.Dropout2d(dropout_rate) if dropout_rate > 0 else nn.Identity()
+        )
+
+        self.shortcut = nn.Sequential()
+        if stride != 1 or in_channels != out_channels:
+            self.shortcut = nn.Sequential(
+                nn.Conv2d(
+                    in_channels, out_channels, kernel_size=1, stride=stride, bias=False
+                ),
+                nn.BatchNorm2d(out_channels) if use_batch_norm else nn.Identity(),
+            )
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        identity = self.shortcut(x)
+
+        out = self.conv1(x)
+        out = self.bn1(out)
+        out = self.relu1(out)
+        out = self.dropout1(out)
+
+        out = self.conv2(out)
+        out = self.bn2(out)
+
+        out += identity
+        out = self.relu2(out)
+        out = self.dropout2(out)
+
+        return out
+
+
+class SimilarityLoss(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.criterion = nn.BCELoss()
+
+    def forward(
+        self,
+        pred_similarity: torch.Tensor,
+        target_same: torch.Tensor,
+    ) -> torch.Tensor:
+        return self.criterion(pred_similarity, target_same)
+
+    def compute_metrics(
+        self,
+        pred_similarity: torch.Tensor,
+        target_same: torch.Tensor,
+        threshold: float = 0.5,
+    ) -> dict:
+        with torch.no_grad():
+            pred_binary = (pred_similarity > threshold).float()
+            target_binary = (target_same > 0.5).float()
+
+            correct = (pred_binary == target_binary).float()
+            accuracy = correct.mean().item()
+
+            tp = ((pred_binary == 1) & (target_binary == 1)).float().sum().item()
+            fp = ((pred_binary == 1) & (target_binary == 0)).float().sum().item()
+            fn = ((pred_binary == 0) & (target_binary == 1)).float().sum().item()
+            tn = ((pred_binary == 0) & (target_binary == 0)).float().sum().item()
+
+            precision = tp / (tp + fp + 1e-8)
+            recall = tp / (tp + fn + 1e-8)
+            f1 = 2 * precision * recall / (precision + recall + 1e-8)
+
+            return {
+                "accuracy": accuracy,
+                "precision": precision,
+                "recall": recall,
+                "f1": f1,
+                "mean_similarity": pred_similarity.mean().item(),
+            }
+
+
+def create_similarity_model(
+    model_type: str = "cnn",
+    input_size: Tuple[int, int] = (256, 256),
+    **kwargs,
+) -> nn.Module:
+    if model_type == "cnn":
+        return SimilarityCNN(**kwargs)
+    else:
+        raise ValueError(f"Unknown model type: {model_type}")
+
+
+if __name__ == "__main__":
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print(f"Using device: {device}")
+
+    model = SimilarityCNN(
+        input_channels=3,
+        hidden_channels=64,
+        num_blocks=4,
+        dropout_rate=0.3,
+        use_batch_norm=True,
+    ).to(device)
+
+    print(
+        f"Model created with {sum(p.numel() for p in model.parameters()):,} parameters"
+    )
+
+    batch_size = 4
+    height, width = 256, 256
+
+    img1 = torch.randn(batch_size, 3, height, width).to(device)
+    img2 = torch.randn(batch_size, 3, height, width).to(device)
+
+    print("\nTesting forward pass...")
+    output = model(img1, img2)
+    print(f"Output shape: {output.shape}")
+    print(f"Sample output: {output[0].item():.4f}")
+
+    print("\nTesting prediction...")
+    pred = model.predict_similarity(img1, img2)
+    print(f"Prediction shape: {pred.shape}")
+
+    print("\nTesting loss function...")
+    target = torch.rand(batch_size, 1).to(device)
+    loss_fn = SimilarityLoss().to(device)
+    loss = loss_fn(output, target)
+    print(f"Loss value: {loss.item():.6f}")
+
+    print("\nTesting metrics...")
+    metrics = loss_fn.compute_metrics(output, target)
+    for key, value in metrics.items():
+        print(f"{key}: {value:.6f}")
+
+    print("\nAll tests completed successfully!")
--- a/models/SiaN-similarity/predict.py
+++ b/models/SiaN-similarity/predict.py
@@ -0,0 +1,146 @@
+"""
+Script for predicting similarity between two images.
+"""
+
+import argparse
+import os
+from pathlib import Path
+
+import torch
+from model import SimilarityCNN
+from PIL import Image
+from torchvision import transforms
+
+
+def load_image(image_path: str, image_size: tuple = (256, 256)) -> torch.Tensor:
+    """Load and preprocess image."""
+    transform = transforms.Compose(
+        [
+            transforms.Resize(image_size),
+            transforms.ToTensor(),
+            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+        ]
+    )
+
+    image = Image.open(image_path).convert("RGB")
+    return transform(image).unsqueeze(0)  # Add batch dimension
+
+
+def predict_similarity(
+    model: SimilarityCNN,
+    image1_path: str,
+    image2_path: str,
+    device: torch.device,
+    image_size: tuple = (256, 256),
+) -> float:
+    """Predict similarity between two images."""
+    model.eval()
+
+    img1 = load_image(image1_path, image_size).to(device)
+    img2 = load_image(image2_path, image_size).to(device)
+
+    with torch.no_grad():
+        similarity = model(img1, img2)
+
+    return similarity.item()
+
+
+def load_model(
+    checkpoint_path: str,
+    device: torch.device,
+    **model_kwargs,
+) -> SimilarityCNN:
+    """Load model from checkpoint."""
+    model = SimilarityCNN(**model_kwargs).to(device)
+
+    checkpoint = torch.load(checkpoint_path, map_location=device)
+    model.load_state_dict(checkpoint["model_state_dict"])
+
+    return model
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Predict similarity between two images"
+    )
+    parser.add_argument("--image1", type=str, required=True, help="Path to first image")
+    parser.add_argument(
+        "--image2", type=str, required=True, help="Path to second image"
+    )
+    parser.add_argument(
+        "--checkpoint",
+        type=str,
+        default="runs/similarity/checkpoints/best_model.pt",
+        help="Path to model checkpoint",
+    )
+    parser.add_argument(
+        "--device",
+        type=str,
+        default="cuda" if torch.cuda.is_available() else "cpu",
+        help="Device to use for inference",
+    )
+    parser.add_argument(
+        "--image_size",
+        type=int,
+        default=256,
+        help="Image size for model input",
+    )
+
+    args = parser.parse_args()
+
+    device = torch.device(args.device)
+    print(f"Using device: {device}")
+
+    if not os.path.exists(args.image1):
+        print(f"Error: Image not found: {args.image1}")
+        return
+
+    if not os.path.exists(args.image2):
+        print(f"Error: Image not found: {args.image2}")
+        return
+
+    if not os.path.exists(args.checkpoint):
+        print(f"Warning: Checkpoint not found: {args.checkpoint}")
+        print("Using randomly initialized model for demonstration")
+        model = SimilarityCNN(
+            input_channels=3,
+            hidden_channels=64,
+            num_blocks=4,
+            dropout_rate=0.3,
+            use_batch_norm=True,
+        ).to(device)
+    else:
+        print(f"Loading model from: {args.checkpoint}")
+        model = load_model(
+            checkpoint_path=args.checkpoint,
+            device=device,
+            input_channels=3,
+            hidden_channels=64,
+            num_blocks=4,
+            dropout_rate=0.3,
+            use_batch_norm=True,
+        )
+
+    print(
+        f"Model loaded with {sum(p.numel() for p in model.parameters()):,} parameters"
+    )
+
+    similarity = predict_similarity(
+        model=model,
+        image1_path=args.image1,
+        image2_path=args.image2,
+        device=device,
+        image_size=(args.image_size, args.image_size),
+    )
+
+    print(f"\nSimilarity between images:")
+    print(f"  Image 1: {args.image1}")
+    print(f"  Image 2: {args.image2}")
+    print(f"  Similarity score: {similarity:.4f}")
+    print(f"  Interpretation: {'Similar' if similarity > 0.5 else 'Different'}")
+
+    return similarity
+
+
+# Invoke main() only when this file is run directly, not when it is imported.
+if __name__ == "__main__":
+    main()
--- a/models/SiaN-similarity/train.py
+++ b/models/SiaN-similarity/train.py
@@ -0,0 +1,275 @@
+"""
+Training script for image similarity estimation.
+"""
+
+import argparse
+import os
+import time
+from datetime import datetime
+
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from dataloader import create_data_loaders
+from model import SimilarityCNN, SimilarityLoss, create_similarity_model
+from torch.utils.data import DataLoader
+from torch.utils.tensorboard import SummaryWriter
+from tqdm import tqdm
+
+
+class SimilarityTrainer:
+    def __init__(
+        self,
+        model: nn.Module,
+        train_loader: DataLoader,
+        val_loader: DataLoader,
+        device: torch.device,
+        config: dict,
+    ):
+        self.model = model.to(device)
+        self.train_loader = train_loader
+        self.val_loader = val_loader
+        self.device = device
+        self.config = config
+
+        self.criterion = SimilarityLoss()
+        self.optimizer = optim.Adam(
+            model.parameters(),
+            lr=config.get("learning_rate", 2e-4),
+            betas=(config.get("beta1", 0.5), config.get("beta2", 0.999)),
+        )
+
+        self.writer = None
+        self.best_val_loss = float("inf")
+        self.epochs_without_improvement = 0
+
+    def train_epoch(self, epoch: int) -> dict:
+        self.model.train()
+        total_loss = 0
+        total_samples = 0
+
+        pbar = tqdm(self.train_loader, desc=f"Epoch {epoch}")
+        for batch_idx, batch in enumerate(pbar):
+            google_img = batch["google_img"].to(self.device)
+            yandex_img = batch["yandex_img"].to(self.device)
+            target = batch["same_domain"].float().to(self.device).unsqueeze(1)
+
+            self.optimizer.zero_grad()
+
+            output = self.model(google_img, yandex_img)
+            loss = self.criterion(output, target)
+
+            loss.backward()
+            self.optimizer.step()
+
+            total_loss += loss.item() * google_img.size(0)
+            total_samples += google_img.size(0)
+
+            if batch_idx % self.config.get("log_interval", 10) == 0:
+                metrics = self.criterion.compute_metrics(output, target)
+                pbar.set_postfix(
+                    {
+                        "loss": loss.item(),
+                        "acc": metrics["accuracy"],
+                    }
+                )
+
+                if self.writer:
+                    self.writer.add_scalar(
+                        "train/loss",
+                        loss.item(),
+                        epoch * len(self.train_loader) + batch_idx,
+                    )
+                    self.writer.add_scalar(
+                        "train/accuracy",
+                        metrics["accuracy"],
+                        epoch * len(self.train_loader) + batch_idx,
+                    )
+
+        avg_loss = total_loss / total_samples
+        return {"loss": avg_loss}
+
+    def validate(self) -> dict:
+        self.model.eval()
+        total_loss = 0
+        total_samples = 0
+        all_metrics = []
+
+        with torch.no_grad():
+            for batch in tqdm(self.val_loader, desc="Validation"):
+                google_img = batch["google_img"].to(self.device)
+                yandex_img = batch["yandex_img"].to(self.device)
+                target = batch["same_domain"].float().to(self.device).unsqueeze(1)
+
+                output = self.model(google_img, yandex_img)
+                loss = self.criterion(output, target)
+
+                total_loss += loss.item() * google_img.size(0)
+                total_samples += google_img.size(0)
+
+                metrics = self.criterion.compute_metrics(output, target)
+                all_metrics.append(metrics)
+
+        avg_loss = total_loss / total_samples
+
+        avg_metrics = {}
+        for key in all_metrics[0].keys():
+            avg_metrics[key] = sum(m[key] for m in all_metrics) / len(all_metrics)
+
+        return {"loss": avg_loss, **avg_metrics}
+
+    def train(self, num_epochs: int):
+        log_dir = self.config.get("output_dir", "runs/similarity")
+        os.makedirs(log_dir, exist_ok=True)
+        self.writer = SummaryWriter(log_dir)
+
+        print(f"Starting training for {num_epochs} epochs")
+        print(f"Logging to: {log_dir}")
+
+        for epoch in range(1, num_epochs + 1):
+            print(f"\nEpoch {epoch}/{num_epochs}")
+
+            train_metrics = self.train_epoch(epoch)
+            val_metrics = self.validate()
+
+            print(f"Train Loss: {train_metrics['loss']:.4f}")
+            print(f"Val Loss: {val_metrics['loss']:.4f}")
+            print(f"Val Accuracy: {val_metrics['accuracy']:.4f}")
+            print(f"Val F1: {val_metrics['f1']:.4f}")
+
+            if self.writer:
+                self.writer.add_scalar("epoch/train_loss", train_metrics["loss"], epoch)
+                self.writer.add_scalar("epoch/val_loss", val_metrics["loss"], epoch)
+                self.writer.add_scalar(
+                    "epoch/val_accuracy", val_metrics["accuracy"], epoch
+                )
+
+            if val_metrics["loss"] < self.best_val_loss:
+                self.best_val_loss = val_metrics["loss"]
+                self.epochs_without_improvement = 0
+                self.save_checkpoint(epoch, val_metrics["loss"], is_best=True)
+                print(f"New best model saved with val loss: {val_metrics['loss']:.4f}")
+            else:
+                self.epochs_without_improvement += 1
+                self.save_checkpoint(epoch, val_metrics["loss"], is_best=False)
+
+            patience = self.config.get("early_stopping_patience", 20)
+            if self.epochs_without_improvement >= patience:
+                print(
+                    f"Early stopping triggered after {patience} epochs without improvement"
+                )
+                break
+
+        self.writer.close()
+
+    def save_checkpoint(self, epoch: int, val_loss: float, is_best: bool = False):
+        checkpoint_dir = os.path.join(
+            self.config.get("output_dir", "runs/similarity"), "checkpoints"
+        )
+        os.makedirs(checkpoint_dir, exist_ok=True)
+
+        checkpoint = {
+            "epoch": epoch,
+            "model_state_dict": self.model.state_dict(),
+            "optimizer_state_dict": self.optimizer.state_dict(),
+            "val_loss": val_loss,
+            "config": self.config,
+        }
+
+        checkpoint_path = os.path.join(checkpoint_dir, f"checkpoint_epoch_{epoch}.pt")
+        torch.save(checkpoint, checkpoint_path)
+
+        if is_best:
+            best_path = os.path.join(checkpoint_dir, "best_model.pt")
+            torch.save(checkpoint, best_path)
+
+    def load_checkpoint(self, checkpoint_path: str):
+        checkpoint = torch.load(checkpoint_path, map_location=self.device)
+        self.model.load_state_dict(checkpoint["model_state_dict"])
+        self.optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
+        return checkpoint["epoch"], checkpoint["val_loss"]
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Train similarity estimation model")
+    parser.add_argument(
+        "--data_dir",
+        type=str,
+        default=r"C:\Users\admin\Projects\autopilot\datasets\ya_go_maps\images",
+    )
+    parser.add_argument("--batch_size", type=int, default=32)
+    parser.add_argument("--epochs", type=int, default=100)
+    parser.add_argument("--learning_rate", type=float, default=2e-4)
+    parser.add_argument("--image_size", type=int, default=256)
+    parser.add_argument("--train_split", type=float, default=0.8)
+    parser.add_argument("--output_dir", type=str, default="runs/similarity")
+    parser.add_argument("--num_workers", type=int, default=0)
+    parser.add_argument(
+        "--device", type=str, default="cuda" if torch.cuda.is_available() else "cpu"
+    )
+
+    args = parser.parse_args()
+
+    config = {
+        "data_dir": args.data_dir,
+        "batch_size": args.batch_size,
+        "epochs": args.epochs,
+        "learning_rate": args.learning_rate,
+        "image_size": (args.image_size, args.image_size),
+        "train_split": args.train_split,
+        "output_dir": args.output_dir,
+        "num_workers": args.num_workers,
+        "log_interval": 10,
+        "save_interval": 5,
+        "early_stopping_patience": 20,
+        "beta1": 0.5,
+        "beta2": 0.999,
+    }
+
+    device = torch.device(args.device)
+    print(f"Using device: {device}")
+
+    print("Creating data loaders...")
+    train_loader, val_loader = create_data_loaders(
+        root_dir=config["data_dir"],
+        batch_size=config["batch_size"],
+        train_split=config["train_split"],
+        num_workers=config["num_workers"],
+        image_size=config["image_size"],
+        augment_train=True,
+        augment_val=False,
+        device=device,
+    )
+
+    print(f"Train batches: {len(train_loader)}")
+    print(f"Val batches: {len(val_loader)}")
+
+    print("Creating model...")
+    model = create_similarity_model(
+        model_type="cnn",
+        input_size=config["image_size"],
+        input_channels=3,
+        hidden_channels=64,
+        num_blocks=4,
+        dropout_rate=0.3,
+        use_batch_norm=True,
+    )
+
+    print(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")
+
+    trainer = SimilarityTrainer(
+        model=model,
+        train_loader=train_loader,
+        val_loader=val_loader,
+        device=device,
+        config=config,
+    )
+
+    print("Starting training...")
+    trainer.train(config["epochs"])
+
+    print("Training completed!")
+
+
+# Invoke main() only when this file is run directly, not when it is imported.
+if __name__ == "__main__":
+    main()