# autopilot/models/SiaN-similarity/train-adv.py


import os
import time
from datetime import datetime

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

# =============================================================================
# TRAINING LOOP WITH VISUALIZATION
# =============================================================================

class SimilarityTrainer:
    """Training/validation driver for a two-input similarity model.

    Every batch produced by the loaders is indexed with the keys
    ``'google_img'``, ``'yandex_img'`` and ``'same_domain'``.  The loss object
    (``SimilarityLoss``, defined outside this block) is called as
    ``criterion(output, target)`` and must also expose
    ``compute_metrics(output, target)`` returning a dict of scalar metrics;
    ``train`` reads the keys ``'accuracy'``, ``'precision'``, ``'recall'`` and
    ``'f1'`` from the validation metrics.

    Optional ``config`` keys read by this class: ``learning_rate``, ``beta1``,
    ``beta2``, ``grad_clip``, ``log_interval``, ``output_dir``,
    ``save_interval`` and ``early_stopping_patience``.
    """

    def __init__(
        self,
        model: nn.Module,
        trainloader: DataLoader,
        valloader: DataLoader,
        device: torch.device,
        config: dict,
    ):
        """Move ``model`` to ``device`` and build the loss, optimizer and history."""
        self.model = model.to(device)
        self.trainloader = trainloader
        self.valloader = valloader
        self.device = device
        self.config = config

        self.criterion = SimilarityLoss()
        self.optimizer = optim.Adam(
            self.model.parameters(),
            lr=config.get('learning_rate', 2e-4),
            betas=(config.get('beta1', 0.5), config.get('beta2', 0.999))
        )

        # TensorBoard writer; only created inside train().
        self.writer = None
        self.best_val_loss = float('inf')
        self.epochs_without_improvement = 0

        # Per-epoch metric history (one entry appended per epoch by train()).
        self.history = {
            'train_loss': [],
            'val_loss': [],
            'val_accuracy': [],
            'val_precision': [],
            'val_recall': [],
            'val_f1': [],
            'learning_rate': []
        }

    @staticmethod
    def _average_metrics(all_metrics: list) -> dict:
        """Return the unweighted per-key mean of a list of metric dicts.

        Returns an empty dict when ``all_metrics`` is empty.
        """
        if not all_metrics:
            return {}
        return {
            key: sum(m[key] for m in all_metrics) / len(all_metrics)
            for key in all_metrics[0]
        }

    def _unpack_batch(self, batch):
        """Move one batch to ``self.device`` and return (google, yandex, target).

        The target is cast to float and given a trailing dimension via
        ``unsqueeze(1)``.
        """
        google_img = batch['google_img'].to(self.device)
        yandex_img = batch['yandex_img'].to(self.device)
        target = batch['same_domain'].float().to(self.device).unsqueeze(1)
        return google_img, yandex_img, target

    def train_epoch(self, epoch: int) -> dict:
        """Run one training epoch.

        Args:
            epoch: Epoch number, used for the progress-bar label and for the
                TensorBoard global step.

        Returns:
            Dict with ``'loss'`` (sample-weighted mean loss) plus the mean of
            the metrics computed on the logged batches only (every
            ``log_interval``-th batch).

        Raises:
            ValueError: If the training loader yields no samples.
        """
        self.model.train()
        total_loss = 0.0
        total_samples = 0
        all_metrics = []

        log_interval = self.config.get('log_interval', 10)
        grad_clip = self.config.get('grad_clip', None)

        pbar = tqdm(self.trainloader, desc=f'Epoch {epoch}')
        for batch_idx, batch in enumerate(pbar):
            google_img, yandex_img, target = self._unpack_batch(batch)
            batch_size = google_img.size(0)

            self.optimizer.zero_grad()

            # Forward pass
            output = self.model(google_img, yandex_img)
            loss = self.criterion(output, target)

            # Backward pass
            loss.backward()

            # Gradient clipping (skipped when grad_clip is unset or falsy)
            if grad_clip:
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), grad_clip)

            self.optimizer.step()

            # Weight by batch size so a smaller final batch does not skew the mean.
            total_loss += loss.item() * batch_size
            total_samples += batch_size

            # Metrics are only computed on a subset of batches.
            if batch_idx % log_interval == 0:
                metrics = self.criterion.compute_metrics(output, target)
                all_metrics.append(metrics)
                pbar.set_postfix({
                    'loss': f"{loss.item():.4f}",
                    'acc': f"{metrics['accuracy']:.4f}"
                })

                if self.writer:
                    # train() passes 1-based epochs, so the first logged step
                    # is len(trainloader) rather than 0.
                    global_step = epoch * len(self.trainloader) + batch_idx
                    self.writer.add_scalar('train/loss', loss.item(), global_step)
                    self.writer.add_scalar('train/accuracy', metrics['accuracy'], global_step)

        if total_samples == 0:
            raise ValueError('Training dataloader produced no samples')

        return {'loss': total_loss / total_samples, **self._average_metrics(all_metrics)}

    def validate(self) -> dict:
        """Evaluate the model on the validation loader.

        Returns:
            Dict with ``'loss'`` (sample-weighted mean loss), the unweighted
            per-batch mean of every metric from ``compute_metrics``, and the
            CPU tensors ``'predictions'`` and ``'targets'`` concatenated along
            dimension 0 (kept for ROC / confusion-matrix plots).

        Raises:
            ValueError: If the validation loader yields no samples.
        """
        self.model.eval()
        total_loss = 0.0
        total_samples = 0
        all_metrics = []

        # Raw outputs/targets, kept for ROC and confusion matrix
        all_predictions = []
        all_targets = []

        with torch.no_grad():
            for batch in tqdm(self.valloader, desc='Validation'):
                google_img, yandex_img, target = self._unpack_batch(batch)
                batch_size = google_img.size(0)

                output = self.model(google_img, yandex_img)
                loss = self.criterion(output, target)

                total_loss += loss.item() * batch_size
                total_samples += batch_size

                all_metrics.append(self.criterion.compute_metrics(output, target))

                all_predictions.append(output.cpu())
                all_targets.append(target.cpu())

        if total_samples == 0:
            raise ValueError('Validation dataloader produced no samples')

        return {
            'loss': total_loss / total_samples,
            **self._average_metrics(all_metrics),
            'predictions': torch.cat(all_predictions, dim=0),
            'targets': torch.cat(all_targets, dim=0)
        }

    def train(self, num_epochs: int):
        """Run the main training loop.

        For each epoch (numbered from 1) this trains, validates, records the
        history, logs to TensorBoard, saves checkpoints and applies early
        stopping based on validation loss.

        Args:
            num_epochs: Maximum number of epochs to run.

        Returns:
            The ``history`` dict of per-epoch metric lists.
        """
        log_dir = self.config.get('output_dir', 'runs/similarity')
        os.makedirs(log_dir, exist_ok=True)
        self.writer = SummaryWriter(log_dir)

        patience = self.config.get('early_stopping_patience', 20)
        save_interval = self.config.get('save_interval', 5)

        print(f'\n{"="*70}')
        print(f'Starting training for {num_epochs} epochs')
        print(f'Logging to {log_dir}')
        print(f'{"="*70}\n')

        start_time = time.time()

        # The writer is closed in ``finally`` so an exception raised in the
        # middle of an epoch does not leak the event-file handle.
        try:
            for epoch in range(1, num_epochs + 1):
                epoch_start = time.time()
                print(f'\n--- Epoch {epoch}/{num_epochs} ---')

                # Train
                train_metrics = self.train_epoch(epoch)

                # Validate
                val_metrics = self.validate()

                # Store history
                self.history['train_loss'].append(train_metrics['loss'])
                self.history['val_loss'].append(val_metrics['loss'])
                self.history['val_accuracy'].append(val_metrics['accuracy'])
                self.history['val_precision'].append(val_metrics['precision'])
                self.history['val_recall'].append(val_metrics['recall'])
                self.history['val_f1'].append(val_metrics['f1'])
                self.history['learning_rate'].append(
                    self.optimizer.param_groups[0]['lr']
                )

                # Print metrics
                print(f'\nTrain Loss: {train_metrics["loss"]:.4f}')
                print(f'Val Loss:   {val_metrics["loss"]:.4f}')
                print(f'Val Accuracy:  {val_metrics["accuracy"]:.4f}')
                print(f'Val Precision: {val_metrics["precision"]:.4f}')
                print(f'Val Recall:    {val_metrics["recall"]:.4f}')
                print(f'Val F1:        {val_metrics["f1"]:.4f}')

                epoch_time = time.time() - epoch_start
                print(f'Epoch time: {epoch_time:.2f}s')

                # TensorBoard logging
                self.writer.add_scalar('epoch/train_loss', train_metrics['loss'], epoch)
                self.writer.add_scalar('epoch/val_loss', val_metrics['loss'], epoch)
                self.writer.add_scalar('epoch/val_accuracy', val_metrics['accuracy'], epoch)
                self.writer.add_scalar('epoch/val_precision', val_metrics['precision'], epoch)
                self.writer.add_scalar('epoch/val_recall', val_metrics['recall'], epoch)
                self.writer.add_scalar('epoch/val_f1', val_metrics['f1'], epoch)

                # Save checkpoint: always on improvement, otherwise periodically.
                if val_metrics['loss'] < self.best_val_loss:
                    self.best_val_loss = val_metrics['loss']
                    self.epochs_without_improvement = 0
                    self.save_checkpoint(epoch, val_metrics['loss'], is_best=True)
                    print(f'✓ New best model saved with val loss: {val_metrics["loss"]:.4f}')
                else:
                    self.epochs_without_improvement += 1
                    if epoch % save_interval == 0:
                        self.save_checkpoint(epoch, val_metrics['loss'], is_best=False)

                # Early stopping
                if self.epochs_without_improvement >= patience:
                    print(f'\n⚠ Early stopping triggered after {patience} epochs without improvement')
                    break

            total_time = time.time() - start_time
            print(f'\n{"="*70}')
            print(f'Training completed in {total_time/60:.2f} minutes')
            print(f'Best validation loss: {self.best_val_loss:.4f}')
            print(f'{"="*70}\n')
        finally:
            self.writer.close()

        return self.history

    def save_checkpoint(self, epoch: int, val_loss: float, is_best: bool = False):
        """Save model/optimizer state, config and history for ``epoch``.

        Writes ``checkpoint_epoch{epoch}.pt`` under ``<output_dir>/checkpoints``
        and, when ``is_best`` is true, also ``best_model.pt`` in the same
        directory.
        """
        checkpoint_dir = os.path.join(
            self.config.get('output_dir', 'runs/similarity'),
            'checkpoints'
        )
        os.makedirs(checkpoint_dir, exist_ok=True)

        checkpoint = {
            'epoch': epoch,
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'val_loss': val_loss,
            'config': self.config,
            'history': self.history
        }

        checkpoint_path = os.path.join(checkpoint_dir, f'checkpoint_epoch{epoch}.pt')
        torch.save(checkpoint, checkpoint_path)

        if is_best:
            best_path = os.path.join(checkpoint_dir, 'best_model.pt')
            torch.save(checkpoint, best_path)

    def load_checkpoint(self, checkpoint_path: str):
        """Restore model, optimizer, history and best validation loss.

        Args:
            checkpoint_path: Path to a file written by ``save_checkpoint``.

        Returns:
            Tuple ``(epoch, val_loss)`` stored in the checkpoint.
        """
        # NOTE(security): torch.load unpickles the file; only load checkpoints
        # that come from a trusted source.
        checkpoint = torch.load(checkpoint_path, map_location=self.device)
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        if 'history' in checkpoint:
            self.history = checkpoint['history']

        # Restore the best-so-far loss; otherwise a resumed run would start
        # from +inf and overwrite best_model.pt after its first epoch even if
        # that epoch is worse than the loaded model.
        recorded_val_losses = checkpoint.get('history', {}).get('val_loss')
        if recorded_val_losses:
            self.best_val_loss = min(recorded_val_losses)
        else:
            self.best_val_loss = checkpoint['val_loss']

        return checkpoint['epoch'], checkpoint['val_loss']


# =============================================================================
# VISUALIZATION FUNCTIONS
# =============================================================================

def plot_training_history(history: dict, save_path: str = None):
    """Plot per-epoch training curves on a 2x3 grid.

    Panels: train/val loss, validation accuracy, F1, precision, recall and
    learning rate (log-scaled y axis).

    Args:
        history: Dict with the list-valued keys ``'train_loss'``,
            ``'val_loss'``, ``'val_accuracy'``, ``'val_f1'``,
            ``'val_precision'``, ``'val_recall'`` and ``'learning_rate'``.
        save_path: If truthy, the figure is also written to this path.
    """
    fig, axes = plt.subplots(2, 3, figsize=(18, 10))
    fig.suptitle('Training History - Siamese Network для корреляции снимков',
                 fontsize=16, fontweight='bold')

    epochs = range(1, len(history['train_loss']) + 1)

    # Loss: the only panel with two curves and a legend.
    loss_ax = axes[0, 0]
    loss_ax.plot(epochs, history['train_loss'], 'b-', label='Train Loss', linewidth=2)
    loss_ax.plot(epochs, history['val_loss'], 'r-', label='Val Loss', linewidth=2)
    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('Loss')
    loss_ax.set_title('Loss Curves')
    loss_ax.legend()
    loss_ax.grid(True, alpha=0.3)

    # Validation metrics that share the same layout and a fixed [0, 1] y range:
    # (axis, history key, line format, y label, title)
    bounded_panels = (
        (axes[0, 1], 'val_accuracy', 'g-', 'Accuracy', 'Validation Accuracy'),
        (axes[0, 2], 'val_f1', 'm-', 'F1 Score', 'Validation F1 Score'),
        (axes[1, 0], 'val_precision', 'c-', 'Precision', 'Validation Precision'),
        (axes[1, 1], 'val_recall', 'y-', 'Recall', 'Validation Recall'),
    )
    for ax, key, line_format, y_label, title in bounded_panels:
        ax.plot(epochs, history[key], line_format, linewidth=2)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(y_label)
        ax.set_title(title)
        ax.grid(True, alpha=0.3)
        ax.set_ylim([0, 1])

    # Learning rate on a logarithmic scale.
    lr_ax = axes[1, 2]
    lr_ax.plot(epochs, history['learning_rate'], 'k-', linewidth=2)
    lr_ax.set_xlabel('Epoch')
    lr_ax.set_ylabel('Learning Rate')
    lr_ax.set_title('Learning Rate Schedule')
    lr_ax.grid(True, alpha=0.3)
    lr_ax.set_yscale('log')

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f'Training history plot saved to {save_path}')

    plt.show()


def plot_roc_curve(predictions: torch.Tensor, targets: torch.Tensor, save_path: str = None):
    """Plot the ROC curve and return the area under it.

    Args:
        predictions: Model scores; flattened and passed to
            ``sklearn.metrics.roc_curve`` as ``y_score``.
        targets: Ground-truth labels; flattened and passed as ``y_true``.
        save_path: If truthy, the figure is also written to this path.

    Returns:
        The AUC computed by ``sklearn.metrics.auc``.
    """
    from sklearn.metrics import roc_curve, auc

    # detach().cpu() lets callers pass tensors that still live on the GPU or
    # carry autograd history; Tensor.numpy() alone rejects both.
    predictions_np = predictions.detach().cpu().numpy().flatten()
    targets_np = targets.detach().cpu().numpy().flatten()

    fpr, tpr, _ = roc_curve(targets_np, predictions_np)
    roc_auc = auc(fpr, tpr)

    plt.figure(figsize=(10, 8))
    plt.plot(fpr, tpr, color='darkorange', lw=2,
             label=f'ROC curve (AUC = {roc_auc:.3f})')
    # Diagonal reference line, labelled "Random" in the legend.
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--', label='Random')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate', fontsize=12)
    plt.ylabel('True Positive Rate', fontsize=12)
    plt.title('ROC Curve - Siamese Network', fontsize=14, fontweight='bold')
    plt.legend(loc="lower right", fontsize=12)
    plt.grid(True, alpha=0.3)

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f'ROC curve saved to {save_path}')

    plt.show()

    return roc_auc


def plot_confusion_matrix(predictions: torch.Tensor, targets: torch.Tensor,
                         threshold: float = 0.5, save_path: str = None):
    """Plot a 2x2 confusion matrix of thresholded predictions.

    Args:
        predictions: Model scores; a score ``>= threshold`` counts as class 1.
        targets: Ground-truth labels; a value ``>= 0.5`` counts as class 1.
        threshold: Decision threshold applied to ``predictions``.
        save_path: If truthy, the figure is also written to this path.
    """
    from sklearn.metrics import confusion_matrix

    # detach().cpu() lets callers pass GPU tensors or tensors with autograd
    # history; Tensor.numpy() alone rejects both.
    predictions_binary = (
        predictions.detach().cpu().numpy().flatten() >= threshold
    ).astype(int)
    targets_binary = (targets.detach().cpu().numpy().flatten() >= 0.5).astype(int)

    # Pin the label set so the matrix is always 2x2 and lines up with the two
    # tick labels below, even when only one class occurs in the data.
    cm = confusion_matrix(targets_binary, predictions_binary, labels=[0, 1])

    plt.figure(figsize=(10, 8))
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title('Confusion Matrix - Correlation Detection', fontsize=14, fontweight='bold')
    plt.colorbar()

    classes = ['Different Domains', 'Same Domain']
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45, fontsize=12)
    plt.yticks(tick_marks, classes, fontsize=12)

    # Cell counts; white text on cells above half the maximum count so the
    # number stays readable on the darker background.
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            plt.text(j, i, format(cm[i, j], 'd'),
                     ha="center", va="center",
                     color="white" if cm[i, j] > thresh else "black",
                     fontsize=16, fontweight='bold')

    plt.ylabel('True Label', fontsize=12)
    plt.xlabel('Predicted Label', fontsize=12)
    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f'Confusion matrix saved to {save_path}')

    plt.show()


def _print_score_stats(header: str, scores) -> None:
    """Print mean/std/min/max of ``scores`` under ``header``.

    Prints a placeholder instead of failing when ``scores`` is empty
    (``min``/``max`` raise on zero-size arrays).
    """
    print(header)
    if scores.size == 0:
        print('  (no samples)')
        return
    print(f'  Mean: {scores.mean():.4f}')
    print(f'  Std:  {scores.std():.4f}')
    print(f'  Min:  {scores.min():.4f}')
    print(f'  Max:  {scores.max():.4f}')


def plot_similarity_distribution(predictions: torch.Tensor, targets: torch.Tensor,
                                 save_path: str = None):
    """Plot histograms and box plots of predicted scores split by target class.

    Samples whose target is ``>= 0.5`` form the "same domain" group, the rest
    the "different domains" group.  Summary statistics for both groups are
    printed after the figure is shown.

    Args:
        predictions: Model scores.
        targets: Ground-truth labels.
        save_path: If truthy, the figure is also written to this path.
    """
    # detach().cpu() lets callers pass GPU tensors or tensors with autograd
    # history; Tensor.numpy() alone rejects both.
    predictions_np = predictions.detach().cpu().numpy().flatten()
    targets_np = targets.detach().cpu().numpy().flatten()

    same_domain = predictions_np[targets_np >= 0.5]
    diff_domain = predictions_np[targets_np < 0.5]

    plt.figure(figsize=(12, 6))

    # Left: overlaid histograms of both groups.
    plt.subplot(1, 2, 1)
    plt.hist(same_domain, bins=50, alpha=0.7, color='green', edgecolor='black', label='Same Domain')
    plt.hist(diff_domain, bins=50, alpha=0.7, color='red', edgecolor='black', label='Different Domains')
    plt.xlabel('Predicted Similarity Score', fontsize=12)
    plt.ylabel('Frequency', fontsize=12)
    plt.title('Distribution of Similarity Scores', fontsize=14, fontweight='bold')
    plt.legend(fontsize=11)
    plt.grid(True, alpha=0.3)

    # Right: box plot per group.
    plt.subplot(1, 2, 2)
    plt.boxplot([diff_domain, same_domain], labels=['Different', 'Same'])
    plt.ylabel('Similarity Score', fontsize=12)
    plt.xlabel('Domain Match', fontsize=12)
    plt.title('Similarity Score by Domain Match', fontsize=14, fontweight='bold')
    plt.grid(True, alpha=0.3, axis='y')

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f'Similarity distribution plot saved to {save_path}')

    plt.show()

    # Print statistics (guarded so a single-class validation set does not crash)
    print(f'\n--- Similarity Score Statistics ---')
    _print_score_stats('Same Domain:', same_domain)
    _print_score_stats('\nDifferent Domains:', diff_domain)


def visualize_sample_predictions(model: nn.Module, dataset, device: torch.device,
                                 num_samples: int = 8, save_path: str = None):
    """Show randomly chosen image pairs next to the model's prediction.

    Each row shows the Google image, the Yandex image and a text box with the
    predicted score, the thresholded label (score ``>= 0.5``) and the true
    label.

    Args:
        model: Model called as ``model(google_img, yandex_img)``; its output
            is read with ``.item()``.
        dataset: Indexable dataset whose items provide ``'google_img'``,
            ``'yandex_img'`` and ``'same_domain'``.
        device: Device the inputs are moved to.
        num_samples: Number of rows to draw; clamped to ``len(dataset)``.
        save_path: If truthy, the figure is also written to this path.

    Raises:
        ValueError: If there is nothing to draw (empty dataset or
            ``num_samples <= 0``).
    """
    model.eval()

    # Sampling without replacement cannot draw more items than exist.
    num_samples = min(num_samples, len(dataset))
    if num_samples <= 0:
        raise ValueError('No samples available to visualize')

    # Get random samples
    indices = np.random.choice(len(dataset), num_samples, replace=False)

    fig, axes = plt.subplots(num_samples, 3, figsize=(15, 5*num_samples))
    if num_samples == 1:
        # A single row comes back as a 1-D array; restore the row dimension.
        axes = axes.reshape(1, -1)

    fig.suptitle('Sample Predictions - Siamese Network для корреляции карт',
                 fontsize=16, fontweight='bold')

    # Denormalization constants (assuming ImageNet normalization was applied
    # by the dataset — TODO confirm against the data pipeline).
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])

    with torch.no_grad():
        for idx, sample_idx in enumerate(indices):
            sample = dataset[int(sample_idx)]

            google_img = sample['google_img'].unsqueeze(0).to(device)
            yandex_img = sample['yandex_img'].unsqueeze(0).to(device)
            true_label = sample['same_domain'].item()

            # Predict
            pred_similarity = model(google_img, yandex_img).item()
            pred_label = int(pred_similarity >= 0.5)

            # Channels-first -> channels-last for imshow, then undo the
            # normalization and clamp to the displayable [0, 1] range.
            google_np = google_img.squeeze(0).cpu().numpy().transpose(1, 2, 0)
            yandex_np = yandex_img.squeeze(0).cpu().numpy().transpose(1, 2, 0)
            google_np = np.clip(std * google_np + mean, 0, 1)
            yandex_np = np.clip(std * yandex_np + mean, 0, 1)

            # Plot Google image
            axes[idx, 0].imshow(google_np)
            axes[idx, 0].set_title('Google Map', fontsize=12, fontweight='bold')
            axes[idx, 0].axis('off')

            # Plot Yandex image
            axes[idx, 1].imshow(yandex_np)
            axes[idx, 1].set_title('Yandex Map', fontsize=12, fontweight='bold')
            axes[idx, 1].axis('off')

            # Plot prediction info
            axes[idx, 2].axis('off')

            # Green box for a correct prediction, red for an incorrect one.
            is_correct = (pred_label == true_label)
            color = 'green' if is_correct else 'red'
            result = '✓ Correct' if is_correct else '✗ Incorrect'

            info_text = f"""
Prediction: {pred_similarity:.4f}
Predicted Label: {'Same' if pred_label == 1 else 'Different'}
True Label: {'Same' if true_label == 1 else 'Different'}

{result}
            """

            axes[idx, 2].text(0.5, 0.5, info_text,
                            ha='center', va='center',
                            fontsize=12,
                            bbox=dict(boxstyle='round', facecolor=color, alpha=0.2),
                            transform=axes[idx, 2].transAxes)

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f'Sample predictions saved to {save_path}')

    plt.show()


def visualize_feature_space(model: nn.Module, dataloader: DataLoader,
                           device: torch.device, max_samples: int = 500,
                           save_path: str = None):
    """Project extracted features to 2-D with t-SNE and plot them.

    Features of both images of every pair are obtained through
    ``model.extract_features``, embedded jointly with t-SNE and drawn in two
    panels (Google / Yandex), coloured by the pair's ``'same_domain'`` label.

    Args:
        model: Model exposing ``extract_features(images)``.
        dataloader: Loader yielding batches with ``'google_img'``,
            ``'yandex_img'`` and ``'same_domain'``.
        device: Device the inputs are moved to.
        max_samples: Stop reading batches once at least this many pairs were
            collected (whole batches are kept, so the total may exceed it).
        save_path: If truthy, the figure is also written to this path.

    Raises:
        ValueError: If no samples were collected.
    """
    from sklearn.manifold import TSNE

    model.eval()

    google_chunks = []
    yandex_chunks = []
    label_chunks = []
    # Count collected pairs directly rather than relying on
    # dataloader.batch_size, which is None when a batch_sampler is used.
    collected = 0

    with torch.no_grad():
        for batch in tqdm(dataloader, desc='Extracting features'):
            if collected >= max_samples:
                break

            google_img = batch['google_img'].to(device)
            yandex_img = batch['yandex_img'].to(device)
            batch_labels = batch['same_domain'].cpu().numpy()

            # Extract features
            google_chunks.append(model.extract_features(google_img).cpu().numpy())
            yandex_chunks.append(model.extract_features(yandex_img).cpu().numpy())
            label_chunks.append(batch_labels)
            collected += len(batch_labels)

    if not label_chunks:
        raise ValueError('No samples available for feature-space visualization')

    features_google = np.concatenate(google_chunks, axis=0)
    features_yandex = np.concatenate(yandex_chunks, axis=0)
    labels = np.concatenate(label_chunks, axis=0)
    n_samples = len(labels)

    # Embed both sources together so they share one 2-D coordinate system.
    all_features = np.concatenate([features_google, features_yandex], axis=0)

    print(f'\nApplying t-SNE to {all_features.shape[0]} samples...')
    # scikit-learn requires perplexity < number of points; keep 30 whenever
    # there are enough points and shrink it only for small inputs.
    perplexity = min(30, all_features.shape[0] - 1)
    tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity)
    features_2d = tsne.fit_transform(all_features)

    # Plot
    fig, axes = plt.subplots(1, 2, figsize=(20, 8))

    # Split the joint embedding back into its Google and Yandex halves.
    panels = (
        (axes[0], features_2d[:n_samples], 'Google Maps Features (t-SNE)'),
        (axes[1], features_2d[n_samples:], 'Yandex Maps Features (t-SNE)'),
    )
    for ax, points, title in panels:
        for label in [0, 1]:
            mask = labels == label
            ax.scatter(
                points[mask, 0],
                points[mask, 1],
                c='green' if label == 1 else 'red',
                label='Same Domain' if label == 1 else 'Different Domains',
                alpha=0.6,
                s=50
            )
        ax.set_title(title, fontsize=14, fontweight='bold')
        ax.set_xlabel('t-SNE Component 1', fontsize=12)
        ax.set_ylabel('t-SNE Component 2', fontsize=12)
        ax.legend(fontsize=11)
        ax.grid(True, alpha=0.3)

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f'Feature space visualization saved to {save_path}')

    plt.show()


def generate_correlation_heatmap(model: nn.Module, dataloader: DataLoader,
                                device: torch.device, num_samples: int = 20,
                                save_path: str = None):
    """Plot a heatmap of model scores for every Google/Yandex combination.

    The first pair of each batch is collected until ``num_samples`` pairs are
    available (or the loader is exhausted).  Entry ``[i, j]`` of the matrix is
    ``model(google_i, yandex_j)``; the diagonal therefore holds the scores of
    the pairs as they appear in the dataset.

    Args:
        model: Model called as ``model(google_img, yandex_img)``; its output
            is read with ``.item()``.
        dataloader: Loader yielding batches with ``'google_img'``,
            ``'yandex_img'`` and ``'same_domain'``.
        device: Device the inputs are moved to.
        num_samples: Maximum number of pairs (matrix side length).
        save_path: If truthy, the figure is also written to this path.

    Raises:
        ValueError: If no samples could be collected.
    """
    model.eval()

    # Collect samples
    google_images = []
    yandex_images = []
    labels = []

    with torch.no_grad():
        for batch in dataloader:
            if len(google_images) >= num_samples:
                break

            google_img = batch['google_img'].to(device)
            yandex_img = batch['yandex_img'].to(device)
            label = batch['same_domain']

            # Only the first pair of every batch is used.
            google_images.append(google_img[:1])
            yandex_images.append(yandex_img[:1])
            labels.append(label[:1].item())

    # The loader may hold fewer batches than requested; size everything by
    # what was actually collected so later indexing cannot run past the data.
    n = len(google_images)
    if n == 0:
        raise ValueError('No samples available for correlation heatmap')
    if n < num_samples:
        print(f'Only {n} samples available for heatmap (requested {num_samples})')

    google_images = torch.cat(google_images, dim=0)
    yandex_images = torch.cat(yandex_images, dim=0)

    # Compute similarity matrix
    similarity_matrix = np.zeros((n, n))

    with torch.no_grad():
        for i in tqdm(range(n), desc='Computing correlations'):
            for j in range(n):
                google_i = google_images[i:i+1]
                yandex_j = yandex_images[j:j+1]
                similarity_matrix[i, j] = model(google_i, yandex_j).item()

    # Plot heatmap
    plt.figure(figsize=(14, 12))
    im = plt.imshow(similarity_matrix, cmap='RdYlGn', aspect='auto', vmin=0, vmax=1)

    plt.colorbar(im, label='Similarity Score', fraction=0.046, pad=0.04)
    plt.title('Correlation Heatmap: Google vs Yandex Maps\n(Матрица корреляций снимков)',
              fontsize=16, fontweight='bold', pad=20)
    plt.xlabel('Yandex Map Index', fontsize=12)
    plt.ylabel('Google Map Index', fontsize=12)

    # Add grid
    plt.grid(True, which='both', color='gray', linestyle='-', linewidth=0.5, alpha=0.3)

    # Mark diagonal cells whose dataset label is 1 (first 10 only, for readability)
    for i in range(min(n, 10)):
        if labels[i] == 1:  # True match
            plt.text(i, i, '✓', ha='center', va='center',
                     color='white', fontsize=12, fontweight='bold')

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f'Correlation heatmap saved to {save_path}')

    plt.show()

    # Print statistics
    diagonal = np.diag(similarity_matrix)
    off_diagonal = similarity_matrix[~np.eye(n, dtype=bool)]

    print(f'\n--- Correlation Statistics ---')
    print(f'Diagonal (matched pairs):')
    print(f'  Mean: {diagonal.mean():.4f}')
    print(f'  Std:  {diagonal.std():.4f}')
    print(f'\nOff-diagonal (mismatched pairs):')
    print(f'  Mean: {off_diagonal.mean():.4f}')
    print(f'  Std:  {off_diagonal.std():.4f}')


# =============================================================================
# MAIN TRAINING SCRIPT
# =============================================================================

def main():
    """Train the similarity model and generate all result visualizations.

    Reads the module-level ``config`` mapping, builds the data loaders and
    model through ``create_data_loaders`` / ``create_similarity_model`` (both
    defined outside this block), trains with ``SimilarityTrainer`` and then
    writes the plots to ``<output_dir>/visualizations``.
    """

    # Configuration (shallow copy so the module-level mapping is not rebound)
    config_dict = config.copy()
    if isinstance(config_dict.get('image_size'), list):
        config_dict['image_size'] = tuple(config_dict['image_size'])

    # Device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f'\n{"="*70}')
    print(f'Siamese Network Training for Map Correlation')
    print(f'Обучение сиамской сети для корреляции снимков')
    print(f'{"="*70}')
    print(f'Using device: {device}')
    if torch.cuda.is_available():
        print(f'GPU: {torch.cuda.get_device_name(0)}')
        print(f'GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB')

    # Create data loaders
    print(f'\n{"="*70}')
    print('Creating data loaders...')
    print(f'{"="*70}')

    train_loader, val_loader = create_data_loaders(
        root_dir=config_dict['data_dir'],
        batch_size=config_dict['batch_size'],
        train_split=config_dict['train_split'],
        num_workers=config_dict['num_workers'],
        image_size=config_dict['image_size'],
        augment_train=True,
        augment_val=False,
        device=device
    )

    print(f'Train batches: {len(train_loader)}')
    print(f'Val batches: {len(val_loader)}')
    print(f'Train samples: {len(train_loader.dataset)}')
    print(f'Val samples: {len(val_loader.dataset)}')

    # Create model
    print(f'\n{"="*70}')
    print('Creating model...')
    print(f'{"="*70}')

    model = create_similarity_model(
        model_type='backbone',
        input_size=config_dict['image_size'][0] if isinstance(config_dict['image_size'], (tuple, list)) else config_dict['image_size'],
        input_channels=3,
        backbone_name='resnet18',
        pretrained=True,
        dropout_rate=0.3,
        use_batch_norm=True
    )

    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f'Total parameters: {total_params:,}')
    print(f'Trainable parameters: {trainable_params:,}')

    # Create trainer
    trainer = SimilarityTrainer(
        model=model,
        trainloader=train_loader,
        valloader=val_loader,
        device=device,
        config=config_dict
    )

    # Train model
    print(f'\n{"="*70}')
    print('Starting training...')
    print(f'{"="*70}')

    history = trainer.train(config_dict['epochs'])

    # =============================================================================
    # VISUALIZATION AND RESULTS
    # =============================================================================

    print(f'\n{"="*70}')
    print('Generating visualizations...')
    print(f'{"="*70}')

    output_dir = config_dict.get('output_dir', 'runs/similarity')
    vis_dir = os.path.join(output_dir, 'visualizations')
    os.makedirs(vis_dir, exist_ok=True)

    # 1. Training history
    print('\n1. Plotting training history...')
    plot_training_history(
        history,
        save_path=os.path.join(vis_dir, 'training_history.png')
    )

    # 2. Validation metrics
    # Reuse the trainer's own validation pass instead of duplicating its loop:
    # validate() already returns the concatenated CPU predictions and targets.
    # Note these come from the model state at the end of training, not from
    # the best checkpoint.
    print('\n2. Computing validation predictions...')
    val_results = trainer.validate()
    val_predictions = val_results['predictions']
    val_targets = val_results['targets']

    # 3. ROC curve
    print('\n3. Plotting ROC curve...')
    roc_auc = plot_roc_curve(
        val_predictions,
        val_targets,
        save_path=os.path.join(vis_dir, 'roc_curve.png')
    )
    print(f'ROC AUC Score: {roc_auc:.4f}')

    # 4. Confusion matrix
    print('\n4. Plotting confusion matrix...')
    plot_confusion_matrix(
        val_predictions,
        val_targets,
        threshold=0.5,
        save_path=os.path.join(vis_dir, 'confusion_matrix.png')
    )

    # 5. Similarity distribution
    print('\n5. Plotting similarity distribution...')
    plot_similarity_distribution(
        val_predictions,
        val_targets,
        save_path=os.path.join(vis_dir, 'similarity_distribution.png')
    )

    # 6. Sample predictions
    print('\n6. Visualizing sample predictions...')
    visualize_sample_predictions(
        trainer.model,
        val_loader.dataset,
        device,
        num_samples=8,
        save_path=os.path.join(vis_dir, 'sample_predictions.png')
    )

    # 7. Feature space visualization
    print('\n7. Visualizing feature space (t-SNE)...')
    visualize_feature_space(
        trainer.model,
        val_loader,
        device,
        max_samples=500,
        save_path=os.path.join(vis_dir, 'feature_space_tsne.png')
    )

    # 8. Correlation heatmap
    print('\n8. Generating correlation heatmap...')
    generate_correlation_heatmap(
        trainer.model,
        val_loader,
        device,
        num_samples=20,
        save_path=os.path.join(vis_dir, 'correlation_heatmap.png')
    )

    # =============================================================================
    # FINAL RESULTS SUMMARY
    # =============================================================================

    print(f'\n{"="*70}')
    print('FINAL RESULTS SUMMARY')
    print('ИТОГОВЫЕ РЕЗУЛЬТАТЫ')
    print(f'{"="*70}')

    print(f'\nBest Validation Loss: {trainer.best_val_loss:.4f}')
    print(f'Final Validation Accuracy: {history["val_accuracy"][-1]:.4f}')
    print(f'Final Validation F1 Score: {history["val_f1"][-1]:.4f}')
    print(f'Final Validation Precision: {history["val_precision"][-1]:.4f}')
    print(f'Final Validation Recall: {history["val_recall"][-1]:.4f}')
    print(f'ROC AUC Score: {roc_auc:.4f}')

    print(f'\nCheckpoints saved to: {os.path.join(output_dir, "checkpoints")}')
    print(f'Visualizations saved to: {vis_dir}')

    print(f'\n{"="*70}')
    print('Training and visualization completed successfully!')
    print('Обучение и визуализация завершены успешно!')
    print(f'{"="*70}\n')


# Run the full training and visualization pipeline only when this file is
# executed as a script, not when it is imported.
if __name__ == '__main__':
    main()