initialize weights, enchance graphics
This commit is contained in:
@@ -13,6 +13,7 @@ import torch.nn as nn
|
|||||||
import torch.optim as optim
|
import torch.optim as optim
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
import seaborn as sns
|
||||||
from torch.utils.data import DataLoader, Dataset, Subset
|
from torch.utils.data import DataLoader, Dataset, Subset
|
||||||
from torch.utils.tensorboard import SummaryWriter
|
from torch.utils.tensorboard import SummaryWriter
|
||||||
from torchvision import transforms, models
|
from torchvision import transforms, models
|
||||||
|
|||||||
@@ -78,15 +78,18 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"def matrix_to_homography_params(H, K):\n",
|
"def matrix_to_homography_params(H, K):\n",
|
||||||
|
" if hasattr(H, 'numpy'):\n",
|
||||||
|
" H = H.numpy()\n",
|
||||||
" K_inv = np.linalg.inv(K)\n",
|
" K_inv = np.linalg.inv(K)\n",
|
||||||
" E = K_inv @ H @ K\n",
|
" E = K_inv @ H @ K\n",
|
||||||
" scale = np.sqrt(np.linalg.det(E[:2, :2]))\n",
|
" scale = E[2, 2]\n",
|
||||||
" R = E[:2, :2] / scale\n",
|
" R_normalized = E / scale\n",
|
||||||
" tx, ty = E[0, 2], E[1, 2]\n",
|
" rz = np.arctan2(R_normalized[1, 0], R_normalized[0, 0])\n",
|
||||||
" rz = np.arctan2(R[1, 0], R[0, 0])\n",
|
" ry = np.arctan2(-R_normalized[2, 0], np.sqrt(R_normalized[2, 1]**2 + R_normalized[2, 2]**2))\n",
|
||||||
" r20, r21 = E[2, 0], E[2, 1]\n",
|
" rx = np.arctan2(R_normalized[2, 1], R_normalized[2, 2])\n",
|
||||||
" ry = np.arctan2(r20, r21)\n",
|
" A = R_normalized[:2, :2]\n",
|
||||||
" rx = np.arctan2(-E[1, 2], E[1, 1])\n",
|
" correction = scale * np.array([R_normalized[0, 2], R_normalized[1, 2]])\n",
|
||||||
|
" tx, ty = np.linalg.solve(A, E[:2, 2] - correction)\n",
|
||||||
" return np.array([tx, ty, rx, ry, rz, scale], dtype=np.float32)\n",
|
" return np.array([tx, ty, rx, ry, rz, scale], dtype=np.float32)\n",
|
||||||
"\n"
|
"\n"
|
||||||
]
|
]
|
||||||
@@ -149,6 +152,7 @@
|
|||||||
" self._cached_google = [None] * len(self.image_pairs)\n",
|
" self._cached_google = [None] * len(self.image_pairs)\n",
|
||||||
" self._cached_yandex = [None] * len(self.image_pairs)\n",
|
" self._cached_yandex = [None] * len(self.image_pairs)\n",
|
||||||
" self._cached_homography = [None] * len(self.image_pairs)\n",
|
" self._cached_homography = [None] * len(self.image_pairs)\n",
|
||||||
|
" self._cached_params = [None] * len(self.image_pairs)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" def _generate_augmented(self, idx):\n",
|
" def _generate_augmented(self, idx):\n",
|
||||||
" google_img = self._google_images[idx].copy()\n",
|
" google_img = self._google_images[idx].copy()\n",
|
||||||
@@ -158,14 +162,11 @@
|
|||||||
" params2 = generate_random_homography_params()\n",
|
" params2 = generate_random_homography_params()\n",
|
||||||
" H1 = homography_params_to_matrix(params1, self.K)\n",
|
" H1 = homography_params_to_matrix(params1, self.K)\n",
|
||||||
" H2 = homography_params_to_matrix(params2, self.K)\n",
|
" H2 = homography_params_to_matrix(params2, self.K)\n",
|
||||||
" H_combined = np.linalg.inv(H1) @ H2\n",
|
|
||||||
" \n",
|
" \n",
|
||||||
" google_warped = cv2.warpPerspective(google_img, H2, (self.image_size[1], self.image_size[0]))\n",
|
|
||||||
" yandex_warped = cv2.warpPerspective(yandex_img, H1, (self.image_size[1], self.image_size[0]))\n",
|
" yandex_warped = cv2.warpPerspective(yandex_img, H1, (self.image_size[1], self.image_size[0]))\n",
|
||||||
|
" google_warped = cv2.warpPerspective(google_img, H2 @ H1, (self.image_size[1], self.image_size[0]))\n",
|
||||||
" \n",
|
" \n",
|
||||||
" target_params = matrix_to_homography_params(H_combined, self.K)\n",
|
" return google_warped, yandex_warped, H2, params2\n",
|
||||||
" \n",
|
|
||||||
" return google_warped, yandex_warped, H_combined, target_params\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
" def __len__(self):\n",
|
" def __len__(self):\n",
|
||||||
" return len(self.image_pairs)\n",
|
" return len(self.image_pairs)\n",
|
||||||
@@ -179,13 +180,14 @@
|
|||||||
" google_img = self._cached_google[idx]\n",
|
" google_img = self._cached_google[idx]\n",
|
||||||
" yandex_img = self._cached_yandex[idx]\n",
|
" yandex_img = self._cached_yandex[idx]\n",
|
||||||
" target_matrix = self._cached_homography[idx]\n",
|
" target_matrix = self._cached_homography[idx]\n",
|
||||||
" target_params = matrix_to_homography_params(target_matrix, self.K)\n",
|
" target_params = self._cached_params[idx]\n",
|
||||||
" elif self.augment:\n",
|
" elif self.augment:\n",
|
||||||
" google_img, yandex_img, target_matrix, target_params = self._generate_augmented(idx)\n",
|
" google_img, yandex_img, target_matrix, target_params = self._generate_augmented(idx)\n",
|
||||||
" if self.cache_level > 0:\n",
|
" if self.cache_level > 0:\n",
|
||||||
" self._cached_google[idx] = google_img\n",
|
" self._cached_google[idx] = google_img\n",
|
||||||
" self._cached_yandex[idx] = yandex_img\n",
|
" self._cached_yandex[idx] = yandex_img\n",
|
||||||
" self._cached_homography[idx] = target_matrix\n",
|
" self._cached_homography[idx] = target_matrix\n",
|
||||||
|
" self._cached_params[idx] = target_params\n",
|
||||||
" else:\n",
|
" else:\n",
|
||||||
" google_img = self._google_images[idx]\n",
|
" google_img = self._google_images[idx]\n",
|
||||||
" yandex_img = self._yandex_images[idx]\n",
|
" yandex_img = self._yandex_images[idx]\n",
|
||||||
@@ -238,6 +240,29 @@
|
|||||||
"\n"
|
"\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"\n",
|
||||||
|
"train_loader, val_loader = create_data_loaders(config['data_dir'])\n",
|
||||||
|
"batch = next(iter(train_loader))\n",
|
||||||
|
"google_img = batch['google_img'][0]\n",
|
||||||
|
"yandex_img = batch['yandex_img'][0]\n",
|
||||||
|
"\n",
|
||||||
|
"# google_img.permute((1, 2, 0)) * 255\n",
|
||||||
|
"batch['homography_params'].mean(axis=0)\n",
|
||||||
|
"\n",
|
||||||
|
"print(batch['homography_matrix'][0])\n",
|
||||||
|
"print(batch['homography_params'][0])\n",
|
||||||
|
"K = get_camera_matrix(config['image_size'][0], config['image_size'][1])\n",
|
||||||
|
"print(homography_params_to_matrix(batch['homography_params'][0], K))\n",
|
||||||
|
"print(matrix_to_homography_params(batch['homography_matrix'][0].numpy(), K))\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@@ -291,7 +316,7 @@
|
|||||||
"\n",
|
"\n",
|
||||||
" output = torch.tanh(output) # [-1; 1]\n",
|
" output = torch.tanh(output) # [-1; 1]\n",
|
||||||
" modified = output.clone()\n",
|
" modified = output.clone()\n",
|
||||||
" modified[:, 2:5] = torch.mul(output[:, 2:5], torch.pi) # [-pi; pi]\n",
|
" modified[:, 2:6] = torch.mul(output[:, 2:6], torch.pi) # [-pi; pi]\n",
|
||||||
"\n",
|
"\n",
|
||||||
" return modified\n",
|
" return modified\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
|||||||
@@ -2,11 +2,14 @@ import os
|
|||||||
import torch
|
import torch
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
|
import seaborn as sns
|
||||||
|
|
||||||
from .dataloader import create_data_loaders
|
from .dataloader import create_data_loaders
|
||||||
from .model import angular_difference
|
from .model import angular_difference
|
||||||
from .utils import config
|
from .utils import config
|
||||||
|
|
||||||
|
sns.set_theme(style="whitegrid", palette="muted", font_scale=1.2)
|
||||||
|
|
||||||
IMG_DIR = os.path.join(config["output_dir"], "images")
|
IMG_DIR = os.path.join(config["output_dir"], "images")
|
||||||
os.makedirs(IMG_DIR, exist_ok=True)
|
os.makedirs(IMG_DIR, exist_ok=True)
|
||||||
|
|
||||||
@@ -82,84 +85,132 @@ def analyze_training(trainer):
|
|||||||
mean_errors = [np.mean(all_errors[i]) for i in range(6)]
|
mean_errors = [np.mean(all_errors[i]) for i in range(6)]
|
||||||
std_errors = [np.std(all_errors[i]) for i in range(6)]
|
std_errors = [np.std(all_errors[i]) for i in range(6)]
|
||||||
|
|
||||||
|
angle_errors_deg = [np.degrees(mean_errors[i]) for i in range(2, 5)]
|
||||||
|
|
||||||
|
all_targets_stacked = [np.array(all_targets[i]) for i in range(6)]
|
||||||
|
target_ranges = [np.ptp(all_targets_stacked[i]) for i in range(6)]
|
||||||
|
relative_errors = [mean_errors[i] / target_ranges[i] if target_ranges[i] > 1e-8 else 0 for i in range(6)]
|
||||||
|
|
||||||
if len(trainer.train_losses) > 0:
|
if len(trainer.train_losses) > 0:
|
||||||
epochs = range(1, len(trainer.train_losses) + 1)
|
epochs = range(1, len(trainer.train_losses) + 1)
|
||||||
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
|
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
|
||||||
|
|
||||||
axes[0, 0].plot(epochs, trainer.train_losses, "b-", label="Train Loss")
|
axes[0, 0].plot(epochs, trainer.train_losses, color="#2ecc71", linewidth=2, label="Train Loss")
|
||||||
axes[0, 0].plot(epochs, trainer.val_losses, "r-", label="Val Loss")
|
axes[0, 0].plot(epochs, trainer.val_losses, color="#e74c3c", linewidth=2, label="Val Loss")
|
||||||
axes[0, 0].set_xlabel("Epoch")
|
axes[0, 0].set_xlabel("Epoch")
|
||||||
axes[0, 0].set_ylabel("Loss")
|
axes[0, 0].set_ylabel("Loss")
|
||||||
axes[0, 0].set_title("Training & Validation Loss")
|
axes[0, 0].set_title("Training & Validation Loss", fontweight="bold")
|
||||||
axes[0, 0].legend()
|
axes[0, 0].legend(framealpha=0.9)
|
||||||
axes[0, 0].grid(True, alpha=0.3)
|
axes[0, 0].grid(True, alpha=0.3)
|
||||||
|
|
||||||
axes[0, 1].plot(epochs, trainer.val_losses, "r-", label="Val Loss")
|
axes[0, 1].plot(epochs, trainer.val_losses, color="#e74c3c", linewidth=2, label="Val Loss")
|
||||||
axes[0, 1].set_xlabel("Epoch")
|
axes[0, 1].set_xlabel("Epoch")
|
||||||
axes[0, 1].set_ylabel("Loss")
|
axes[0, 1].set_ylabel("Loss")
|
||||||
axes[0, 1].set_title("Validation Loss")
|
axes[0, 1].set_title("Validation Loss", fontweight="bold")
|
||||||
axes[0, 1].legend()
|
axes[0, 1].legend(framealpha=0.9)
|
||||||
axes[0, 1].grid(True, alpha=0.3)
|
axes[0, 1].grid(True, alpha=0.3)
|
||||||
|
|
||||||
axes[1, 0].plot(epochs, trainer.val_mse_trans, "g-", label="Translation (tx, ty)")
|
axes[1, 0].plot(epochs, trainer.val_mse_trans, color="#3498db", linewidth=2, label="Translation (tx, ty)")
|
||||||
axes[1, 0].plot(epochs, trainer.val_mse_angle, "m-", label="Angle (rx, ry, rz)")
|
axes[1, 0].plot(epochs, trainer.val_mse_angle, color="#9b59b6", linewidth=2, label="Angle (rx, ry, rz)")
|
||||||
axes[1, 0].plot(epochs, trainer.val_mse_scale, "c-", label="Scale")
|
axes[1, 0].plot(epochs, trainer.val_mse_scale, color="#e67e22", linewidth=2, label="Scale")
|
||||||
axes[1, 0].set_xlabel("Epoch")
|
axes[1, 0].set_xlabel("Epoch")
|
||||||
axes[1, 0].set_ylabel("MSE")
|
axes[1, 0].set_ylabel("MSE")
|
||||||
axes[1, 0].set_title("Validation MSE by Category")
|
axes[1, 0].set_title("Validation MSE by Category", fontweight="bold")
|
||||||
axes[1, 0].legend()
|
axes[1, 0].legend(framealpha=0.9)
|
||||||
axes[1, 0].grid(True, alpha=0.3)
|
axes[1, 0].grid(True, alpha=0.3)
|
||||||
|
|
||||||
x_pos = np.arange(6)
|
x_pos = np.arange(6)
|
||||||
axes[1, 1].bar(x_pos, mean_errors, yerr=std_errors, capsize=5, color=["c", "m", "y", "g", "b", "r"], alpha=0.8)
|
colors = ["#3498db", "#e74c3c", "#9b59b6", "#2ecc71", "#f39c12", "#1abc9c"]
|
||||||
|
bars = axes[1, 1].bar(x_pos, mean_errors, yerr=std_errors, capsize=6, color=colors, alpha=0.85, edgecolor="white", linewidth=1.5)
|
||||||
axes[1, 1].set_xticks(x_pos)
|
axes[1, 1].set_xticks(x_pos)
|
||||||
axes[1, 1].set_xticklabels(names)
|
axes[1, 1].set_xticklabels(names)
|
||||||
axes[1, 1].set_ylabel("Mean Absolute Error")
|
axes[1, 1].set_ylabel("Mean Absolute Error")
|
||||||
axes[1, 1].set_title(f"Mean Absolute Error per Parameter ({n_samples} samples)")
|
axes[1, 1].set_title(f"Mean Absolute Error per Parameter ({n_samples} samples)", fontweight="bold")
|
||||||
axes[1, 1].grid(True, alpha=0.3, axis="y")
|
axes[1, 1].grid(True, alpha=0.3, axis="y")
|
||||||
|
|
||||||
plt.tight_layout()
|
plt.tight_layout()
|
||||||
plt.savefig(os.path.join(IMG_DIR, "training_loss_plots.png"), dpi=150)
|
plt.savefig(os.path.join(IMG_DIR, "training_loss_plots.png"), dpi=150, bbox_inches="tight")
|
||||||
print("Saved training_loss_plots.png")
|
print("Saved training_loss_plots.png")
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
fig, axes = plt.subplots(2, 3, figsize=(18, 10))
|
fig, axes = plt.subplots(2, 3, figsize=(18, 10))
|
||||||
|
colors = ["#3498db", "#e74c3c", "#9b59b6", "#2ecc71", "#f39c12", "#1abc9c"]
|
||||||
for j in range(6):
|
for j in range(6):
|
||||||
row = j // 3
|
row = j // 3
|
||||||
col = j % 3
|
col = j % 3
|
||||||
axes[row, col].bar(range(len(all_errors[j])), all_errors[j], color="steelblue", alpha=0.7)
|
axes[row, col].bar(range(len(all_errors[j])), all_errors[j], color=colors[j], alpha=0.75)
|
||||||
axes[row, col].set_xlabel("Sample")
|
axes[row, col].set_xlabel("Sample", fontsize=10)
|
||||||
axes[row, col].set_ylabel("Absolute Error")
|
axes[row, col].set_ylabel("Absolute Error", fontsize=10)
|
||||||
axes[row, col].set_title(f"{names[j]}: Mean={np.mean(all_errors[j]):.4f}, Std={np.std(all_errors[j]):.4f}")
|
axes[row, col].set_title(f"{names[j]}: Mean={np.mean(all_errors[j]):.4f}, Std={np.std(all_errors[j]):.4f}", fontweight="bold", fontsize=11)
|
||||||
axes[row, col].grid(True, alpha=0.3, axis="y")
|
axes[row, col].grid(True, alpha=0.3, axis="y")
|
||||||
plt.suptitle(f"Mean Absolute Error per Parameter ({n_samples} samples)", fontsize=14)
|
plt.suptitle(f"Mean Absolute Error per Parameter ({n_samples} samples)", fontsize=14, fontweight="bold")
|
||||||
plt.tight_layout()
|
plt.tight_layout()
|
||||||
plt.savefig(os.path.join(IMG_DIR, "mae_per_parameter.png"), dpi=150)
|
plt.savefig(os.path.join(IMG_DIR, "mae_per_parameter.png"), dpi=150, bbox_inches="tight")
|
||||||
print("Saved mae_per_parameter.png")
|
print("Saved mae_per_parameter.png")
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
|
fig, axes = plt.subplots(1, 3, figsize=(18, 6))
|
||||||
|
|
||||||
|
x_pos = np.arange(6)
|
||||||
|
colors = ["#3498db", "#e74c3c", "#9b59b6", "#2ecc71", "#f39c12", "#1abc9c"]
|
||||||
|
bars = axes[0].bar(x_pos, mean_errors, yerr=std_errors, capsize=6, color=colors, alpha=0.85, edgecolor="white", linewidth=1.5)
|
||||||
|
axes[0].set_xticks(x_pos)
|
||||||
|
axes[0].set_xticklabels(names)
|
||||||
|
axes[0].set_ylabel("Mean Absolute Error")
|
||||||
|
axes[0].set_title("Mean Absolute Error per Parameter (with std)", fontweight="bold")
|
||||||
|
axes[0].grid(True, alpha=0.3, axis="y")
|
||||||
|
|
||||||
|
bp = axes[1].boxplot([all_errors[i] for i in range(6)], labels=names, patch_artist=True)
|
||||||
|
for patch, color in zip(bp["boxes"], colors):
|
||||||
|
patch.set_facecolor(color)
|
||||||
|
patch.set_alpha(0.8)
|
||||||
|
axes[1].set_ylabel("Absolute Error")
|
||||||
|
axes[1].set_title(f"Error Distribution per Parameter ({n_samples} samples)", fontweight="bold")
|
||||||
|
axes[1].grid(True, alpha=0.3, axis="y")
|
||||||
|
|
||||||
|
rel_err_pos = np.arange(6)
|
||||||
|
bars = axes[2].bar(rel_err_pos, relative_errors, color=colors, alpha=0.85, edgecolor="white", linewidth=1.5)
|
||||||
|
axes[2].set_xticks(rel_err_pos)
|
||||||
|
axes[2].set_xticklabels(names)
|
||||||
|
axes[2].set_ylabel("Relative Error (MAE / Range)")
|
||||||
|
axes[2].set_title("Relative Error per Parameter", fontweight="bold")
|
||||||
|
axes[2].grid(True, alpha=0.3, axis="y")
|
||||||
|
|
||||||
|
plt.tight_layout()
|
||||||
|
plt.savefig(os.path.join(IMG_DIR, "mae_boxplot.png"), dpi=150, bbox_inches="tight")
|
||||||
|
print("Saved mae_boxplot.png")
|
||||||
|
plt.show()
|
||||||
|
|
||||||
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
|
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
|
||||||
|
|
||||||
x_pos = np.arange(6)
|
angle_names = ["rx", "ry", "rz"]
|
||||||
axes[0].bar(x_pos, mean_errors, yerr=std_errors, capsize=5, color=["c", "m", "y", "g", "b", "r"], alpha=0.8)
|
x_pos = np.arange(3)
|
||||||
|
colors_angle = ["#9b59b6", "#2ecc71", "#f39c12"]
|
||||||
|
bars = axes[0].bar(x_pos, angle_errors_deg, color=colors_angle, alpha=0.85, edgecolor="white", linewidth=1.5)
|
||||||
axes[0].set_xticks(x_pos)
|
axes[0].set_xticks(x_pos)
|
||||||
axes[0].set_xticklabels(names)
|
axes[0].set_xticklabels(angle_names)
|
||||||
axes[0].set_ylabel("Mean Absolute Error")
|
axes[0].set_ylabel("Mean Absolute Error (degrees)")
|
||||||
axes[0].set_title("Mean Absolute Error per Parameter (with std)")
|
axes[0].set_title("Angle MAE in Degrees", fontweight="bold")
|
||||||
axes[0].grid(True, alpha=0.3, axis="y")
|
axes[0].grid(True, alpha=0.3, axis="y")
|
||||||
|
for i, e in enumerate(angle_errors_deg):
|
||||||
|
axes[0].text(i, e + 0.5, f"{e:.1f}°", ha="center", va="bottom", fontsize=11, fontweight="bold")
|
||||||
|
|
||||||
bp = axes[1].boxplot([all_errors[i] for i in range(6)], labels=names, patch_artist=True)
|
trans_scale_errs = [mean_errors[0], mean_errors[1], mean_errors[5]]
|
||||||
colors = ["c", "m", "y", "g", "b", "r"]
|
trans_scale_names = ["tx", "ty", "scale"]
|
||||||
for patch, color in zip(bp["boxes"], colors):
|
x_pos = np.arange(3)
|
||||||
patch.set_facecolor(color)
|
colors_trans = ["#3498db", "#e74c3c", "#1abc9c"]
|
||||||
patch.set_alpha(0.7)
|
bars = axes[1].bar(x_pos, trans_scale_errs, color=colors_trans, alpha=0.85, edgecolor="white", linewidth=1.5)
|
||||||
axes[1].set_ylabel("Absolute Error")
|
axes[1].set_xticks(x_pos)
|
||||||
axes[1].set_title(f"Error Distribution per Parameter ({n_samples} samples)")
|
axes[1].set_xticklabels(trans_scale_names)
|
||||||
|
axes[1].set_ylabel("Mean Absolute Error")
|
||||||
|
axes[1].set_title("Translation & Scale MAE", fontweight="bold")
|
||||||
axes[1].grid(True, alpha=0.3, axis="y")
|
axes[1].grid(True, alpha=0.3, axis="y")
|
||||||
|
for i, e in enumerate(trans_scale_errs):
|
||||||
|
axes[1].text(i, e + 0.01, f"{e:.4f}", ha="center", va="bottom", fontsize=11, fontweight="bold")
|
||||||
|
|
||||||
plt.tight_layout()
|
plt.tight_layout()
|
||||||
plt.savefig(os.path.join(IMG_DIR, "mae_boxplot.png"), dpi=150)
|
plt.savefig(os.path.join(IMG_DIR, "mae_by_category.png"), dpi=150, bbox_inches="tight")
|
||||||
print("Saved mae_boxplot.png")
|
print("Saved mae_by_category.png")
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
print("\n=== Sample Predictions (20 pairs) ===")
|
print("\n=== Sample Predictions (20 pairs) ===")
|
||||||
@@ -196,50 +247,58 @@ def analyze_training(trainer):
|
|||||||
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
|
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
|
||||||
|
|
||||||
axes[0, 0].imshow(google_img[0].cpu().permute(1, 2, 0))
|
axes[0, 0].imshow(google_img[0].cpu().permute(1, 2, 0))
|
||||||
axes[0, 0].set_title(f"Google Image")
|
axes[0, 0].set_title("Google Image", fontweight="bold", fontsize=12)
|
||||||
axes[0, 0].axis("off")
|
axes[0, 0].axis("off")
|
||||||
|
|
||||||
axes[0, 1].imshow(yandex_img[0].cpu().permute(1, 2, 0))
|
axes[0, 1].imshow(yandex_img[0].cpu().permute(1, 2, 0))
|
||||||
axes[0, 1].set_title(f"Yandex Image")
|
axes[0, 1].set_title("Yandex Image", fontweight="bold", fontsize=12)
|
||||||
axes[0, 1].axis("off")
|
axes[0, 1].axis("off")
|
||||||
|
|
||||||
x_pos = np.arange(6)
|
x_pos = np.arange(6)
|
||||||
width = 0.35
|
width = 0.35
|
||||||
axes[1, 0].bar(x_pos - width/2, targets, width, label="Target", color="steelblue", alpha=0.8)
|
axes[1, 0].bar(x_pos - width/2, targets, width, label="Target", color="#3498db", alpha=0.85)
|
||||||
axes[1, 0].bar(x_pos + width/2, preds, width, label="Predicted", color="coral", alpha=0.8)
|
axes[1, 0].bar(x_pos + width/2, preds, width, label="Predicted", color="#e74c3c", alpha=0.85)
|
||||||
axes[1, 0].set_xticks(x_pos)
|
axes[1, 0].set_xticks(x_pos)
|
||||||
axes[1, 0].set_xticklabels(names)
|
axes[1, 0].set_xticklabels(names)
|
||||||
axes[1, 0].set_ylabel("Parameter Value")
|
axes[1, 0].set_ylabel("Parameter Value")
|
||||||
axes[1, 0].set_title("Target vs Predicted")
|
axes[1, 0].set_title("Target vs Predicted", fontweight="bold", fontsize=12)
|
||||||
axes[1, 0].legend()
|
axes[1, 0].legend(framealpha=0.9)
|
||||||
axes[1, 0].grid(True, alpha=0.3, axis="y")
|
axes[1, 0].grid(True, alpha=0.3, axis="y")
|
||||||
|
|
||||||
axes[1, 1].bar(x_pos, errors, color=["c", "m", "y", "g", "b", "r"], alpha=0.8)
|
colors = ["#3498db", "#e74c3c", "#9b59b6", "#2ecc71", "#f39c12", "#1abc9c"]
|
||||||
|
bars = axes[1, 1].bar(x_pos, errors, color=colors, alpha=0.85, edgecolor="white", linewidth=1.2)
|
||||||
axes[1, 1].set_xticks(x_pos)
|
axes[1, 1].set_xticks(x_pos)
|
||||||
axes[1, 1].set_xticklabels(names)
|
axes[1, 1].set_xticklabels(names)
|
||||||
axes[1, 1].set_ylabel("Absolute Error")
|
axes[1, 1].set_ylabel("Absolute Error")
|
||||||
axes[1, 1].set_title(f"Prediction Error (Mean: {np.mean(errors):.4f})")
|
axes[1, 1].set_title(f"Prediction Error (Mean: {np.mean(errors):.4f})", fontweight="bold", fontsize=12)
|
||||||
axes[1, 1].grid(True, alpha=0.3, axis="y")
|
axes[1, 1].grid(True, alpha=0.3, axis="y")
|
||||||
for i_e, e in enumerate(errors):
|
for i_e, e in enumerate(errors):
|
||||||
axes[1, 1].text(i_e, e + 0.01, f"{e:.3f}", ha="center", va="bottom", fontsize=8)
|
axes[1, 1].text(i_e, e + 0.01, f"{e:.3f}", ha="center", va="bottom", fontsize=9)
|
||||||
|
|
||||||
plt.suptitle(f"Sample {vis_count + 1}", fontsize=14)
|
plt.suptitle(f"Sample {vis_count + 1}", fontsize=14, fontweight="bold")
|
||||||
plt.tight_layout()
|
plt.tight_layout()
|
||||||
plt.savefig(os.path.join(IMG_DIR, f"prediction_sample_{vis_count + 1:02d}.png"), dpi=100)
|
plt.savefig(os.path.join(IMG_DIR, f"prediction_sample_{vis_count + 1:02d}.png"), dpi=100, bbox_inches="tight")
|
||||||
plt.show()
|
plt.show()
|
||||||
print(f"Saved prediction_sample_{vis_count + 1:02d}.png")
|
print(f"Saved prediction_sample_{vis_count + 1:02d}.png")
|
||||||
|
|
||||||
vis_count += 1
|
vis_count += 1
|
||||||
|
|
||||||
print(f"\nPrediction errors over {n_samples} samples:")
|
print(f"\nPrediction errors over {n_samples} samples:")
|
||||||
print(f"{'Param':<8} {'Mean Error':>12} {'Std Error':>12} {'Min':>8} {'Max':>8}")
|
print(f"{'Param':<8} {'Mean Error':>12} {'Std Error':>12} {'Min':>8} {'Max':>8} {'Rel Err':>10}")
|
||||||
print("-" * 52)
|
print("-" * 62)
|
||||||
for i in range(6):
|
for i in range(6):
|
||||||
mean_err = np.mean(all_errors[i])
|
mean_err = np.mean(all_errors[i])
|
||||||
std_err = np.std(all_errors[i])
|
std_err = np.std(all_errors[i])
|
||||||
min_err = np.min(all_errors[i])
|
min_err = np.min(all_errors[i])
|
||||||
max_err = np.max(all_errors[i])
|
max_err = np.max(all_errors[i])
|
||||||
print(f"{names[i]:<8} {mean_err:>12.4f} {std_err:>12.4f} {min_err:>8.4f} {max_err:>8.4f}")
|
rel_err = relative_errors[i]
|
||||||
|
print(f"{names[i]:<8} {mean_err:>12.4f} {std_err:>12.4f} {min_err:>8.4f} {max_err:>8.4f} {rel_err:>10.4f}")
|
||||||
|
|
||||||
|
print(f"\nAngle errors in degrees:")
|
||||||
|
print(f"{'Param':<8} {'MAE (deg)':>12} {'MAE (rad)':>12}")
|
||||||
|
print("-" * 35)
|
||||||
|
for i, name in enumerate(["rx", "ry", "rz"]):
|
||||||
|
print(f"{name:<8} {angle_errors_deg[i]:>12.2f} {mean_errors[i+2]:>12.4f}")
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"best_val_loss": trainer.best_val_loss,
|
"best_val_loss": trainer.best_val_loss,
|
||||||
@@ -248,4 +307,8 @@ def analyze_training(trainer):
|
|||||||
"val_mse_trans": trainer.val_mse_trans,
|
"val_mse_trans": trainer.val_mse_trans,
|
||||||
"val_mse_angle": trainer.val_mse_angle,
|
"val_mse_angle": trainer.val_mse_angle,
|
||||||
"val_mse_scale": trainer.val_mse_scale,
|
"val_mse_scale": trainer.val_mse_scale,
|
||||||
|
"mean_errors": mean_errors,
|
||||||
|
"std_errors": std_errors,
|
||||||
|
"angle_errors_deg": angle_errors_deg,
|
||||||
|
"relative_errors": relative_errors,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ from PIL import Image
|
|||||||
from torch.utils.data import DataLoader, Dataset, Subset
|
from torch.utils.data import DataLoader, Dataset, Subset
|
||||||
from torchvision import transforms
|
from torchvision import transforms
|
||||||
|
|
||||||
from .utils import config, get_camera_matrix, generate_random_homography_params, homography_params_to_matrix, matrix_to_homography_params
|
from .utils import config, get_camera_matrix, generate_random_homography_params, homography_params_to_matrix
|
||||||
|
|
||||||
|
|
||||||
class YaGoDataset(Dataset):
|
class YaGoDataset(Dataset):
|
||||||
@@ -55,6 +55,7 @@ class YaGoDataset(Dataset):
|
|||||||
self._cached_google = [None] * len(self.image_pairs)
|
self._cached_google = [None] * len(self.image_pairs)
|
||||||
self._cached_yandex = [None] * len(self.image_pairs)
|
self._cached_yandex = [None] * len(self.image_pairs)
|
||||||
self._cached_homography = [None] * len(self.image_pairs)
|
self._cached_homography = [None] * len(self.image_pairs)
|
||||||
|
self._cached_params = [None] * len(self.image_pairs)
|
||||||
|
|
||||||
def _generate_augmented(self, idx):
|
def _generate_augmented(self, idx):
|
||||||
google_img = self._google_images[idx].copy()
|
google_img = self._google_images[idx].copy()
|
||||||
@@ -64,14 +65,11 @@ class YaGoDataset(Dataset):
|
|||||||
params2 = generate_random_homography_params()
|
params2 = generate_random_homography_params()
|
||||||
H1 = homography_params_to_matrix(params1, self.K)
|
H1 = homography_params_to_matrix(params1, self.K)
|
||||||
H2 = homography_params_to_matrix(params2, self.K)
|
H2 = homography_params_to_matrix(params2, self.K)
|
||||||
H_combined = np.linalg.inv(H1) @ H2
|
|
||||||
|
|
||||||
google_warped = cv2.warpPerspective(google_img, H2, (self.image_size[1], self.image_size[0]))
|
|
||||||
yandex_warped = cv2.warpPerspective(yandex_img, H1, (self.image_size[1], self.image_size[0]))
|
yandex_warped = cv2.warpPerspective(yandex_img, H1, (self.image_size[1], self.image_size[0]))
|
||||||
|
google_warped = cv2.warpPerspective(google_img, H2 @ H1, (self.image_size[1], self.image_size[0]))
|
||||||
|
|
||||||
target_params = matrix_to_homography_params(H_combined, self.K)
|
return google_warped, yandex_warped, H2, params2
|
||||||
|
|
||||||
return google_warped, yandex_warped, H_combined, target_params
|
|
||||||
|
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
return len(self.image_pairs)
|
return len(self.image_pairs)
|
||||||
@@ -85,13 +83,14 @@ class YaGoDataset(Dataset):
|
|||||||
google_img = self._cached_google[idx]
|
google_img = self._cached_google[idx]
|
||||||
yandex_img = self._cached_yandex[idx]
|
yandex_img = self._cached_yandex[idx]
|
||||||
target_matrix = self._cached_homography[idx]
|
target_matrix = self._cached_homography[idx]
|
||||||
target_params = matrix_to_homography_params(target_matrix, self.K)
|
target_params = self._cached_params[idx]
|
||||||
elif self.augment:
|
elif self.augment:
|
||||||
google_img, yandex_img, target_matrix, target_params = self._generate_augmented(idx)
|
google_img, yandex_img, target_matrix, target_params = self._generate_augmented(idx)
|
||||||
if self.cache_level > 0:
|
if self.cache_level > 0:
|
||||||
self._cached_google[idx] = google_img
|
self._cached_google[idx] = google_img
|
||||||
self._cached_yandex[idx] = yandex_img
|
self._cached_yandex[idx] = yandex_img
|
||||||
self._cached_homography[idx] = target_matrix
|
self._cached_homography[idx] = target_matrix
|
||||||
|
self._cached_params[idx] = target_params
|
||||||
else:
|
else:
|
||||||
google_img = self._google_images[idx]
|
google_img = self._google_images[idx]
|
||||||
yandex_img = self._yandex_images[idx]
|
yandex_img = self._yandex_images[idx]
|
||||||
|
|||||||
@@ -29,11 +29,19 @@ class HomographyCNN6(nn.Module):
|
|||||||
nn.Dropout(dropout_rate),
|
nn.Dropout(dropout_rate),
|
||||||
nn.Linear(256, 6),
|
nn.Linear(256, 6),
|
||||||
)
|
)
|
||||||
|
self._init_weights()
|
||||||
|
|
||||||
def _normalize_sin_cos(self, _sin, _cos):
|
def _normalize_sin_cos(self, _sin, _cos):
|
||||||
_len = torch.sqrt(_sin ** 2 + _cos ** 2)
|
_len = torch.sqrt(_sin ** 2 + _cos ** 2)
|
||||||
return _sin / _len, _cos / _len
|
return _sin / _len, _cos / _len
|
||||||
|
|
||||||
|
def _init_weights(self):
|
||||||
|
for module in self.head.modules():
|
||||||
|
if isinstance(module, nn.Linear):
|
||||||
|
nn.init.kaiming_normal_(module.weight, mode='fan_in', nonlinearity='relu')
|
||||||
|
if module.bias is not None:
|
||||||
|
nn.init.zeros_(module.bias)
|
||||||
|
|
||||||
def forward(self, img1, img2):
|
def forward(self, img1, img2):
|
||||||
f1 = self.backbone(img1)
|
f1 = self.backbone(img1)
|
||||||
f2 = self.backbone(img2)
|
f2 = self.backbone(img2)
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
import cv2
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
@@ -43,13 +44,16 @@ def homography_params_to_matrix(params, K):
|
|||||||
|
|
||||||
|
|
||||||
def matrix_to_homography_params(H, K):
|
def matrix_to_homography_params(H, K):
|
||||||
|
if hasattr(H, 'numpy'):
|
||||||
|
H = H.numpy()
|
||||||
K_inv = np.linalg.inv(K)
|
K_inv = np.linalg.inv(K)
|
||||||
E = K_inv @ H @ K
|
E = K_inv @ H @ K
|
||||||
scale = np.sqrt(np.linalg.det(E[:2, :2]))
|
scale = E[2, 2]
|
||||||
R = E[:2, :2] / scale
|
R_normalized = E / scale
|
||||||
tx, ty = E[0, 2], E[1, 2]
|
rz = np.arctan2(R_normalized[1, 0], R_normalized[0, 0])
|
||||||
rz = np.arctan2(R[1, 0], R[0, 0])
|
ry = np.arctan2(-R_normalized[2, 0], np.sqrt(R_normalized[2, 1]**2 + R_normalized[2, 2]**2))
|
||||||
r20, r21 = E[2, 0], E[2, 1]
|
rx = np.arctan2(R_normalized[2, 1], R_normalized[2, 2])
|
||||||
ry = np.arctan2(r20, r21)
|
A = R_normalized[:2, :2]
|
||||||
rx = np.arctan2(-E[1, 2], E[1, 1])
|
correction = scale * np.array([R_normalized[0, 2], R_normalized[1, 2]])
|
||||||
|
tx, ty = np.linalg.solve(A, E[:2, 2] - correction)
|
||||||
return np.array([tx, ty, rx, ry, rz, scale], dtype=np.float32)
|
return np.array([tx, ty, rx, ry, rz, scale], dtype=np.float32)
|
||||||
|
|||||||
Reference in New Issue
Block a user